├── setup_run.sh ├── images ├── image-20200817122734929.png ├── image-20200817124336272.png ├── image-20200817124655908.png ├── image-20200817124742419.png ├── image-20200817124803989.png ├── image-20200817135007515.png ├── image-20200817135158335.png ├── image-20200817140709050.png ├── image-20200817143339662.png ├── image-20200817161633785.png └── image-20200817162530108.png ├── setup.txt ├── data ├── dbg.txt ├── algorithm.txt ├── vm.txt ├── coin.txt ├── pool.txt ├── domain_suffix.txt ├── av.json └── OPCODE.txt ├── run.sh ├── requirements.txt ├── old ├── cg │ ├── gen_fcg.idc │ └── funtion_name.py └── gray.ipynb ├── yara_check ├── check_crypto_by_static.py └── check_packer_by_static.py ├── .gitignore ├── test.py ├── train_histogram.py ├── run.py ├── feature_engineering.py ├── train_pe_raw.py ├── rules └── black_rules.yar ├── README.md ├── feature_engineering_test.ipynb └── raw_features.py /setup_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip install -r requirements.txt -------------------------------------------------------------------------------- /images/image-20200817122734929.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817122734929.png -------------------------------------------------------------------------------- /images/image-20200817124336272.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817124336272.png -------------------------------------------------------------------------------- /images/image-20200817124655908.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817124655908.png 
-------------------------------------------------------------------------------- /images/image-20200817124742419.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817124742419.png -------------------------------------------------------------------------------- /images/image-20200817124803989.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817124803989.png -------------------------------------------------------------------------------- /images/image-20200817135007515.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817135007515.png -------------------------------------------------------------------------------- /images/image-20200817135158335.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817135158335.png -------------------------------------------------------------------------------- /images/image-20200817140709050.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817140709050.png -------------------------------------------------------------------------------- /images/image-20200817143339662.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuriufo/DataCon2020/HEAD/images/image-20200817143339662.png -------------------------------------------------------------------------------- /images/image-20200817161633785.png: -------------------------------------------------------------------------------- 
#!/bin/bash
# Runs feature extraction (run.py) and then prediction (test.py), and prints
# the total wall-clock time of both steps in minutes and seconds.

start_time=$(date +%s)

python run.py
python test.py

end_time=$(date +%s)
# Use $(( )) arithmetic expansion; the older $[ ] form is deprecated in bash.
cost_time=$(( end_time - start_time ))
echo "Time is out. $((cost_time / 60))min $((cost_time % 60))s"
data in the final pass of analysis 5 | SetShortPrm(INF_AF2, GetShortPrm(INF_AF2) | AF2_DODATA); 6 | Message("Waiting for the end of the auto analysis...\n"); 7 | Wait(); 8 | Message("\n\n------ Creating the output file.... --------\n"); 9 | auto file = GetIdbPath()[0:-4] + ".gdl"; 10 | GenCallGdl(file, file, CHART_GEN_GDL|CHART_PRINT_NAMES); // create the assembler file 11 | Message("All done, exiting...\n"); 12 | Exit(0); // exit to OS, error code 0 - success 13 | } 14 | -------------------------------------------------------------------------------- /data/coin.txt: -------------------------------------------------------------------------------- 1 | BitCoin 2 | Ethereum 3 | Ripple 4 | Tether 5 | Bitcoin Cash 6 | Bitcoin SV 7 | LiteCoin 8 | Cardano 9 | Binance Coin 10 | ChainLink 11 | EOS 12 | Stellar 13 | Tezos 14 | OKB 15 | Monero 16 | TRON 17 | CRO 18 | Huobi Token 19 | Dash 20 | Etherium Classic 21 | MIOTA 22 | NEO 23 | NEM 24 | Ontology 25 | Zcash 26 | DogeCoin 27 | EthLend 28 | Maker 29 | Basic Attention Token 30 | Kyber Network 31 | 0xProject 32 | Digibyte 33 | Quantum 34 | Makerdao 35 | True USD 36 | Augur 37 | PayPie 38 | Theta Token 39 | Decred 40 | Zilliqa 41 | Bitcoin Gold 42 | BitBay 43 | Waves 44 | OmiseGO 45 | Lisk 46 | Bitcoin Diamond 47 | Celsius 48 | Bytom 49 | ICON 50 | Ravencoin 51 | Grin 52 | Electroneum 53 | BTC 54 | ETH 55 | XRP 56 | USDT 57 | BCH 58 | BSV 59 | LTC 60 | ADA 61 | BNB 62 | LINK 63 | EOS 64 | XLM 65 | XTZ 66 | OKB 67 | XMR 68 | TRX 69 | CRO 70 | HT 71 | DASH 72 | ETC 73 | IOTA 74 | NEO 75 | XEM 76 | ONT 77 | ZEC 78 | DOGE 79 | LEND 80 | MKR 81 | BAT 82 | KNC 83 | ZRX 84 | DGB 85 | QTUM 86 | DAI 87 | TUSD 88 | REP 89 | PPP 90 | THETA 91 | DCR 92 | ZIL 93 | BTG 94 | BAY 95 | WAVES 96 | OMG 97 | LSK 98 | BCD 99 | CEL 100 | BTM 101 | ICX 102 | RVN 103 | ETN -------------------------------------------------------------------------------- /yara_check/check_crypto_by_static.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import unicode_literals, print_function, division 5 | 6 | from yara import compile 7 | import os 8 | 9 | 10 | class YaraCheck(object): 11 | def __init__(self, rule_path): 12 | super(YaraCheck, self).__init__() 13 | self.Rules = self._setRules(rule_path) 14 | 15 | def _setRules(self, path_): 16 | yaraRule = compile(path_) 17 | return yaraRule 18 | 19 | def scan(self, file_info, is_path): 20 | if is_path: 21 | with open(file_info,"rb") as fin: 22 | bdata = fin.read() 23 | matches = self.Rules.match(data=bdata) 24 | else: 25 | matches = self.Rules.match(data=file_info) 26 | # for i in matches: 27 | # print(i.rule, i.tags) 28 | return [i.rule for i in matches] 29 | 30 | 31 | def check_crypto_by_static(yc, file_info, is_path=True): 32 | '''基于特征码的加密识别 33 | :param yc: YaraCheck类实例 34 | :param file_info: PE文件路径或PE文件数据 35 | :param is_path: bool,指明file_info是PE文件路径还是PE文件数据,默认True为文件路径 36 | :return: 成功True, list;失败False, string 37 | ''' 38 | try: 39 | results = yc.scan(file_info, is_path) 40 | return True, results 41 | except Exception as e: 42 | return False, str(e) 43 | 44 | 45 | if __name__ == '__main__': 46 | # 初始化YaraChek类,给出参数yara_rules地址 47 | yara_rules_path = "../rules/crypto_index.yar" 48 | yc = YaraCheck(rule_path=yara_rules_path) 49 | 50 | # 方法一:调用识别函数,用文件路径 51 | #check_suss, results = check_crypto_by_static(yc, "./reverse1_final.exe") 52 | 53 | # 方法二:调用识别函数,用文件数据 54 | for root, dirs, files in os.walk("../1_2000_black/"): 55 | # root 表示当前正在访问的文件夹路径 56 | # dirs 表示该文件夹下的子目录名list 57 | # files 表示该文件夹下的文件list 58 | 59 | # 遍历文件 60 | 61 | for file_name in files: 62 | file_path = os.path.join(root,file_name) 63 | with open(file_path, 'rb') as fin: 64 | bdata = fin.read() 65 | 66 | check_suss, results = check_crypto_by_static(yc, bdata, False) 67 | 68 | # 检查结果 69 | if check_suss: 70 | print(results) 71 | else: 72 | 
print(check_suss,results) 73 | 74 | -------------------------------------------------------------------------------- /yara_check/check_packer_by_static.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import unicode_literals, print_function, division 5 | 6 | from yara import compile 7 | import os 8 | 9 | 10 | class YaraCheck(object): 11 | def __init__(self, rule_path): 12 | super(YaraCheck, self).__init__() 13 | self.Rules = self._setRules(rule_path) 14 | 15 | def _setRules(self, path_): 16 | yaraRule = compile(path_) 17 | return yaraRule 18 | 19 | def scan(self, file_info, is_path): 20 | if is_path: 21 | with open(file_info,"rb") as fin: 22 | bdata = fin.read() 23 | matches = self.Rules.match(data=bdata) 24 | else: 25 | matches = self.Rules.match(data=file_info) 26 | # for i in matches: 27 | # print(i.rule, i.tags) 28 | return [i.rule for i in matches] 29 | 30 | 31 | def check_packers_by_static(yc, file_info, is_path=True): 32 | '''基于特征码的静态壳识别 33 | :param yc: YaraCheck类实例 34 | :param file_info: PE文件路径或PE文件数据 35 | :param is_path: bool,指明file_info是PE文件路径还是PE文件数据,默认True为文件路径 36 | :return: 成功True, list;失败False, string 37 | ''' 38 | try: 39 | results = yc.scan(file_info, is_path) 40 | return True, results 41 | except Exception as e: 42 | return False, str(e) 43 | 44 | 45 | if __name__ == '__main__': 46 | # 初始化YaraChek类,给出参数yara_rules地址 47 | yara_rules_path = "../rules/packers_index.yar" 48 | yc = YaraCheck(rule_path=yara_rules_path) 49 | 50 | # 方法一:调用识别函数,用文件路径 51 | #check_suss, results = check_packers_by_static(yc, "./reverse1_final.exe") 52 | 53 | # 方法二:调用识别函数,用文件数据 54 | for root, dirs, files in os.walk("../1_2000_black/"): 55 | # root 表示当前正在访问的文件夹路径 56 | # dirs 表示该文件夹下的子目录名list 57 | # files 表示该文件夹下的文件list 58 | 59 | # 遍历文件 60 | 61 | for file_name in files: 62 | file_path = os.path.join(root,file_name) 63 | with open(file_path, 'rb') as fin: 64 | bdata = 
fin.read() 65 | 66 | check_suss, results = check_packers_by_static(yc, bdata, False) 67 | 68 | # 检查结果 69 | if check_suss: 70 | print(results) 71 | else: 72 | print(check_suss,results) 73 | 74 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
import re


def get_funtion(fp):
    """Rank the nodes of a GDL call-graph file by their number of outgoing edges.

    The file is read as bytes and scanned in three phases: skip lines until
    the first one starting with ``node``, collect node declarations until the
    first line starting with ``// node``, then count edges per source node
    until a line starting with ``}``. Each scan also stops at end of file, so
    a truncated or marker-less file yields a partial (possibly empty) result
    instead of raising ``IndexError``.

    :param fp: path of the ``.gdl`` file
    :return: list of ``(label, rank, out_degree)`` tuples sorted by
        ``out_degree`` in descending order; ``label`` is ``bytes``
    """
    with open(fp, 'rb') as f:
        data = f.readlines()
    total = len(data)

    # Phase 1: skip the header up to the first node declaration.
    color_line_pre = 0
    while color_line_pre < total and b'node' != data[color_line_pre][:4]:
        color_line_pre += 1

    # Phase 2: parse node declarations (with or without a textcolor field).
    node_line_pre = color_line_pre
    nodes = {}
    pat1 = b'node: { title: \"(.*)\" label: \"(.*)\" color: (.*) textcolor: (.*) bordercolor: (.*) }'
    pat2 = b'node: { title: \"(.*)\" label: \"(.*)\" color: (.*) bordercolor: (.*) }'
    while node_line_pre < total and b'// node' != data[node_line_pre][:7]:
        so1 = re.search(pat1, data[node_line_pre], re.I)
        so2 = re.search(pat2, data[node_line_pre], re.I)
        if so1 is not None:
            nodes[so1.group(1)] = [so1.group(2), so1.group(3), so1.group(4), so1.group(5)]
        elif so2 is not None:
            nodes[so2.group(1)] = [so2.group(2), so2.group(3), so2.group(4)]
        else:
            # Unrecognised line inside the node section: show it for inspection.
            print(data[node_line_pre])
        node_line_pre += 1

    # Phase 3: count outgoing edges per node until the closing '}' (byte 125).
    edge_line_pre = node_line_pre
    weight = {k: 0 for k in nodes}
    source = {k: [] for k in nodes}
    pat3 = b'edge: { sourcename: \"(.*)\" targetname: \"(.*)\" }'
    while edge_line_pre < total and 125 != data[edge_line_pre][0]:
        so3 = re.search(pat3, data[edge_line_pre], re.I)
        if so3 is not None:
            src = so3.group(1)
            # Edges whose source was never declared as a node are skipped
            # rather than raising KeyError.
            if src in weight:
                weight[src] += 1
                source[src].append(so3.group(2))
        edge_line_pre += 1

    weight_list = sorted(weight.items(), key=lambda d: d[1], reverse=True)
    function = [(nodes[t][0], i, n) for i, (t, n) in enumerate(weight_list)]
    # Disabled: rewrite the GDL file with nodes renumbered by rank.
    # node_list = [node for node, _ in weight_list]
    # if len(weight_list) == 200:
    #     new_node = []
    #     new_edge = []
    #     for i, (node, _) in enumerate(weight_list):
    #         node_info = nodes[node]
    #         if len(node_info) == 4:
    #             new_node.append(b'node: { title: "' + str(i).encode('utf-8') + b'" label: "' + node_info[0] + b'" color: ' + node_info[1] + b' textcolor: ' + node_info[2] + b' bordercolor: ' + node_info[3] + b' }\n')
    #         else:
    #             new_node.append(b'node: { title: "' + str(i).encode('utf-8') + b'" label: "' + node_info[0] + b'" color: ' + node_info[1] + b' bordercolor: ' + node_info[2] + b' }\n')
    #         for target in source[node]:
    #             if target in node_list:
    #                 new_edge.append(b'edge: { sourcename: "' + node + b'" targetname: "' + target + b'" }\n')
    #     new_data = data[0:color_line_pre] + new_node + new_edge + [b'}']
    #     with open(fp, 'wb') as f:
    #         f.writelines(new_data)

    return function
# Prediction pipeline: three base predictors (histogram CNN, stacked PE raw
# feature models, LightGBM on engineered features) followed by a weighted
# LR / RFC fusion. Each stage prints its own elapsed time once it finishes.
# NOTE: every model is loaded with pickle, which executes code contained in
# the file; only load artefacts produced by this project.

# --------------------- Histogram ------------------------

start_time = time.time()

test_path = [os.path.join("/home/jovyan/histogram", sp) for sp in hash_list]

test_num = len(test_path)

raw_feature = np.empty((test_num, 512))

for i, fp in enumerate(test_path):
    with open(fp+'.txt', 'r') as f:
        # One float per line, written by run.py.
        raw_feature[i] = [float(his.strip()) for his in f.readlines()]

model = tf.keras.models.load_model('/home/jovyan/models/histogram_0.97.h5')

histogram_test = model.predict(raw_feature)

print("Histogram predict: {0:.2f}s".format(time.time()-start_time))

# --------------------- PE static features ------------------------

start_time = time.time()
with open("/home/jovyan/models/raw_feature.pkl", "rb") as fp:
    pe_raw_models = pickle.load(fp)

with open("/home/jovyan/models/raw_feature_names.pkl", "rb") as fp:
    raw_feature_names = pickle.load(fp)

with open("/home/jovyan/models/rfc_pe_model.pkl", "rb") as f:
    rfc_pe_model = pickle.load(f)

with open("/home/jovyan/pe_raw/pe_raw_vectors.pkl", "rb") as f:
    pe_raw_vectors = pickle.load(f)

n_splits = 5
pe_raw_vectors = np.array(pe_raw_vectors, dtype=np.float32)

# Shared list the worker processes write their fold predictions into.
oof_test_skf = Manager().list([0] * n_splits)  # np.empty((n_splits, test_num))
def pe_raw_predict(ind, model):
    """Store the prediction of fold model ``ind`` in the shared result list."""
    oof_test_skf[ind] = model.predict(pe_raw_vectors)

stacking_test = []

for name in raw_feature_names:
    pool = Pool(n_splits)
    pending = [pool.apply_async(func=pe_raw_predict, args=(i, model))
               for i, model in enumerate(pe_raw_models[name])]
    pool.close()
    pool.join()
    # Re-raise any worker exception here; otherwise a failed fold would leave
    # the previous feature's values in oof_test_skf and be averaged silently.
    for task in pending:
        task.get()
    oof_test = np.array(list(oof_test_skf), dtype=np.float32).mean(axis=0)
    stacking_test.append(oof_test.reshape(-1, 1))

stacking_test = np.hstack(stacking_test)
raw_feature_test = rfc_pe_model.predict(stacking_test).reshape(-1, 1)

print("PE raw predict: {0:.2f}s".format(time.time()-start_time))

# --------------------- Feature engineering ------------------------

start_time = time.time()
with open("/home/jovyan/feature_engineering/feature_engineering_features.pkl", 'rb') as f:
    feature_engineering_features = pickle.load(f)

with open("/home/jovyan/models/keys.pkl", 'rb') as f:
    keys = pickle.load(f)

with open("/home/jovyan/models/lgb_models.pkl", "rb") as fp:
    lgb_models = pickle.load(fp)

train_df = pd.DataFrame(feature_engineering_features, columns=keys)

n_splits = 5

oof_test_skf = np.empty((n_splits, test_num))

for i, model in enumerate(lgb_models):
    oof_test_skf[i, :] = model.predict(train_df, num_iteration=model.best_iteration)

# Average the fold predictions into one column.
feature_engineerin_test = oof_test_skf.mean(axis=0).reshape(-1, 1)

print("Feature Engineering predict: {0:.2f}s".format(time.time()-start_time))

# --------------------- Fusion ------------------------

start_time = time.time()
with open("/home/jovyan/models/lr_rfc.pkl", "rb") as f:
    lr_rfc = pickle.load(f)

test = np.hstack([feature_engineerin_test, histogram_test , raw_feature_test])

labels_lr = lr_rfc[0].predict_proba(test)
labels_rfc = lr_rfc[1].predict_proba(test)

# Weighted vote: 0.6 * first model + 0.4 * second model, thresholded at 0.5.
test_labels = []

for x, y in zip(labels_lr, labels_rfc):
    if x[1]*0.6+ y[1]*0.4 < 0.5:
        test_labels.append(0)
    else:
        test_labels.append(1)

print("Final predict: {0:.2f}s".format(time.time()-start_time))
print("Found {0} black samples. {1:.2f}s".format(sum(test_labels), time.time()-start_time))

result = []
for pt, label in zip(hash_list, test_labels):
    result.append("{0}, {1}\n".format(pt, label))
with open("/home/jovyan/malware_final.txt", 'w') as f:
    f.write(''.join(result).strip())
| linx 111 | luxor 112 | 6litrcrypto 113 | cryptopools 114 | tooroot 115 | ltc 116 | minerpool 117 | zecmine 118 | virtualmining 119 | btcp 120 | nevermining 121 | xeminer 122 | coinmine 123 | gomine 124 | ethmypool 125 | dash 126 | cool-pool 127 | arhash 128 | miningpatriot 129 | moneroocean 130 | dbix 131 | e-hashs 132 | sandpool 133 | pasc 134 | dgb 135 | abcnet 136 | moneromilk 137 | cryptocrush 138 | com 139 | kratos 140 | ubq 141 | ellaism 142 | cann 143 | log 144 | xmr 145 | btg 146 | luckpool 147 | weeminepool 148 | sib 149 | pirl-pool 150 | xmrpool 151 | flo 152 | hushmine 153 | emcd 154 | xmrget 155 | 99miners 156 | prohash 157 | etcpoolmining 158 | cure 159 | b2g 160 | waterhole 161 | 2zo 162 | pirl 163 | dwarfpool 164 | zen 165 | promine 166 | ppc 167 | thesevendwarfs 168 | flypool 169 | coinotron 170 | etherscan 171 | myminers 172 | p2pool 173 | fairpool 174 | minexmr 175 | poolto 176 | nicehash 177 | megapool 178 | pandapool 179 | etnminers 180 | alhafeez 181 | comining 182 | pink 183 | monerawr 184 | aur 185 | dedpool 186 | techvergepool 187 | poolminer 188 | thecoin 189 | coinfoundry 190 | black-pool 191 | bitcointalk 192 | drowningpool 193 | 01xmrpool 194 | supportcryptonight 195 | marshsoftware 196 | bcn 197 | etnpoolhash 198 | nasf 199 | zet-tech 200 | elevenpool 201 | dearmon 202 | zec 203 | mymininghub 204 | monerominer 205 | rvn 206 | eth 207 | hvpps 208 | doge 209 | hiveon 210 | lbc 211 | slushpool 212 | xjo 213 | rca-pool 214 | partyvibe 215 | superpools 216 | privpool 217 | suprnova 218 | hashanywhere 219 | top-miningpool 220 | whalesburg 221 | mnx 222 | dbixmine 223 | hush 224 | daxx 225 | bloxcruncha 226 | vtc 227 | hashcity 228 | arsmine 229 | secumine 230 | mining-pool 231 | bitcoin-russia 232 | miningspeed 233 | glt 234 | sumo 235 | etnpeople 236 | ethmine 237 | bitcoin 238 | rnova 239 | zpool 240 | start 241 | ethashpool 242 | krb 243 | soil 244 | flexpool 245 | ubiqpool 246 | cryptoserb 247 | aeon 248 | nextgen-mining 249 | btc 250 | 
import os
import time
from PIL import Image
from tqdm import tqdm
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Trains a small CNN on per-sample histogram feature files and saves the model
# under ./models/, named after its test accuracy.

# Hyper-parameters

TRAIN_SIZE = 0.8
VAL_SIZE = 0.1   # applied to what remains after the test split
TEST_SIZE = 0.1
SEED = 4396

LENGTH = 512
WIDTH, HEIGHT = 32, 16
BATCH_SIZE = 16
EPOCH = 300
SHUFFLE = False
CLASSES = 2

LR = 1e-4


datapath = "/home/jovyan/histogram"

with open("/home/datacon/malware/XXX/black.txt", 'r') as f:
    black_list = f.read().strip().split()

with open("/home/datacon/malware/XXX/white.txt", 'r') as f:
    white_list = f.read().strip().split()

black_path = [os.path.join(datapath, sp) for sp in black_list]
white_path = [os.path.join(datapath, sp) for sp in white_list]


def _load_histogram(sample_path):
    """Read ``sample_path + '.txt'`` (one float per line) into a list of floats."""
    with open(sample_path+'.txt', 'r') as f:
        return [float(his.strip()) for his in f.readlines()]


raw_feature, raw_labels = [], []

# The progress total follows the actual sample count instead of a fixed 11647.
with tqdm(total=len(black_path) + len(white_path), ncols=80, desc="histogram") as pbar:
    # Label 1 = black list, label 0 = white list.
    for sample_paths, label in ((black_path, 1), (white_path, 0)):
        for fp in sample_paths:
            raw_feature.append(_load_histogram(fp))
            raw_labels.append(label)
            pbar.update(1)

# Shuffle

np.random.seed(SEED)
tf.random.set_seed(SEED)

features, labels = np.array(raw_feature, dtype=np.float32), np.array(raw_labels, dtype=np.int32)

index = list(range(len(labels)))
np.random.shuffle(index)

features = features[index]
labels = labels[index]

# Split into train / validation / test

train_features, test_features, train_label, test_label = train_test_split(
    features,
    labels,
    test_size=TEST_SIZE,
    stratify=labels,
    random_state=SEED)
train_features, valid_features, train_label, valid_label = train_test_split(
    train_features,
    train_label,
    test_size=VAL_SIZE,
    stratify=train_label,
    random_state=SEED)

# Build the datasets

train_ds = tf.data.Dataset.from_tensor_slices((train_features, train_label)) \
    .batch(BATCH_SIZE) \
    .prefetch(buffer_size = tf.data.experimental.AUTOTUNE)

valid_ds = tf.data.Dataset.from_tensor_slices((valid_features, valid_label)) \
    .batch(BATCH_SIZE) \
    .prefetch(buffer_size = tf.data.experimental.AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((test_features, test_label)) \
    .batch(BATCH_SIZE) \
    .prefetch(buffer_size = tf.data.experimental.AUTOTUNE)


# Model: the LENGTH-long vector is reshaped to a WIDTH x HEIGHT single-channel
# grid and passed through two conv/pool stages and a dense head.

inputs = layers.Input(shape=(LENGTH, 1), dtype='float32')
re_inputs = tf.reshape(inputs, [-1, WIDTH, HEIGHT, 1])
Conv_1 = layers.Conv2D(60, (2, 2), padding='same', activation='relu')(re_inputs)
pool_1 = layers.MaxPooling2D()(Conv_1)
Conv_2 = layers.Conv2D(200, (2, 2), padding='same', activation='relu')(pool_1)
pool_2 = layers.MaxPooling2D()(Conv_2)
Flat = layers.Flatten()(pool_2)
Dense_1 = layers.Dense(500, activation='relu')(Flat)
# NOTE(review): `dropout` is built but the output layer below reads Dense_1,
# so it is not part of the model graph; confirm whether that is intended.
dropout = layers.Dropout(0.2)(Dense_1)
# Dense_2 = layers.Dense(50, activation='relu')(dropout)
outputs = layers.Dense(1, activation='sigmoid')(Dense_1)

model = models.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer=tf.keras.optimizers.Nadam(LR),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(train_ds,
          validation_data=valid_ds,
          # class_weight=class_weight_dict,
          epochs=EPOCH,
          workers=4,
          callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=6, min_delta=1e-4, mode='min'),
                     tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.5, verbose=0)])

predict = model.evaluate(test_ds)
print(predict)

# NOTE(review): the '.h5' suffix and save_format="tf" disagree; confirm which
# on-disk format is wanted.
model.save('./models/histogram_{0:.2f}.h5'.format(predict[1]), save_format="tf")
import tqdm

# Directory holding the samples to process; the first CLI argument overrides
# the default location.
if len(sys.argv) > 1:
    datapath = sys.argv[1]
else:
    datapath = "/home/datacon/malware/YYY_step1"

# Every file below datapath (walked recursively) is treated as a sample.
test_path = [
    os.path.join(parent, filename)
    for parent, _, filenames in os.walk(datapath)
    for filename in filenames
]

# A sample is identified by its bare file name; its header-repaired copy is
# stored under /home/jovyan/tmp with that same name.
# NOTE(review): two samples with the same file name in different
# sub-directories would map to the same repaired copy -- confirm names are unique.
hash_list = [os.path.basename(sp) for sp in test_path]
test_fixed_path = [os.path.join("/home/jovyan/tmp", sp) for sp in hash_list]

test_num = len(test_fixed_path)
print("Found {0} samples.".format(test_num))

# Persist the sample order so later stages can map result rows back to names.
with open("/home/jovyan/models/hash_list.pkl", "wb") as f:
    pickle.dump(hash_list, f)

# Caps the number of header-repair threads running at once.
emp = threading.Semaphore(value=12)

pe = PEFeatureExtractor()
fn = Feature_engineering()


def fix_header(fp, ha):
    """Write a copy of sample `fp` with its MZ and PE signatures restored.

    The first two bytes are replaced by b"MZ", and the four bytes located at
    the offset stored in bytes 0x3C..0x40 (little-endian, signed) are replaced
    by b"PE" followed by two NUL bytes. The result is written to
    /home/jovyan/tmp/<ha>.

    Intended to run in a worker thread: the dispatcher acquires `emp` before
    starting the thread and this function releases it when it finishes.

    Args:
        fp: path of the original sample.
        ha: file name to use for the repaired copy.
    """
    try:
        with open(fp, 'rb') as f:
            data = f.read()
        e_lfnew = data[0x3C: 0x40]
        offset = int.from_bytes(e_lfnew, byteorder='little', signed=True)
        new_data = b"MZ" + data[2:offset] + b"PE\0\0" + data[offset+4:]

        new_path = "/home/jovyan/tmp/{0}".format(ha)
        with open(new_path, 'wb') as f:
            f.write(new_data)
    finally:
        # Always give the slot back, even when reading or writing fails;
        # otherwise each failure permanently consumes one semaphore slot and
        # the dispatcher eventually blocks forever in emp.acquire().
        emp.release()


# --------------------- Histogram ------------------------

def histogram_feature(sample_path):
    """Compute the byte / byte-entropy histogram features of one sample.

    The byte histogram is divided by the file size and the byte-entropy
    histogram by the sum of its own entries; both are concatenated and written
    one value per line to a ".txt" file whose directory is that of
    `sample_path` with "tmp" replaced by "histogram".

    Args:
        sample_path: path of the (header-repaired) sample.
    """
    with open(sample_path, "rb") as f:
        data = f.read()
    file_size = len(data)
    Histogram = ByteHistogram().raw_features(data, None)
    Byte_Entropy = ByteEntropyHistogram().raw_features(data, None)

    Sum = sum(Byte_Entropy)

    # NOTE(review): an empty sample makes file_size (and presumably Sum) zero,
    # so the normalised values would not be finite -- confirm how the consumer
    # of these files handles that.
    Histogram = np.array(Histogram) / file_size
    Byte_Entropy = np.array(Byte_Entropy) / Sum

    feature = list(np.concatenate((Histogram, Byte_Entropy), axis=-1))

    # Rewrite only the directory part, so a sample whose own file name happens
    # to contain "tmp" keeps its name in the output path.
    directory, name = os.path.split(sample_path)
    path = os.path.join(directory.replace("tmp", "histogram"), name) + ".txt"
    with open(path, 'w') as f:
        for i in feature:
            f.write("{}\n".format(str(i)))


# --------------------- PE static features ------------------------
84 | pe_raw_vectors = Manager().list([0] * test_num) 85 | 86 | def get_pe_raw_vector(idx, fp, res_default): 87 | res = res_default 88 | try: 89 | with open(fp, 'rb') as f: 90 | raw_data = f.read() 91 | res = pe.feature_vector(raw_data) 92 | except Exception: 93 | pass 94 | pe_raw_vectors[idx] = res 95 | 96 | 97 | # ---------------------特征工程------------------------ 98 | 99 | feature_engineering_features = Manager().list([0] * test_num) 100 | 101 | def get_fn(idx, fp): 102 | with open(fp, 'rb') as f: 103 | data = f.read() 104 | res = fn.get_feature_engineering(data) 105 | feature_engineering_features[idx] = res 106 | 107 | 108 | if __name__ == '__main__': 109 | print("Preprecess started.") 110 | 111 | # 修复MZ和PE头 112 | os.system("rm -rf /home/jovyan/tmp") 113 | os.makedirs("/home/jovyan/tmp") 114 | table = [] 115 | with tqdm(total=test_num, ncols=80, desc="fix") as pbar: 116 | for fp, ha in zip(test_path, hash_list): 117 | emp.acquire() 118 | t = threading.Thread(target=fix_header, args=(fp, ha), daemon=True) 119 | t.start() 120 | table.append(t) 121 | pbar.update(1) 122 | for t in table: 123 | t.join() 124 | 125 | # 直方图 126 | os.system("rm -rf /home/jovyan/histogram") 127 | os.makedirs("/home/jovyan/histogram") 128 | start_time = time.time() 129 | with Pool(12) as pool: 130 | for fp in test_fixed_path: 131 | pool.apply_async(func=histogram_feature, args=(fp, )) 132 | pool.close() 133 | pool.join() 134 | end_time = time.time() 135 | print("hostogram: {0:.2f}s".format(end_time - start_time)) 136 | start_time = end_time 137 | 138 | # PE静态特征 139 | os.system("rm -rf /home/jovyan/pe_raw") 140 | os.makedirs("/home/jovyan/pe_raw") 141 | res_default = np.zeros(shape=(967,), dtype=np.float32) 142 | with Pool(12) as pool: 143 | for i, fp in enumerate(test_fixed_path): 144 | pool.apply_async(func=get_pe_raw_vector, args=(i, fp, res_default)) 145 | pool.close() 146 | pool.join() 147 | with open("/home/jovyan/pe_raw/pe_raw_vectors.pkl", "wb") as f: 148 | 
pickle.dump(list(pe_raw_vectors), f) 149 | end_time = time.time() 150 | print("pe raw: {0:.2f}s".format(end_time - start_time)) 151 | start_time = end_time 152 | 153 | # 特征工程 154 | os.system("rm -rf /home/jovyan/feature_engineering") 155 | os.makedirs("/home/jovyan/feature_engineering") 156 | with Pool(12) as pool: 157 | for i, fp in enumerate(test_fixed_path): 158 | pool.apply_async(func=get_fn, args=(i, fp)) 159 | pool.close() 160 | pool.join() 161 | end_time = time.time() 162 | print("feature engineering: {0:.2f}s".format(end_time - start_time)) 163 | 164 | with open("/home/jovyan/feature_engineering/feature_engineering_features.pkl", 'wb') as f: 165 | pickle.dump(list(feature_engineering_features), f) 166 | 167 | print("Preprecess done.") 168 | -------------------------------------------------------------------------------- /data/domain_suffix.txt: -------------------------------------------------------------------------------- 1 | aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|academy|accenture|accountant|accountants|aco|active|actor|adac|ads|adult|aeg|aetna|afamilycompany|afl|africa|agakhan|agency|aig|aigo|airbus|airforce|airtel|akdn|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|aol|apartments|app|apple|aquarelle|arab|aramco|archi|army|art|arte|asda|associates|athleta|attorney|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aws|axa|azure|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bbc|bbt|bbva|bcg|bcn|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bharti|bible|bid|bike|bing|bingo|bio|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bms|bmw|bnl|bnpparibas|boats|boehringer|bofa|bom|bond|boo|book|booking|boots|bosch|bostik|boston|bot|boutique|box|bradesco|bridgestone|broadway|broker|brother|brussels|budapest|bugatti|build|builders|busines
s|buy|buzz|bzh|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|catering|catholic|cba|cbn|cbre|cbs|ceb|center|ceo|cern|cfa|cfd|chanel|channel|chase|chat|cheap|chintai|chloe|christmas|chrome|chrysler|church|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|corsica|country|coupon|coupons|courses|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cuisinella|cymru|cyou|dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dnp|docs|doctor|dodge|dog|doha|domains|doosan|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|earth|eat|eco|edeka|education|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|ericsson|erni|esq|estate|esurance|etisalat|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|flickr|flights|flir|florist|flowers|flsmidth|fly|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|gal|gallery|gallo|gallup|game|games|gap|garden|gbiz|gdn|gea|gent|genting|george|ggee|gift|gifts|gives|giving|glade|glass|gle|global|globo|gmail|gmbh|gmo|gmx|godaddy|gold|goldpoint|golf|goo|goodhands|goodyear|goog|google|gop|got|grainger|graphics|gratis|green|gripe|grocery|gr
oup|guardian|gucci|guge|guide|guitars|guru|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hkt|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hsbc|htc|hughes|hyatt|hyundai|ibm|icbc|ice|icu|ieee|ifm|iinet|ikano|imamat|imdb|immo|immobilien|industries|infiniti|info|ing|ink|institute|insurance|insure|intel|international|intuit|investments|ipiranga|irish|iselect|ismaili|ist|istanbul|itau|itv|iveco|iwc|jaguar|java|jcb|jcp|jeep|jetzt|jewelry|jio|jlc|jll|jmp|jnj|joburg|jot|joy|jpmorgan|jprs|juegos|juniper|kaufen|kddi|kerryhotels|kerrylogistics|kerryproperties|kfh|kia|kim|kinder|kindle|kitchen|kiwi|koeln|komatsu|kosher|kpmg|kpn|krd|kred|kuokgroup|kyoto|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|ltd|ltda|lundbeck|lupin|luxe|luxury|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mcd|mcdonalds|mckinsey|med|media|meet|melbourne|meme|memorial|men|menu|meo|merckmsd|metlife|miami|microsoft|mini|mint|mit|mitsubishi|mlb|mls|mma|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|montblanc|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|msd|mtn|mtpc|mtr|mutual|mutuelle|nab|nadex|nagoya|nationwide|natura|navy|nba|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nfl|ngo|nhk|nico|nike|nikon|ninja|nissan|nissay|nokia|northwesternmutual|norton|now|nowruz|nowtv|nra|nrw|ntt|nyc|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|omega|one|ong|onl|online|onyourside|ooo|open|
oracle|orange|org|organic|orientexpress|origins|osaka|otsuka|ott|ovh|page|pamperedchef|panasonic|panerai|paris|pars|partners|parts|party|passagens|pay|pccw|pet|pfizer|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|place|play|playstation|plumbing|plus|pnc|pohl|poker|politie|porn|pramerica|praxi|press|prime|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|pub|pwc|qpon|quebec|quest|qvc|racing|radio|raid|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|rocher|rocks|rodeo|rogers|room|rsvp|rugby|ruhr|run|rwe|ryukyu|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|sas|save|saxo|sbi|sbs|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|silk|sina|singles|site|ski|skin|sky|skype|sling|smart|smile|sncf|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|srl|srt|stada|staples|star|starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|sucks|supplies|supply|support|surf|surgery|suzuki|swatch|swiftcover|swiss|sydney|symantec|systems|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tci|tdk|team|tech|technology -------------------------------------------------------------------------------- /feature_engineering.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json 4 | import lief 5 | import time 6 | import struct 7 | import binascii 8 | import numpy as np 9 
| import pickle 10 | 11 | from capstone import * 12 | 13 | from collections import ChainMap 14 | 15 | from yara_check.check_packer_by_static import check_packers_by_static, YaraCheck 16 | 17 | import warnings 18 | warnings.filterwarnings("ignore") 19 | 20 | class Feature_engineering(object): 21 | 22 | def __init__(self): 23 | self.path_pattern = re.compile(b'[C-Zc-z]:(?:(?:\\\\|/)[^\\\\/:*?"<>|"\x00-\x19\x7f-\xff]+)+(?:\\\\|/)?') 24 | self.regs_pattern = re.compile(b'reg', re.IGNORECASE)# re.compile(b'[A-Z_ ]{5,}(?:\\\\[a-zA-Z ]+)+') 25 | self.urls_pattern = re.compile(b'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+') 26 | # self.strings_pattern = re.compile(b'[\x20-\x7f]{5,}') 27 | self.ip_pattern = re.compile(b'(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})') 28 | 29 | # #比特币钱包地址 30 | self.wallet_pattern_btc = re.compile(b'(?:1|3|bc1|bitcoincash:q)(?:(?![0OIi])[0-9A-Za-z]){25,34}') 31 | self.wallet_pattern_ltc = re.compile(b'(?:ltc1|M|L)[A-Za-z0-9]{25,36}') 32 | self.wallet_pattern_xmr = re.compile(b'[0-9A-Za-z]{90,100}') #门罗币 33 | 34 | self.mz_pattern = re.compile(b'MZ') 35 | self.pe_pattern = re.compile(b'PE') 36 | self.pool_pattern = re.compile(b'pool', re.IGNORECASE) 37 | self.cpu_pattern = re.compile(b'cpu', re.IGNORECASE) 38 | self.gpu_pattern = re.compile(b'gpu', re.IGNORECASE) 39 | self.coin_pattern = re.compile(b'coin', re.IGNORECASE) 40 | 41 | self.pat_list = {"btc": self.wallet_pattern_btc, "ltc": self.wallet_pattern_ltc, "xmr": self.wallet_pattern_xmr, "paths": self.path_pattern, 42 | "regs": self.regs_pattern, "urls": self.urls_pattern, "ips": self.ip_pattern, "mz": self.mz_pattern, # "other": self.strings_pattern, 43 | "pe": self.pe_pattern, "pool": self.pool_pattern, "cpu": self.cpu_pattern, "gpu": self.gpu_pattern, 'coin': self.coin_pattern} 44 | 45 | self.yc_pakcer = YaraCheck(rule_path="rules/rule20.yar") 46 | self.yc_gen = YaraCheck(rule_path="rules/black_rules.yar") 47 | 48 | with open('data/av.json', 'r', 
encoding="utf-8")as fp: 49 | avs = json.load(fp) 50 | avs = [av.split('.exe')[0].lower() for av in avs] 51 | self.avs = [av.encode() for av in avs] 52 | with open('data/dbg.txt', 'r')as f: 53 | dbgs = f.read().strip().lower().replace('\r', '\n').split("\n") 54 | self.dbgs = [dbg.encode() for dbg in dbgs] 55 | with open('data/pool.txt', 'rb')as fp: 56 | self.pools = fp.read().strip().split(b'\n') 57 | with open('data/algorithm.txt', 'rb')as fp: 58 | self.algorithms = fp.read().strip().split(b'\n') 59 | with open('data/coin.txt', 'rb')as fp: 60 | self.coins = fp.read().strip().lower().split(b'\n') 61 | with open('data/OPCODE.txt','r') as f: 62 | self.opcode_list = f.read().split() 63 | self.opcode_dict = {opc: i for i, opc in enumerate(self.opcode_list)} 64 | self.md32 = Cs(CS_ARCH_X86, CS_MODE_32) 65 | self.md64 = Cs(CS_ARCH_X86, CS_MODE_64) 66 | 67 | self.m32_pat = re.compile(b'\x55\x8b\xec[^\xc3]*\xc3') 68 | self.m64_pat = re.compile(b'\x48[\x83\x81]\xec[^\xc3]*[\xc3\xc2]') 69 | 70 | with open("models/keys.pkl", 'rb') as f: 71 | self.keys = pickle.load(f) 72 | 73 | def get_pattern(self, binary): 74 | op_pattern = [] 75 | PE_Offset = struct.unpack(" DataCon2020 3 | DataCon2020大数据安全分析大赛,🏆【方向五】恶意代码分析冠军源码和方案。 4 | 5 | - [ DataCon2020](#head1) 6 | - [ 最终排名(部分)](#head2) 7 | - [ 赛题回顾](#head3) 8 | - [ 启发与思路](#head4) 9 | - [ 算法与模型](#head5) 10 | - [ 灰度图](#head6) 11 | - [ 直方图](#head7) 12 | - [ PE静态特征模型](#head8) 13 | - [ 特征工程](#head9) 14 | - [ Section信息](#head10) 15 | - [ 字符匹配](#head11) 16 | - [ Yara匹配](#head12) 17 | - [ Opcode](#head13) 18 | - [ 其他布尔信息](#head14) 19 | - [ 函数名(CG图)](#head15) 20 | - [ 复赛模型融合](#head16) 21 | - [ 结果与改进](#head17) 22 | - [ 复赛结果](#head18) 23 | - [ 改进方向](#head19) 24 | - [ 团队介绍](#head20) 25 | - [ 参考资料](#head21) 26 | 27 | ## 最终排名(部分) 28 | 29 | ![最终排名](images/image-20200817122734929.png) 30 | 31 | ## 赛题回顾 32 | 33 | ![赛题回顾](images/image-20200817135007515.png) 34 | 35 | ## 启发与思路 36 | 37 | * [挖矿软件常见套路][1] 38 | * 
![挖矿软件常见套路](images/image-20200817124336272.png) 39 | 40 | * 资格赛中获得的启发 41 | 42 | * ①~②:需要关注虚拟机、调试软件、反编译软件、逆向分析工具和杀软名 43 | * ③~⑤:需要关注系统关键路径、注册表 44 | * ⑥~⑦:需要关注域名、IP、端口、钱包地址、可见字符串 45 | * ⑧~⑩:同③~⑤ 46 | * 逆向工程中得到的思路 47 | 48 | * 通过逆向分析,发现许多样本函数名包含数字货币名、密码学算法名(哈希算法)。 49 | * ![函数名中数字货币名、哈希算法名](images/image-20200817124655908.png) 50 | * 很多带壳样本:UPX、Pelite、VMP…… 51 | * ![UPX壳](images/image-20200817124742419.png) 52 | * 白样本含有很多其他类别恶意程序,如病毒、外挂…… 53 | * ![外挂](images/image-20200817124803989.png) 54 | 55 | ## 算法与模型 56 | 57 | > 复赛环境搭建说明:[setup.txt](setup.txt) 58 | 59 | > 复赛预处理脚本:[run.py](run.py) 60 | 61 | > 复赛测试脚本:[test.py](test.py) 62 | 63 | > 复赛一键验证测试脚本:[run.sh](run.sh) 64 | 65 | 本次初赛、附加赛与复赛我们队使用的五种算法或模型如下(其中在复赛中因为有性能的需求,部分模型未使用): 66 | 67 | ### 灰度图 68 | 69 | PE文件二进制每一个字节对应一个像素,最后缩放成固定大小的灰度图。这是最常见也容易实现的模型,在恶意代码检测中已经广泛使用。 70 | 71 | ![灰度图转换](images/image-20200817135158335.png) 72 | 73 | 但我们仅在初赛时使用,原因如下: 74 | 75 | * 文件大小差异较大,缩放比例不一致。 76 | * 给定的样本集中包含许多加壳样本,使得数据分布被打乱,黑白特征不明显。 77 | * 预处理时间较长。 78 | 79 | > 初赛时部分代码:[old/gray.ipynb](old/gray.ipynb) 80 | 81 | ### 直方图 82 | 83 | 这也是除了灰度图外一种不需要解析PE文件格式来进行提取学习的特征方法,我们主要使用两类直方图: 84 | 85 | * 字节直方图:统计字节0-255出现个数 86 | * [字节熵直方图][2]: 87 | * 滑动一个1024字节的窗口,步长为256字节 88 | * 计算每个1024字节窗口的熵 89 | * 统计滑动窗口的(字节,熵值)对,最后转换成1x256维的特征向量 90 | * ![字节熵直方图](images/image-20200817140709050.png) 91 | 92 | 最后连接这两个特征向量,使用深度学习模型学习。效果好,预处理快,初赛单用这个模型便拿到`93.8425`分。 93 | 94 | > 预处理和验证可见本节一开始提到的脚本。 95 | 96 | > 复赛时的模型训练代码:[train_histogram.py](train_histogram.py) 97 | 98 | ### PE静态特征模型 99 | 100 | 虽然提供的样本被抹掉了样本PE结构中的MZ、PE、导入导出表等信息,但我们只需要恢复`MZ`头和`PE\0\0`即可使用常规的分析工具对PE样本进行分析。 101 | 102 | 因为恢复也只是能解析PE文件的静态格式和特征,并不能将其运行,所以只能从静态特征入手。最后我们使用的是著名[EMBER][3]数据集提到的PE文件静态特征提取方法。虽然原文用于检测恶意Windows PE文件,但是我们也将其移植过来检测挖矿软件。 103 | 104 | 原始方法提取了许多PE文件静态特征,如下: 105 | 106 | - [x] ByteHistogram、ByteEntropyHistogram:直方图 107 | - [x] GeneralFileInfo:调试信息、TLS段、重定位信息…… 108 | - [x] HeaderFileInfo:PE头基本所有信息 109 | - [x] ExportsInfo:导出表个数、名称 110 | - [x] SectionInfo:Section名、大小、熵、属性等…… 111 | - [ ] 
ImportsInfo:导入表被破坏,无法解析导入函数信息 112 | - [ ] StringExtractor:字符串提取在特征工程里做,这里删掉一是为了节省时间,二是防止特征重叠 113 | 114 | ![PE文件静态特征](images/image-20200817143339662.png) 115 | 116 | > 预处理和验证可见本节一开始提到的脚本。 117 | 118 | > 复赛时的模型训练代码:[train_pe_raw.py](train_pe_raw.py) 119 | 120 | ### 特征工程 121 | 122 | 我们队所用特征工程主要包括五部分,分别为:Section信息、字符匹配、Yara匹配、Opcode和其他布尔信息。 123 | 124 | > 预处理和验证可见本节一开始提到的脚本。 125 | 126 | > 复赛时的模型训练代码:[feature_engineering.py](feature_engineering.py) 127 | 128 | #### Section信息 129 | 130 | 节区特征是PE文件一种重要特征,过多的节区、异常的节区名、异常的资源节区个数等指标都可以指示这个PE文件的可疑程度,因此我们首先针对节区进行特征统计: 131 | 132 | * OEP所在节区名长度 133 | * OEP所在节区名一般为`.text`,如果过长或过短说明很可能被混淆 134 | * 比如UPX壳OEP处节区名为`UPX1`。 135 | * 各可读、可写、可执行节区大小和熵,和各属性节区占文件大小比例 136 | * 举例:如果可执行节区占比过小,很可能加壳了(压缩壳) 137 | * 资源节区个数 138 | * 资源节区一般藏有一些压缩数据,比如挖矿恶意载荷 139 | * 节区总个数 140 | * 恶意软件节区数一般比较多 141 | 142 | ```Python 143 | # OEP处section名长度 144 | section_info["entry"] = len(entry_section) 145 | section_info["section_num"] = len(lief_binary.sections) 146 | # 可读、可写、可执行sections大小均值 147 | sR, sW, sX = [], [], [] 148 | # 可读、可写、可执行sections熵值均值 149 | entrR, entrW, entrX = [], [], [] 150 | # 资源section个数 151 | rsrc_num = 0 152 | for s in lief_binary.sections: 153 | props = [str(c).split('.')[-1] for c in s.characteristics_lists] 154 | if "MEM_READ" in props: 155 | sR.append(s.size) 156 | entrR.append(s.entropy) 157 | if "MEM_WRITE" in props: 158 | sW.append(s.size) 159 | entrW.append(s.entropy) 160 | if "MEM_EXECUTE" in props: 161 | sX.append(s.size) 162 | entrX.append(s.entropy) 163 | if 'rsrc' in s.name: 164 | rsrc_num += 1 165 | section_info['size_R'], section_info['size_W'], section_info['size_X'] = np.mean(sR), np.mean(sW), np.mean(sX) 166 | section_info['entr_R'], section_info['entr_W'], section_info['entr_X'] = np.mean(entrR), np.mean(entrW), np.mean(entrX) 167 | section_info['rsrc_num'] = rsrc_num 168 | ``` 169 | 170 | #### 字符匹配 171 | 172 | 根据资格赛获得的启发,队员们手写相应的正则匹配模式,其中包括 173 | 174 | * 路径、注册表、URL、IP地址正则匹配 175 | * 
其中因为注册表正则模式存在回溯问题,有的样本存在特别长的字符串,导致一个样本可能匹配了八分钟,所以我们复赛简单粗暴改成匹配字符串”reg”。主要原因是我们认为操作注册表必然存在相应函数,而这些函数名基本含有”reg”。 176 | * 比特币钱包地址正则匹配 177 | * 主要写了三种货币:比特币、莱特币、门罗币 178 | * 一些重要字符串匹配 179 | * ”MZ”、”PE”指示可能含别的PE文件 180 | * ”pool”、”cpu”、”gpu”、”coin”则是我们认为挖矿软件普遍存在的字符串 181 | 182 | ```Python 183 | self.path_pattern = re.compile(b'[C-Zc-z]:(?:(?:\\\\|/)[^\\\\/:*?"<>|"\x00-\x19\x7f-\xff]+)+(?:\\\\|/)?') 184 | self.regs_pattern = re.compile(b'reg', re.IGNORECASE)# re.compile(b'[A-Z_ ]{5,}(?:\\\\[a-zA-Z ]+)+') 185 | self.urls_pattern = re.compile(b'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+') 186 | # self.strings_pattern = re.compile(b'[\x20-\x7f]{5,}') 187 | self.ip_pattern = re.compile(b'(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})') 188 | ​ 189 | # #比特币钱包地址 190 | self.wallet_pattern_btc = re.compile(b'(?:1|3|bc1|bitcoincash:q)(?:(?![0OIi])[0-9A-Za-z]){25,34}') 191 | self.wallet_pattern_ltc = re.compile(b'(?:ltc1|M|L)[A-Za-z0-9]{25,36}') 192 | self.wallet_pattern_xmr = re.compile(b'[0-9A-Za-z]{90,100}') #门罗币 193 | ​ 194 | self.mz_pattern = re.compile(b'MZ') 195 | self.pe_pattern = re.compile(b'PE') 196 | self.pool_pattern = re.compile(b'pool', re.IGNORECASE) 197 | self.cpu_pattern = re.compile(b'cpu', re.IGNORECASE) 198 | self.gpu_pattern = re.compile(b'gpu', re.IGNORECASE) 199 | self.coin_pattern = re.compile(b'coin', re.IGNORECASE) 200 | ``` 201 | 202 | #### Yara匹配 203 | 204 | Yara规则是基于二进制文件中包含的文本或二进制字符串的描述创建的。我们首先使用[Yara-Rules][4]提供的规则进行匹配,其中包括: 205 | 206 | * [壳规则](rules/packer.yar) 207 | 208 | * 包含许多已知壳的Yara匹配规则 209 | 210 | * [密码学常量规则](rules/crypto_signatures.yar) 211 | 212 | * 特别是哈希算法初始值 213 | * 匹配时间略长,复赛忍痛舍弃 214 | 215 | 最后我们还使用了[yarGen][5]工具,提取**训练集黑样本**特征,其原理是先解析出样本集中的共同的字符串,然后经过白名单库的过滤,最后通过启发式、机器学习等方式筛选出最优的Yara规则。根据得到的Yara规则集结果,选择匹配度大于某一阈值的规则形成新的规则集,查看匹配黑白样本的分布比例,筛选部分白样本规则。通过不断的调整阈值参数与筛除比例,在尽可能泛化的同时匹配到更多的黑样本,最后人工结合挖矿特征筛选出更值得关注的部分,优化规则集。 216 | 217 | 最终得到的[自定义Yara规则集](rules/rule20.yar)阈值为20,即每条规则在**训练集黑样本**中匹配样本个数大于等于20。 218 | 219 | #### Opcode 
220 | 221 | 通过传统逆向工具解析PE文件中的函数实在太耗时,因此我们打算通过简单的正则搜索识别代码中的函数,然后提取函数片段中的Opcode并保存。例如x86下,按栈匹配`push ebp; mov ebp, esp; ……; ret`如下代码段。 222 | 223 | ```Python 224 | self.m32_pat = re.compile(b'\x55\x8b\xec[^\xc3]*\xc3') 225 | # ………… 226 | all_functions = self.m32_pat.findall(binary) 227 | for function in all_functions: 228 | function_op = [] 229 | for _, _, mnemonic, _ in self.md32.disasm_lite(function, 0x0): 230 | try: 231 | function_op.append(self.opcode_dict[mnemonic]) 232 | except Exception: 233 | break 234 | else: 235 | op_pattern.append(function_op) 236 | ``` 237 | 238 | 原因是发现在挖矿样本中有大量样本间共有的opcode特征,而白样本中却不明显。因此可以统计匹配出的函数个数、opcode种类个数、平均值、方差等特征。 239 | 240 | #### 其他布尔信息 241 | 242 | 根据资格赛所提供的启发,我们还收集了各种类别的进程名、数字货币名、密码学算法名等信息,并将它们存储在`./data`中,以检测它们是否在给定PE文件内,具体如下表所示。 243 | 244 | | 文件名 | 注释 | 245 | | ----------------- | ----------------------------- | 246 | | algorithm.txt | 常见密码学算法名 | 247 | | av.json | [常见的杀毒软件的进程名称][6] | 248 | | coin.txt | 数字货币名 | 249 | | dbg.txt | 调试器名 | 250 | | domain_suffix.txt | 常见顶级域名(复赛时未使用) | 251 | | OPCODE.txt | Opcode词汇表 | 252 | | pool.txt | [常见矿池二级域名][7] | 253 | | vm.txt | 虚拟机内软件名 | 254 | 255 | 我们搜集了常见的杀毒软件的进程名称,以此作为挖矿软件对杀毒软件的检测行为的特征。 256 | 257 | 考虑到挖矿软件需要反分析、反调试来保证持久化,我们还以常见的调试器进程名作为挖矿软件的反调试行为的特征,主要为我们日常使用的调试工具。 258 | 259 | 考虑到挖矿软件会进行反沙箱对抗,我们搭建了各种不同的虚拟机软件环境,整理了挖矿软件可能检测的虚拟机环境特征。 260 | 由于挖矿过程必然存在矿池,因此我们想到检测矿池二级域名来作为挖矿软件的一个有效特征,于是编写了爬虫脚本通过正则匹配的方式爬取了目前常用的矿池域名300余条。 261 | 262 | ### 函数名(CG图) 263 | 264 | 借鉴一篇[论文][8]的思路,处理流程如下: 265 | 266 | 1. IDA Pro提取函数调用生成GDL(Graph Description Language)文件 267 | 2. GDL文件包含函数名(结点)、调用关系(边),如下述代码段所示 268 | 3. 这样可以对函数调用次数进行排序,作为一种序列信息进行训练 269 | 270 | ```Yaml 271 | graph: { 272 | title: "Building graph" 273 | // IDA palette 274 | // .... 275 | colorentry 71: 255 255 0 276 | colorentry 72: 0 0 0 277 | colorentry 73: 0 0 0 278 | colorentry 74: 0 0 0 279 | colorentry 75: 0 255 255 280 | colorentry 76: 192 192 192 281 | // .... 
282 | node: { title: "165" label: "__aulldiv" color: 75 textcolor: 73 bordercolor: black } 283 | node: { title: "166" label: "__aulldvrm" color: 75 textcolor: 73 bordercolor: black } 284 | node: { title: "167" label: "__aullshr" color: 75 textcolor: 73 bordercolor: black } 285 | // .... 286 | // node 169 287 | edge: { sourcename: "169" targetname: "135" } 288 | edge: { sourcename: "169" targetname: "136" } 289 | edge: { sourcename: "169" targetname: "170" } 290 | edge: { sourcename: "169" targetname: "171" } 291 | // .... 292 | } 293 | ``` 294 | 295 | 初赛使用效果不错,复赛因IDA Pro耗时过长放弃。 296 | 297 | > 初赛时部分代码路径:[old/cg](old/cg) 298 | 299 | ### 复赛模型融合 300 | 301 | 这方面我们队员涉猎较少,可能选择的模型和融合的方式还有改进的空间,欢迎各位看客交流学习。 302 | 303 | ![复赛模型融合](images/image-20200817161633785.png) 304 | 305 | ## 结果与改进 306 | 307 | ### 复赛结果 308 | 309 | 在判分前主办方提供了1k个测试样本,我们使用这1k个样本进行检验与测试,最终耗时大约为`1min20s`,得分为`95.52`分。 310 | 311 | ![复赛预测分数](images/image-20200817162530108.png) 312 | 313 | 因此预估判分用的1w多个样本耗时在`20min`上下,扣掉0.2分后与最终的`95.38`十分接近,证明了我们所用方法泛化能力以及稳定性。 314 | 315 | ### 改进方向 316 | 317 | * 特征工程中我们提取的Opcode序列仅用了统计特征,我们也可以将其当作一种序列信息,使用NLP方法训练学习。 318 | 319 | * 提供的样本中还是含有很多加壳样本的,因此我们可以对Yara匹配出的加壳样本进行单独处理。 320 | 321 | * 特征工程的完善,例如: 322 | * 任务计划名: Drivers、WebServers、DnsScan 323 | * Powershell、Vbs脚本 324 | * 端口,特别是高端口(>10000) 325 | * …… 326 | 327 | ## 团队介绍 328 | 329 | 本战队所在的信工所六室威胁情报与威胁发现团队主要针对在线流量、落地样本(载荷)、安全日志、威胁情报等网络空间典型威胁数据进行分析,研究威胁情报智能处理、对抗性恶意代码分析、可疑网络/终端行为检测挖掘的技术与系统,培养具备高级威胁对抗分析技能的人才。同时,团队还建设和运营了国家网络空间威胁情报共享开放平台CNTIC(公众号cntic2017), 研制了大规模恶意代码智能分析平台iMAS 以及网络恶意通信检测系统,均已应用于国家有关部门和地区的实际工作中。我们欢迎保研生、实习生加入,联系方式jiangzhengwei#iie.ac.cn。 330 | 331 | ## 参考资料 332 | 333 | 1. [挖矿软件常见套路](https://zhuanlan.zhihu.com/p/164557943) 334 | 2. [Deep Neural Network Based Malware Detection Using Two Dimensional Binary Program Features](https://arxiv.org/pdf/1508.03096.pdf) 335 | 3. [EMBER: An Open Dataset for Training Static PE Malware Machine Learning Models](https://arxiv.org/abs/1804.04637) 336 | 4. 
[Yara-Rules / rules](https://github.com/Yara-Rules/rules) 337 | 5. [Neo23x0 / yarGen](https://github.com/Neo23x0/yarGen) 338 | 6. [BrownFly / findAV](https://github.com/BrownFly/findAV) 339 | 7. [Mining Pools Live Monitoring Tools](https://investoon.com/mining_pools) 340 | 8. [DeepCG: Classifying Metamorphic Malware Through Deep Learning of Call Graphs](https://www.researchgate.net/publication/337954044_DeepCG_Classifying_Metamorphic_Malware_Through_Deep_Learning_of_Call_Graphs) 341 | 342 | [1]: https://zhuanlan.zhihu.com/p/164557943 "挖矿软件常见套路" 343 | [2]: https://arxiv.org/pdf/1508.03096.pdf "Deep Neural Network Based Malware Detection Using Two Dimensional Binary Program Features" 344 | [3]: https://arxiv.org/abs/1804.04637 "EMBER: An Open Dataset for Training Static PE Malware Machine Learning Models" 345 | [4]: https://github.com/Yara-Rules/rules "Yara-Rules / rules" 346 | [5]: https://github.com/Neo23x0/yarGen "Neo23x0 / yarGen" 347 | [6]: https://github.com/BrownFly/findAV "BrownFly / findAV" 348 | [7]: https://investoon.com/mining_pools "Mining Pools Live Monitoring Tools" 349 | [8]: https://www.researchgate.net/publication/337954044_DeepCG_Classifying_Metamorphic_Malware_Through_Deep_Learning_of_Call_Graphs "DeepCG: Classifying Metamorphic Malware Through Deep Learning of Call Graphs" 350 | -------------------------------------------------------------------------------- /feature_engineering_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import re\n", 11 | "import copy\n", 12 | "import time\n", 13 | "import lief\n", 14 | "import json\n", 15 | "from tqdm import tqdm\n", 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import lightgbm as lgb\n", 19 | "import matplotlib.pyplot as plt\n", 20 | "import seaborn as sns\n", 21 | "import pickle\n", 22 
| "\n", 23 | "from sklearn.model_selection import StratifiedKFold\n", 24 | "\n", 25 | "from sklearn import metrics\n", 26 | "from sklearn.metrics import accuracy_score\n", 27 | "from sklearn.metrics import classification_report\n", 28 | "\n", 29 | "%config InlineBackend.figure_format = 'svg'\n", 30 | "%matplotlib inline" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "with open(\"/home/jovyan/feature_engineering/feature_engineering_features.pkl\", 'rb') as f:\n", 40 | " feature_engineering_features = pickle.load(f)\n", 41 | "with open(\"models/keys.pkl\", 'rb') as f:\n", 42 | " keys = pickle.load(f)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "with open(\"/home/datacon/malware/XXX/black.txt\", 'r') as f:\n", 52 | " black_list = f.read().strip().split()\n", 53 | "\n", 54 | "with open(\"/home/datacon/malware/XXX/white.txt\", 'r') as f:\n", 55 | " white_list = f.read().strip().split()\n", 56 | "\n", 57 | "with open(\"models/hash_list.pkl\", 'rb') as f:\n", 58 | " hash_list = pickle.load(f)\n", 59 | "\n", 60 | "train_features = []\n", 61 | "for ha in hash_list:\n", 62 | " if ha in black_list:\n", 63 | " train_features.append(1)\n", 64 | " else:\n", 65 | " train_features.append(0)\n", 66 | "\n", 67 | "train_features = np.array(train_features, dtype=np.int32)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "(11647,)" 79 | ] 80 | }, 81 | "execution_count": 4, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "train_features.shape" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "train_df = pd.DataFrame(feature_engineering_features, 
columns=keys)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 13, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "fold 1\n", 109 | "Training until validation scores don't improve for 50 rounds\n", 110 | "[500]\ttraining's binary_logloss: 0.0622826\tvalid_1's binary_logloss: 0.0692178\n", 111 | "[1000]\ttraining's binary_logloss: 0.0380324\tvalid_1's binary_logloss: 0.054276\n", 112 | "[1500]\ttraining's binary_logloss: 0.026614\tvalid_1's binary_logloss: 0.0492136\n", 113 | "[2000]\ttraining's binary_logloss: 0.0193443\tvalid_1's binary_logloss: 0.046569\n", 114 | "[2500]\ttraining's binary_logloss: 0.0148735\tvalid_1's binary_logloss: 0.045438\n", 115 | "Early stopping, best iteration is:\n", 116 | "[2740]\ttraining's binary_logloss: 0.0132744\tvalid_1's binary_logloss: 0.0452006\n", 117 | "Finished loading model, total used 2740 iterations\n", 118 | "fold 2\n", 119 | "Training until validation scores don't improve for 50 rounds\n", 120 | "[500]\ttraining's binary_logloss: 0.0592194\tvalid_1's binary_logloss: 0.0770158\n", 121 | "[1000]\ttraining's binary_logloss: 0.0367177\tvalid_1's binary_logloss: 0.0628496\n", 122 | "[1500]\ttraining's binary_logloss: 0.0256375\tvalid_1's binary_logloss: 0.0581123\n", 123 | "[2000]\ttraining's binary_logloss: 0.0187458\tvalid_1's binary_logloss: 0.0554952\n", 124 | "[2500]\ttraining's binary_logloss: 0.0146185\tvalid_1's binary_logloss: 0.054573\n", 125 | "Early stopping, best iteration is:\n", 126 | "[2668]\ttraining's binary_logloss: 0.0135243\tvalid_1's binary_logloss: 0.0543697\n", 127 | "Finished loading model, total used 2668 iterations\n", 128 | "fold 3\n", 129 | "Training until validation scores don't improve for 50 rounds\n", 130 | "[500]\ttraining's binary_logloss: 0.0589963\tvalid_1's binary_logloss: 0.0808982\n", 131 | "[1000]\ttraining's binary_logloss: 0.034963\tvalid_1's binary_logloss: 0.0696653\n", 132 | 
"Early stopping, best iteration is:\n", 133 | "[1294]\ttraining's binary_logloss: 0.0276091\tvalid_1's binary_logloss: 0.0679925\n", 134 | "Finished loading model, total used 1294 iterations\n", 135 | "fold 4\n", 136 | "Training until validation scores don't improve for 50 rounds\n", 137 | "[500]\ttraining's binary_logloss: 0.0627042\tvalid_1's binary_logloss: 0.0657594\n", 138 | "[1000]\ttraining's binary_logloss: 0.038282\tvalid_1's binary_logloss: 0.0519841\n", 139 | "[1500]\ttraining's binary_logloss: 0.0273375\tvalid_1's binary_logloss: 0.0475178\n", 140 | "[2000]\ttraining's binary_logloss: 0.0202554\tvalid_1's binary_logloss: 0.0454211\n", 141 | "Early stopping, best iteration is:\n", 142 | "[2088]\ttraining's binary_logloss: 0.0193356\tvalid_1's binary_logloss: 0.0451811\n", 143 | "Finished loading model, total used 2088 iterations\n", 144 | "fold 5\n", 145 | "Training until validation scores don't improve for 50 rounds\n", 146 | "[500]\ttraining's binary_logloss: 0.0620011\tvalid_1's binary_logloss: 0.0753361\n", 147 | "[1000]\ttraining's binary_logloss: 0.038365\tvalid_1's binary_logloss: 0.0619157\n", 148 | "[1500]\ttraining's binary_logloss: 0.0269653\tvalid_1's binary_logloss: 0.0578915\n", 149 | "Early stopping, best iteration is:\n", 150 | "[1689]\ttraining's binary_logloss: 0.0238503\tvalid_1's binary_logloss: 0.0570982\n", 151 | "Finished loading model, total used 1689 iterations\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "params = {'num_leaves': 20,\n", 157 | " 'min_data_in_leaf': 1,\n", 158 | " 'objective': 'binary', #定义的目标函数\n", 159 | " 'max_depth': 4,\n", 160 | " 'learning_rate': 0.01,\n", 161 | " \"min_sum_hessian_in_leaf\": 4,\n", 162 | " \"boosting\": \"gbdt\",\n", 163 | " \"feature_fraction\": 0.9, #提取的特征比率\n", 164 | " \"bagging_freq\": 1,\n", 165 | " \"bagging_fraction\": 0.9,\n", 166 | " \"bagging_seed\": 11,\n", 167 | " \"nthread\": 10,\n", 168 | " 'metric': {'binary_logloss'}, \n", 169 | " \"random_state\": 6666,\n", 170 | 
"}\n", 171 | "\n", 172 | "n_splits = 5\n", 173 | "\n", 174 | "kf = StratifiedKFold(n_splits=n_splits, random_state=2200, shuffle=True)\n", 175 | "\n", 176 | "prob_oof = np.zeros((len(train_features), ))\n", 177 | "\n", 178 | "feature_importance_df = pd.DataFrame()\n", 179 | "\n", 180 | "lgb_models = []\n", 181 | "\n", 182 | "for fold_idx, (train_index, test_index) in enumerate(kf.split(train_df, train_features)):\n", 183 | " print(\"fold {}\".format(fold_idx+1))\n", 184 | " trn_data = lgb.Dataset(train_df.iloc[train_index], label=train_features[train_index])\n", 185 | " val_data = lgb.Dataset(train_df.iloc[test_index], label=train_features[test_index])\n", 186 | "\n", 187 | " lgb_model = lgb.train(params,\n", 188 | " trn_data,\n", 189 | " 3000,\n", 190 | " valid_sets=[trn_data, val_data],\n", 191 | " early_stopping_rounds=50,\n", 192 | " verbose_eval=500)\n", 193 | " prob_oof[test_index] = lgb_model.predict(train_df.iloc[test_index], num_iteration=lgb_model.best_iteration)\n", 194 | "\n", 195 | " lgb_models.append(copy.deepcopy(lgb_model))\n", 196 | " fold_importance_df = pd.DataFrame()\n", 197 | " fold_importance_df[\"Feature\"] = keys\n", 198 | " fold_importance_df[\"importance\"] = lgb_model.feature_importance()\n", 199 | " fold_importance_df[\"fold\"] = fold_idx + 1\n", 200 | " feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 14, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/html": [ 211 | "
\n", 212 | "\n", 225 | "\n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | "
Featureimportancefold
33entr_X11661
33entr_X10892
34size_R_weight10321
10size_X_weight10162
10size_X_weight9961
............
26pe_mean01
2mz_mean04
2mz_mean02
26pe_mean03
2mz_mean03
\n", 303 | "

280 rows × 3 columns

\n", 304 | "
" 305 | ], 306 | "text/plain": [ 307 | " Feature importance fold\n", 308 | "33 entr_X 1166 1\n", 309 | "33 entr_X 1089 2\n", 310 | "34 size_R_weight 1032 1\n", 311 | "10 size_X_weight 1016 2\n", 312 | "10 size_X_weight 996 1\n", 313 | ".. ... ... ...\n", 314 | "26 pe_mean 0 1\n", 315 | "2 mz_mean 0 4\n", 316 | "2 mz_mean 0 2\n", 317 | "26 pe_mean 0 3\n", 318 | "2 mz_mean 0 3\n", 319 | "\n", 320 | "[280 rows x 3 columns]" 321 | ] 322 | }, 323 | "execution_count": 14, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "feature_importance_df.sort_values(by=\"importance\", ascending=False) # .to_csv(\"importance.csv\")" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 15, 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "data": { 339 | "text/plain": [ 340 | "3775" 341 | ] 342 | }, 343 | "execution_count": 15, 344 | "metadata": {}, 345 | "output_type": "execute_result" 346 | } 347 | ], 348 | "source": [ 349 | "sum([0 if i < 0.5 else 1 for i in prob_oof])" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 16, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "# with open(\"oof/feature_engineerin_train.pkl\", \"wb\") as fp:\n", 359 | "# pickle.dump(prob_oof.reshape((len(train_features), 1)), fp)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 17, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "with open(\"models/lgb_models.pkl\", \"wb\") as fp:\n", 369 | " pickle.dump(lgb_models, fp)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [] 378 | } 379 | ], 380 | "metadata": { 381 | "kernelspec": { 382 | "display_name": "yuri", 383 | "language": "python", 384 | "name": "yuri" 385 | }, 386 | "language_info": { 387 | "codemirror_mode": { 388 | "name": "ipython", 389 | "version": 3 390 | }, 391 | "file_extension": 
".py", 392 | "mimetype": "text/x-python", 393 | "name": "python", 394 | "nbconvert_exporter": "python", 395 | "pygments_lexer": "ipython3", 396 | "version": "3.6.11" 397 | } 398 | }, 399 | "nbformat": 4, 400 | "nbformat_minor": 4 401 | } -------------------------------------------------------------------------------- /data/av.json: -------------------------------------------------------------------------------- 1 | { 2 | "360tray.exe": "360安全卫士-实时保护", 3 | "360safe.exe": "360安全卫士-主程序", 4 | "ZhuDongFangYu.exe": "360安全卫士-主动防御", 5 | "360sd.exe": "360杀毒", 6 | "a2guard.exe": "a-squared杀毒", 7 | "ad-watch.exe": "Lavasoft杀毒", 8 | "cleaner8.exe": "The Cleaner杀毒", 9 | "vba32lder.exe": "vb32杀毒", 10 | "MongoosaGUI.exe": "Mongoosa杀毒", 11 | "CorantiControlCenter32.exe": "Coranti2012杀毒", 12 | "F-PROT.exe": "F-Prot AntiVirus", 13 | "CMCTrayIcon.exe": "CMC杀毒", 14 | "K7TSecurity.exe": "K7杀毒", 15 | "UnThreat.exe": "UnThreat杀毒", 16 | "CKSoftShiedAntivirus4.exe": "Shield Antivirus杀毒", 17 | "AVWatchService.exe": "VIRUSfighter杀毒", 18 | "ArcaTasksService.exe": "ArcaVir杀毒", 19 | "iptray.exe": "Immunet杀毒", 20 | "PSafeSysTray.exe": "PSafe杀毒", 21 | "nspupsvc.exe": "nProtect杀毒", 22 | "SpywareTerminatorShield.exe": "SpywareTerminator反间谍软件", 23 | "BKavService.exe": "Bkav杀毒", 24 | "MsMpEng.exe": "Windows Defender", 25 | "SBAMSvc.exe": "VIPRE", 26 | "ccSvcHst.exe": "Norton杀毒", 27 | "f-secure.exe": "冰岛", 28 | "avp.exe": "Kaspersky", 29 | "KvMonXP.exe": "江民杀毒", 30 | "RavMonD.exe": "瑞星杀毒", 31 | "Mcshield.exe": "McAfee", 32 | "Tbmon.exe": "McAfee", 33 | "Frameworkservice.exe": "McAfee", 34 | "egui.exe": "ESET NOD32", 35 | "ekrn.exe": "ESET NOD32", 36 | "eguiProxy.exe": "ESET NOD32", 37 | "kxetray.exe": "金山毒霸", 38 | "knsdtray.exe": "可牛杀毒", 39 | "TMBMSRV.exe": "趋势杀毒", 40 | "avcenter.exe": "Avira(小红伞)", 41 | "avguard.exe": "Avira(小红伞)", 42 | "avgnt.exe": "Avira(小红伞)", 43 | "sched.exe": "Avira(小红伞)", 44 | "ashDisp.exe": "Avast网络安全", 45 | "rtvscan.exe": "诺顿杀毒", 46 | "ccapp.exe": "SymantecNorton", 47 | 
"NPFMntor.exe": "Norton杀毒软件", 48 | "ccSetMgr.exe": "赛门铁克", 49 | "ccRegVfy.exe": "Norton杀毒软件", 50 | "ksafe.exe": "金山卫士", 51 | "QQPCRTP.exe": "QQ电脑管家", 52 | "avgwdsvc.exe": "AVG杀毒", 53 | "QUHLPSVC.exe": "QUICK HEAL杀毒", 54 | "mssecess.exe": "微软杀毒", 55 | "SavProgress.exe": "Sophos杀毒", 56 | "SophosUI.exe": "Sophos杀毒", 57 | "SophosFS.exe": "Sophos杀毒", 58 | "SophosHealth.exe": "Sophos杀毒", 59 | "SophosSafestore64.exe": "Sophos杀毒", 60 | "SophosCleanM.exe": "Sophos杀毒", 61 | "fsavgui.exe": "F-Secure杀毒", 62 | "vsserv.exe": "比特梵德", 63 | "remupd.exe": "熊猫卫士", 64 | "FortiTray.exe": "飞塔", 65 | "safedog.exe": "安全狗", 66 | "parmor.exe": "木马克星", 67 | "Iparmor.exe.exe": "木马克星", 68 | "beikesan.exe": "贝壳云安全", 69 | "KSWebShield.exe": "金山网盾", 70 | "TrojanHunter.exe": "木马猎手", 71 | "GG.exe": "巨盾网游安全盾", 72 | "adam.exe": "绿鹰安全精灵", 73 | "AST.exe": "超级巡警", 74 | "ananwidget.exe": "墨者安全专家", 75 | "AVK.exe": "AntiVirusKit", 76 | "avg.exe": "AVG Anti-Virus", 77 | "spidernt.exe": "Dr.web", 78 | "avgaurd.exe": "Avira Antivir", 79 | "vsmon.exe": "Zone Alarm", 80 | "cpf.exe": "Comodo", 81 | "outpost.exe": "Outpost Firewall", 82 | "rfwmain.exe": "瑞星防火墙", 83 | "kpfwtray.exe": "金山网镖", 84 | "FYFireWall.exe": "风云防火墙", 85 | "MPMon.exe": "微点主动防御", 86 | "pfw.exe": "天网防火墙", 87 | "BaiduSdSvc.exe": "百度杀毒-服务进程", 88 | "BaiduSdTray.exe": "百度杀毒-托盘进程", 89 | "BaiduSd.exe": "百度杀毒-主程序", 90 | "SafeDogGuardCenter.exe": "安全狗", 91 | "safedogupdatecenter.exe": "安全狗", 92 | "safedogguardcenter.exe": "安全狗", 93 | "SafeDogSiteIIS.exe": "安全狗", 94 | "SafeDogTray.exe": "安全狗", 95 | "SafeDogServerUI.exe": "安全狗", 96 | "D_Safe_Manage.exe": "D盾", 97 | "d_manage.exe": "D盾", 98 | "yunsuo_agent_service.exe": "云锁", 99 | "yunsuo_agent_daemon.exe": "云锁", 100 | "HwsPanel.exe": "护卫神", 101 | "hws_ui.exe": "护卫神", 102 | "hws.exe": "护卫神", 103 | "hwsd.exe": "护卫神", 104 | "HipsTray.exe": "火绒", 105 | "HipsDaemon.exe": "火绒", 106 | "wsctrl.exe": "火绒", 107 | "usysdiag.exe": "火绒", 108 | "SPHINX.exe": "SPHINX防火墙", 109 | "bddownloader.exe": "百度卫士", 110 | 
"baiduansvx.exe": "百度卫士-主进程", 111 | "AvastUI.exe": "Avast!5主程序", 112 | "emet_agent.exe": "EMET", 113 | "emet_service.exe": "EMET", 114 | "firesvc.exe": "McAfee", 115 | "firetray.exe": "McAfee", 116 | "hipsvc.exe": "McAfee", 117 | "mfevtps.exe": "McAfee", 118 | "mcafeefire.exe": "McAfee", 119 | "scan32.exe": "McAfee", 120 | "shstat.exe": "McAfee", 121 | "vstskmgr.exe": "McAfee", 122 | "engineserver.exe": "McAfee", 123 | "mfeann.exe": "McAfee", 124 | "mcscript.exe": "McAfee", 125 | "updaterui.exe": "McAfee", 126 | "udaterui.exe": "McAfee", 127 | "naprdmgr.exe": "McAfee", 128 | "cleanup.exe": "McAfee", 129 | "cmdagent.exe": "McAfee", 130 | "frminst.exe": "McAfee", 131 | "mcscript_inuse.exe": "McAfee", 132 | "mctray.exe": "McAfee", 133 | "_avp32.exe": "卡巴斯基", 134 | "_avpcc.exe": "卡巴斯基", 135 | "_avpm.exe": "卡巴斯基", 136 | "aAvgApi.exe": "AVG", 137 | "ackwin32.exe": "已知杀软进程,名称暂未收录", 138 | "alertsvc.exe": "Norton AntiVirus", 139 | "alogserv.exe": "McAfee VirusScan", 140 | "anti-trojan.exe": "Anti-Trojan Elite", 141 | "arr.exe": "Application Request Route", 142 | "atguard.exe": "AntiVir", 143 | "atupdater.exe": "已知杀软进程,名称暂未收录", 144 | "atwatch.exe": "Mustek", 145 | "au.exe": "NSIS", 146 | "aupdate.exe": "Symantec", 147 | "auto-protect.nav80try.exe": "已知杀软进程,名称暂未收录", 148 | "autodown.exe": "AntiVirus AutoUpdater", 149 | "avconsol.exe": "McAfee", 150 | "avgcc32.exe": "AVG", 151 | "avgctrl.exe": "AVG", 152 | "avgemc.exe": "AVG", 153 | "avgrsx.exe": "AVG", 154 | "avgserv.exe": "AVG", 155 | "avgserv9.exe": "AVG", 156 | "avgw.exe": "AVG", 157 | "avkpop.exe": "G DATA SOFTWARE AG", 158 | "avkserv.exe": "G DATA SOFTWARE AG", 159 | "avkservice.exe": "G DATA SOFTWARE AG", 160 | "avkwctl9.exe": "G DATA SOFTWARE AG", 161 | "avltmain.exe": "Panda Software Aplication", 162 | "avnt.exe": "H+BEDV Datentechnik GmbH", 163 | "avp32.exe": "Kaspersky Anti-Virus", 164 | "avpcc.exe": " Kaspersky AntiVirus", 165 | "avpdos32.exe": " Kaspersky AntiVirus", 166 | "avpm.exe": " Kaspersky AntiVirus", 167 | 
"avptc32.exe": " Kaspersky AntiVirus", 168 | "avpupd.exe": " Kaspersky AntiVirus", 169 | "avsynmgr.exe": "McAfee", 170 | "avwin.exe": " H+BEDV", 171 | "bargains.exe": "Exact Advertising SpyWare", 172 | "beagle.exe": "Avast", 173 | "blackd.exe": "BlackICE", 174 | "blackice.exe": "BlackICE", 175 | "blink.exe": "micromedia", 176 | "blss.exe": "CBlaster", 177 | "bootwarn.exe": "Symantec", 178 | "bpc.exe": "Grokster", 179 | "brasil.exe": "Exact Advertising", 180 | "ccevtmgr.exe": "Norton Internet Security", 181 | "cdp.exe": "CyberLink Corp.", 182 | "cfd.exe": "Motive Communications", 183 | "cfgwiz.exe": " Norton AntiVirus", 184 | "claw95.exe": "已知杀软进程,名称暂未收录", 185 | "claw95cf.exe": "已知杀软进程,名称暂未收录", 186 | "clean.exe": "windows流氓软件清理大师", 187 | "cleaner.exe": "windows流氓软件清理大师", 188 | "cleaner3.exe": "windows流氓软件清理大师", 189 | "cleanpc.exe": "windows流氓软件清理大师", 190 | "cpd.exe": "McAfee", 191 | "ctrl.exe": "已知杀软进程,名称暂未收录", 192 | "cv.exe": "已知杀软进程,名称暂未收录", 193 | "defalert.exe": "Symantec", 194 | "defscangui.exe": "Symantec", 195 | "defwatch.exe": "Norton Antivirus", 196 | "doors.exe": "已知杀软进程,名称暂未收录", 197 | "dpf.exe": "已知杀软进程,名称暂未收录", 198 | "dpps2.exe": "PanicWare", 199 | "dssagent.exe": "Broderbund", 200 | "ecengine.exe": "已知杀软进程,名称暂未收录", 201 | "emsw.exe": "Alset Inc", 202 | "ent.exe": "已知杀软进程,名称暂未收录", 203 | "espwatch.exe": "已知杀软进程,名称暂未收录", 204 | "ethereal.exe": "RationalClearCase", 205 | "exe.avxw.exe": "已知杀软进程,名称暂未收录", 206 | "expert.exe": "已知杀软进程,名称暂未收录", 207 | "f-prot95.exe": "已知杀软进程,名称暂未收录", 208 | "fameh32.exe": "F-Secure", 209 | "fast.exe": " FastUsr", 210 | "fch32.exe": "F-Secure", 211 | "fih32.exe": "F-Secure", 212 | "findviru.exe": "F-Secure", 213 | "firewall.exe": "AshampooSoftware", 214 | "fnrb32.exe": "F-Secure", 215 | "fp-win.exe": " F-Prot Antivirus OnDemand", 216 | "fsaa.exe": "F-Secure", 217 | "fsav.exe": "F-Secure", 218 | "fsav32.exe": "F-Secure", 219 | "fsav530stbyb.exe": "F-Secure", 220 | "fsav530wtbyb.exe": "F-Secure", 221 | "fsav95.exe": "F-Secure", 222 | 
"fsgk32.exe": "F-Secure", 223 | "fsm32.exe": "F-Secure", 224 | "fsma32.exe": "F-Secure", 225 | "fsmb32.exe": "F-Secure", 226 | "gbmenu.exe": "已知杀软进程,名称暂未收录", 227 | "guard.exe": "ewido", 228 | "guarddog.exe": "ewido", 229 | "htlog.exe": "已知杀软进程,名称暂未收录", 230 | "htpatch.exe": "Silicon Integrated Systems Corporation", 231 | "hwpe.exe": "已知杀软进程,名称暂未收录", 232 | "iamapp.exe": "Symantec", 233 | "iamserv.exe": "Symantec", 234 | "iamstats.exe": "Symantec", 235 | "iedriver.exe": " Urlblaze.com", 236 | "iface.exe": "Panda Antivirus Module", 237 | "infus.exe": "Infus Dialer", 238 | "infwin.exe": "Msviewparasite", 239 | "intdel.exe": "Inet Delivery", 240 | "intren.exe": "已知杀软进程,名称暂未收录", 241 | "jammer.exe": "已知杀软进程,名称暂未收录", 242 | "kavpf.exe": "Kapersky", 243 | "kazza.exe": "Kapersky", 244 | "keenvalue.exe": "EUNIVERSE INC", 245 | "launcher.exe": "Intercort Systems", 246 | "ldpro.exe": "已知杀软进程,名称暂未收录", 247 | "ldscan.exe": "Windows Trojans Inspector", 248 | "localnet.exe": "已知杀软进程,名称暂未收录", 249 | "luall.exe": "Symantec", 250 | "luau.exe": "Symantec", 251 | "lucomserver.exe": "Norton", 252 | "mcagent.exe": "McAfee", 253 | "mcmnhdlr.exe": "McAfee", 254 | "mctool.exe": "McAfee", 255 | "mcupdate.exe": "McAfee", 256 | "mcvsrte.exe": "McAfee", 257 | "mcvsshld.exe": "McAfee", 258 | "mfin32.exe": "MyFreeInternetUpdate", 259 | "mfw2en.exe": "MyFreeInternetUpdate", 260 | "mfweng3.02d30.exe": "MyFreeInternetUpdate", 261 | "mgavrtcl.exe": "McAfee", 262 | "mgavrte.exe": "McAfee", 263 | "mghtml.exe": "McAfee", 264 | "mgui.exe": "BullGuard", 265 | "minilog.exe": "Zone Labs Inc", 266 | "mmod.exe": "EzulaInc", 267 | "mostat.exe": "WurldMediaInc", 268 | "mpfagent.exe": "McAfee", 269 | "mpfservice.exe": "McAfee", 270 | "mpftray.exe": "McAfee", 271 | "mscache.exe": "Integrated Search Technologies Spyware", 272 | "mscman.exe": "OdysseusMarketingInc", 273 | "msmgt.exe": "Total Velocity Spyware", 274 | "msvxd.exe": "W32/Datom-A", 275 | "mwatch.exe": "已知杀软进程,名称暂未收录", 276 | "nav.exe": "Reuters Limited", 277 
| "navapsvc.exe": "Norton AntiVirus", 278 | "navapw32.exe": "Norton AntiVirus", 279 | "navw32.exe": "Norton Antivirus", 280 | "ndd32.exe": "诺顿磁盘医生", 281 | "neowatchlog.exe": "已知杀软进程,名称暂未收录", 282 | "netutils.exe": "已知杀软进程,名称暂未收录", 283 | "nisserv.exe": "Norton", 284 | "nisum.exe": "Norton", 285 | "nmain.exe": "Norton", 286 | "nod32.exe": "ESET Smart Security", 287 | "norton_internet_secu_3.0_407.exe": "已知杀软进程,名称暂未收录", 288 | "notstart.exe": "已知杀软进程,名称暂未收录", 289 | "nprotect.exe": "Symantec", 290 | "npscheck.exe": "Norton", 291 | "npssvc.exe": "Norton", 292 | "ntrtscan.exe": "趋势反病毒应用程序", 293 | "nui.exe": "已知杀软进程,名称暂未收录", 294 | "otfix.exe": "已知杀软进程,名称暂未收录", 295 | "outpostinstall.exe": "Outpost", 296 | "patch.exe": "趋势科技", 297 | "pavw.exe": "已知杀软进程,名称暂未收录", 298 | "pcscan.exe": "趋势科技", 299 | "pdsetup.exe": "已知杀软进程,名称暂未收录", 300 | "persfw.exe": "Tiny Personal Firewall", 301 | "pgmonitr.exe": "PromulGate SpyWare", 302 | "pingscan.exe": "已知杀软进程,名称暂未收录", 303 | "platin.exe": "已知杀软进程,名称暂未收录", 304 | "pop3trap.exe": "PC-cillin", 305 | "poproxy.exe": "NortonAntiVirus", 306 | "popscan.exe": "已知杀软进程,名称暂未收录", 307 | "powerscan.exe": "Integrated Search Technologies", 308 | "ppinupdt.exe": "已知杀软进程,名称暂未收录", 309 | "pptbc.exe": "已知杀软进程,名称暂未收录", 310 | "ppvstop.exe": "已知杀软进程,名称暂未收录", 311 | "prizesurfer.exe": "Prizesurfer", 312 | "prmt.exe": "OpiStat", 313 | "prmvr.exe": "Adtomi", 314 | "processmonitor.exe": "Sysinternals", 315 | "proport.exe": "已知杀软进程,名称暂未收录", 316 | "protectx.exe": "ProtectX", 317 | "pspf.exe": "已知杀软进程,名称暂未收录", 318 | "purge.exe": "已知杀软进程,名称暂未收录", 319 | "qconsole.exe": "Norton AntiVirus Quarantine Console", 320 | "qserver.exe": "Norton Internet Security", 321 | "rapapp.exe": "BlackICE", 322 | "rb32.exe": "RapidBlaster", 323 | "rcsync.exe": "PrizeSurfer", 324 | "realmon.exe": "Realmon ", 325 | "rescue.exe": "已知杀软进程,名称暂未收录", 326 | "rescue32.exe": "卡巴斯基互联网安全套装", 327 | "rshell.exe": "已知杀软进程,名称暂未收录", 328 | "rtvscn95.exe": "Real-time virus scanner ", 329 | "rulaunch.exe": "McAfee 
User Interface", 330 | "run32dll.exe": "PAL PC Spy", 331 | "safeweb.exe": "PSafe Tecnologia", 332 | "sbserv.exe": "Norton Antivirus", 333 | "scrscan.exe": "360杀毒", 334 | "sfc.exe": "System file checker", 335 | "sh.exe": "MKS Toolkit for Win3", 336 | "showbehind.exe": "MicroSmarts Enterprise Component ", 337 | "soap.exe": "System Soap Pro", 338 | "sofi.exe": "已知杀软进程,名称暂未收录", 339 | "sperm.exe": "已知杀软进程,名称暂未收录", 340 | "supporter5.exe": "eScorcher反病毒", 341 | "symproxysvc.exe": "Symantec", 342 | "symtray.exe": "Symantec", 343 | "tbscan.exe": "ThunderBYTE", 344 | "tc.exe": "TimeCalende", 345 | "titanin.exe": "TitanHide", 346 | "tvmd.exe": "Total Velocity", 347 | "tvtmd.exe": " Total Velocity", 348 | "vettray.exe": "eTrust", 349 | "vir-help.exe": "已知杀软进程,名称暂未收录", 350 | "vnpc3000.exe": "已知杀软进程,名称暂未收录", 351 | "vpc32.exe": "Symantec", 352 | "vpc42.exe": "Symantec", 353 | "vshwin32.exe": "McAfee", 354 | "vsmain.exe": "McAfee", 355 | "vsstat.exe": "McAfee", 356 | "wfindv32.exe": "已知杀软进程,名称暂未收录", 357 | "zapro.exe": "Zone Alarm", 358 | "zonealarm.exe": "Zone Alarm", 359 | "AVPM.exe": "Kaspersky", 360 | "A2CMD.exe": "Emsisoft Anti-Malware", 361 | "A2SERVICE.exe": "a-squared free", 362 | "A2FREE.exe": "a-squared Free", 363 | "ADVCHK.exe": "Norton AntiVirus", 364 | "AGB.exe": "安天防线", 365 | "AHPROCMONSERVER.exe": "安天防线", 366 | "AIRDEFENSE.exe": "AirDefense", 367 | "ALERTSVC.exe": "Norton AntiVirus", 368 | "AVIRA.exe": "小红伞杀毒", 369 | "AMON.exe": "Tiny Personal Firewall", 370 | "AVZ.exe": "AVZ", 371 | "ANTIVIR.exe": "已知杀软进程,名称暂未收录", 372 | "APVXDWIN.exe": "熊猫卫士", 373 | "ASHMAISV.exe": "Alwil", 374 | "ASHSERV.exe": "Avast Anti-virus", 375 | "ASHSIMPL.exe": "AVAST!VirusCleaner", 376 | "ASHWEBSV.exe": "Avast", 377 | "ASWUPDSV.exe": "Avast", 378 | "ASWSCAN.exe": "Avast", 379 | "AVCIMAN.exe": "熊猫卫士", 380 | "AVCONSOL.exe": "McAfee", 381 | "AVENGINE.exe": "熊猫卫士", 382 | "AVESVC.exe": "Avira AntiVir Security Service", 383 | "AVEVL32.exe": "已知杀软进程,名称暂未收录", 384 | "AVGAM.exe": "AVG", 385 | 
"AVGCC.exe": "AVG", 386 | "AVGCHSVX.exe": "AVG", 387 | "AVGCSRVX": "AVG", 388 | "AVGNSX.exe": "AVG", 389 | "AVGCC32.exe": "AVG", 390 | "AVGCTRL.exe": "AVG", 391 | "AVGEMC.exe": "AVG", 392 | "AVGFWSRV.exe": "AVG", 393 | "AVGNTMGR.exe": "AVG", 394 | "AVGSERV.exe": "AVG", 395 | "AVGTRAY.exe": "AVG", 396 | "AVGUPSVC.exe": "AVG", 397 | "AVINITNT.exe": "Command AntiVirus for NT Server", 398 | "AVPCC.exe": "Kaspersky", 399 | "AVSERVER.exe": "Kerio MailServer", 400 | "AVSCHED32.exe": "H+BEDV", 401 | "AVSYNMGR.exe": "McAfee", 402 | "AVWUPSRV.exe": "H+BEDV", 403 | "BDSWITCH.exe": "BitDefender Module", 404 | "BLACKD.exe": "BlackICE", 405 | "CCEVTMGR.exe": "Symantec", 406 | "CFP.exe": "COMODO", 407 | "CLAMWIN.exe": "ClamWin Portable", 408 | "CUREIT.exe": "DrWeb CureIT", 409 | "DEFWATCH.exe": "Norton Antivirus", 410 | "DRWADINS.exe": "Dr.Web", 411 | "DRWEB.exe": "Dr.Web", 412 | "DEFENDERDAEMON.exe": "ShadowDefender", 413 | "EWIDOCTRL.exe": "Ewido Security Suite", 414 | "EZANTIVIRUSREGISTRATIONCHECK.exe": "e-Trust Antivirus", 415 | "FIREWALL.exe": "AshampooSoftware", 416 | "FPROTTRAY.exe": "F-PROT Antivirus", 417 | "FPWIN.exe": "Verizon", 418 | "FRESHCLAM.exe": "ClamAV", 419 | "FSAV32.exe": "F-Secure", 420 | "FSBWSYS.exe": "F-secure", 421 | "FSDFWD.exe": "F-Secure", 422 | "FSGK32.exe": "F-Secure", 423 | "FSGK32ST.exe": "F-Secure", 424 | "FSMA32.exe": "F-Secure", 425 | "FSMB32.exe": "F-Secure", 426 | "FSSM32.exe": "F-Secure", 427 | "GUARDGUI.exe": "网游保镖", 428 | "GUARDNT.exe": "IKARUS", 429 | "IAMAPP.exe": "Symantec", 430 | "INOCIT.exe": "eTrust", 431 | "INORPC.exe": "eTrust", 432 | "INORT.exe": "eTrust", 433 | "INOTASK.exe": "eTrust", 434 | "INOUPTNG.exe": "eTrust", 435 | "ISAFE.exe": "eTrust", 436 | "KAV.exe": "Kaspersky", 437 | "KAVMM.exe": "Kaspersky", 438 | "KAVPF.exe": "Kaspersky", 439 | "KAVPFW.exe": "Kaspersky", 440 | "KAVSTART.exe": "Kaspersky", 441 | "KAVSVC.exe": "Kaspersky", 442 | "KAVSVCUI.exe": "Kaspersky", 443 | "KMAILMON.exe": "金山毒霸", 444 | "MCAGENT.exe": "McAfee", 
445 | "MCMNHDLR.exe": "McAfee", 446 | "MCREGWIZ.exe": "McAfee", 447 | "MCUPDATE.exe": "McAfee", 448 | "MCVSSHLD.exe": "McAfee", 449 | "MINILOG.exe": "Zone Alarm", 450 | "MYAGTSVC.exe": "McAfee", 451 | "MYAGTTRY.exe": "McAfee", 452 | "NAVAPSVC.exe": "Norton", 453 | "NAVAPW32.exe": "Norton", 454 | "NAVLU32.exe": "Norton", 455 | "NAVW32.exe": "Norton Antivirus", 456 | "NEOWATCHLOG.exe": "NeoWatch", 457 | "NEOWATCHTRAY.exe": "NeoWatch", 458 | "NISSERV.exe": "Norton", 459 | "NISUM.exe": "Norton", 460 | "NMAIN.exe": "Norton", 461 | "NOD32.exe": "ESET NOD32", 462 | "NPFMSG.exe": "Norman个人防火墙", 463 | "NPROTECT.exe": "Symantec", 464 | "NSMDTR.exe": "Norton", 465 | "NTRTSCAN.exe": "趋势科技", 466 | "OFCPFWSVC.exe": "OfficeScanNT", 467 | "ONLINENT.exe": "已知杀软进程,名称暂未收录", 468 | "OP_MON.exe": " OutpostFirewall", 469 | "PAVFIRES.exe": "熊猫卫士", 470 | "PAVFNSVR.exe": "熊猫卫士", 471 | "PAVKRE.exe": "熊猫卫士", 472 | "PAVPROT.exe": "熊猫卫士", 473 | "PAVPROXY.exe": "熊猫卫士", 474 | "PAVPRSRV.exe": "熊猫卫士", 475 | "PAVSRV51.exe": "熊猫卫士", 476 | "PAVSS.exe": "熊猫卫士", 477 | "PCCGUIDE.exe": "PC-cillin", 478 | "PCCIOMON.exe": "PC-cillin", 479 | "PCCNTMON.exe": "PC-cillin", 480 | "PCCPFW.exe": "趋势科技", 481 | "PCCTLCOM.exe": "趋势科技", 482 | "PCTAV.exe": "PC Tools AntiVirus", 483 | "PERSFW.exe": "Tiny Personal Firewall", 484 | "PERVAC.exe": "已知杀软进程,名称暂未收录", 485 | "PESTPATROL.exe": "Ikarus", 486 | "PREVSRV.exe": "熊猫卫士", 487 | "RTVSCN95.exe": "Real-time Virus Scanner", 488 | "SAVADMINSERVICE.exe": "SAV", 489 | "SAVMAIN.exe": "SAV", 490 | "SAVSCAN.exe": "SAV", 491 | "SDHELP.exe": "Spyware Doctor", 492 | "SHSTAT.exe": "McAfee", 493 | "SPBBCSVC.exe": "Symantec", 494 | "SPIDERCPL.exe": "Dr.Web", 495 | "SPIDERML.exe": "Dr.Web", 496 | "SPIDERUI.exe": "Dr.Web", 497 | "SPYBOTSD.exe": "Spybot ", 498 | "SWAGENT.exe": "SonicWALL", 499 | "SWDOCTOR.exe": "SonicWALL", 500 | "SWNETSUP.exe": "Sophos", 501 | "SYMLCSVC.exe": "Symantec", 502 | "SYMPROXYSVC.exe": "Symantec", 503 | "SYMSPORT.exe": "Sysmantec", 504 | "SYMWSC.exe": 
"Sysmantec", 505 | "SYNMGR.exe": "Sysmantec", 506 | "TMLISTEN.exe": "趋势科技", 507 | "TMNTSRV.exe": "趋势科技", 508 | "TMPROXY.exe": "趋势科技", 509 | "TNBUTIL.exe": "Anti-Virus", 510 | "VBA32ECM.exe": "已知杀软进程,名称暂未收录", 511 | "VBA32IFS.exe": "已知杀软进程,名称暂未收录", 512 | "VBA32PP3.exe": "已知杀软进程,名称暂未收录", 513 | "VCRMON.exe": "VirusChaser", 514 | "VRMONNT.exe": "HAURI", 515 | "VRMONSVC.exe": "HAURI", 516 | "VSHWIN32.exe": "McAfee", 517 | "VSSTAT.exe": "McAfee", 518 | "XCOMMSVR.exe": "BitDefender", 519 | "ZONEALARM.exe": "Zone Alarm", 520 | "360rp.exe": "360杀毒", 521 | "afwServ.exe": " Avast Antivirus ", 522 | "safeboxTray.exe": "360杀毒", 523 | "360safebox.exe": "360杀毒", 524 | "QQPCTray.exe": "QQ电脑管家", 525 | "KSafeTray.exe": "金山毒霸", 526 | "KSafeSvc.exe": "金山毒霸", 527 | "KWatch.exe": "金山毒霸", 528 | "gov_defence_service.exe": "云锁", 529 | "gov_defence_daemon.exe": "云锁", 530 | "smartscreen.exe": "Windows Defender", 531 | "macompatsvc.exe": "McAfee", 532 | "mcamnsvc.exe ": "McAfee", 533 | "masvc.exe": "McAfee", 534 | "mfemms.exe": "McAfee", 535 | "mfevtps.exe": "McAfee", 536 | "mctary.exe": "McAfee", 537 | "mcshield.exe": "McAfee", 538 | "mfewc.exe": "McAfee", 539 | "mfewch.exe": "McAfee", 540 | "mfefw.exe": "McAfee", 541 | "mfefire.exe": "McAfee", 542 | "mfetp.exe": "McAfee", 543 | "mfecanary.exe": "McAfee", 544 | "mfeconsole.exe": "McAfee", 545 | "mfeesp.exe": "McAfee", 546 | "fcag.exe": "McAfee", 547 | "fcags.exe": "McAfee", 548 | "fcagswd.exe": "McAfee", 549 | "fcagate.exe": "McAfee" 550 | } -------------------------------------------------------------------------------- /raw_features.py: -------------------------------------------------------------------------------- 1 | import re 2 | import lief 3 | import hashlib 4 | import numpy as np 5 | from sklearn.feature_extraction import FeatureHasher 6 | 7 | 8 | class FeatureType(object): 9 | ''' Base class from which each feature type may inherit ''' 10 | 11 | name = '' 12 | dim = 0 13 | 14 | def __repr__(self): 15 | return 
class FeatureType(object):
    ''' Base class from which each feature type may inherit.

    Subclasses set the class attributes ``name`` and ``dim`` and implement
    ``raw_features`` and ``process_raw_features``.
    '''

    name = ''
    dim = 0

    def __repr__(self):
        return '{}({})'.format(self.name, self.dim)

    def raw_features(self, bytez, lief_binary):
        ''' Generate a JSON-able representation of the file.

        Raises NotImplementedError: subclasses must override this method.
        '''
        # `NotImplemented` is a comparison sentinel, not an exception; raising it
        # produces a confusing TypeError. NotImplementedError is the intended signal.
        raise NotImplementedError

    def process_raw_features(self, raw_obj):
        ''' Generate a feature vector from the raw features.

        Raises NotImplementedError: subclasses must override this method.
        '''
        raise NotImplementedError

    def feature_vector(self, bytez, lief_binary):
        ''' Directly calculate the feature vector from the sample itself. This should only be
        implemented differently if there are significant speedups to be gained from combining
        the two functions. '''
        return self.process_raw_features(self.raw_features(bytez, lief_binary))


def _normalize_counts(raw_obj):
    ''' Turn a list of counts into a float32 vector that sums to 1.

    If every count is zero the all-zero vector is returned unchanged, instead of
    dividing by zero and producing NaNs.
    '''
    counts = np.array(raw_obj, dtype=np.float32)
    total = counts.sum()
    if total == 0:
        return counts
    return counts / total


class ByteHistogram(FeatureType):
    ''' Byte histogram over the entire binary file.

    ``raw_features`` returns the raw (non-normalized) counts of each of the 256
    byte values; ``process_raw_features`` normalizes them.
    '''

    name = 'histogram'
    dim = 256

    def __init__(self):
        super(ByteHistogram, self).__init__()

    def raw_features(self, bytez, lief_binary):
        ''' Return a 256-element list with the number of occurrences of each byte value.
        ``lief_binary`` is not used. '''
        counts = np.bincount(np.frombuffer(bytez, dtype=np.uint8), minlength=256)
        return counts.tolist()

    def process_raw_features(self, raw_obj):
        ''' Return the counts as a float32 vector normalized to sum to 1. '''
        return _normalize_counts(raw_obj)


class ByteEntropyHistogram(FeatureType):
    ''' 2d byte/entropy histogram based loosely on (Saxe and Berlin, 2015).
    This roughly approximates the joint probability of byte value and local entropy.
    See Section 2.1.1 in https://arxiv.org/pdf/1508.03096.pdf for more info.
    '''

    name = 'byteentropy'
    dim = 256

    def __init__(self, step=1024, window=2048):
        ''' ``window`` is the block length in bytes; ``step`` is the distance between
        the starts of consecutive blocks. '''
        super(ByteEntropyHistogram, self).__init__()
        self.window = window
        self.step = step

    def _entropy_bin_counts(self, block):
        ''' Return ``(Hbin, c)`` for one block of uint8 values: ``c`` is the 16-bin
        histogram of the high nibbles and ``Hbin`` (0..15) is the entropy bin. '''
        # coarse histogram, 16 bytes per bin
        c = np.bincount(block >> 4, minlength=16)  # 16-bin histogram
        # probabilities are taken relative to the configured window length,
        # even when the block is shorter than the window
        p = c.astype(np.float32) / self.window
        wh = np.where(c)[0]
        # * x2 b.c. we reduced information by half: 256 bins (8 bits) to 16 bins (4 bits)
        H = np.sum(-p[wh] * np.log2(p[wh])) * 2

        Hbin = int(H * 2)  # up to 16 bins (max entropy is 8 bits)
        if Hbin == 16:  # handle entropy = 8.0 bits
            Hbin = 15

        return Hbin, c

    def raw_features(self, bytez, lief_binary):
        ''' Return the flattened 16x16 (entropy bin x high-nibble bin) count matrix as a
        256-element list. ``lief_binary`` is not used. '''
        # `np.int` was only an alias of the builtin `int` and no longer exists in
        # current NumPy releases; the builtin gives the same dtype.
        output = np.zeros((16, 16), dtype=int)
        a = np.frombuffer(bytez, dtype=np.uint8)
        if a.shape[0] < self.window:
            # input shorter than one window: treat the whole input as a single block
            Hbin, c = self._entropy_bin_counts(a)
            output[Hbin, :] += c
        else:
            # slide a window of `self.window` bytes over the data, advancing by
            # `self.step`; only full windows are used
            for start in range(0, a.shape[0] - self.window + 1, self.step):
                Hbin, c = self._entropy_bin_counts(a[start:start + self.window])
                output[Hbin, :] += c

        return output.flatten().tolist()

    def process_raw_features(self, raw_obj):
        ''' Return the counts as a float32 vector normalized to sum to 1. '''
        return _normalize_counts(raw_obj)


class SectionInfo(FeatureType):
    ''' Information about section names, sizes and entropy. Uses hashing trick
    to summarize all this section info into a feature vector.
    '''

    name = 'section'
    # 5 general counters + five hashed groups of 50 features each
    dim = 5 + 50 + 50 + 50 + 50 + 50

    def __init__(self):
        super(SectionInfo, self).__init__()

    @staticmethod
    def _properties(s):
        ''' Return the section's characteristic flags as bare names (text after the last '.'). '''
        return [str(c).split('.')[-1] for c in s.characteristics_lists]

    def raw_features(self, bytez, lief_binary):
        ''' Return ``{"entry": <section name>, "sections": [<per-section dict>, ...]}``.
        With ``lief_binary`` set to None the entry is "" and the section list is empty. '''
        if lief_binary is None:
            return {"entry": "", "sections": []}

        # properties of entry point, or if invalid, the first executable section
        try:
            entry_section = lief_binary.section_from_offset(lief_binary.entrypoint).name
        except lief.not_found:
            # bad entry point, let's find the first executable section
            entry_section = ""
            for s in lief_binary.sections:
                if lief.PE.SECTION_CHARACTERISTICS.MEM_EXECUTE in s.characteristics_lists:
                    entry_section = s.name
                    break

        raw_obj = {"entry": entry_section}
        raw_obj["sections"] = [{
            'name': s.name,
            'size': s.size,
            'entropy': s.entropy,
            'vsize': s.virtual_size,
            'props': self._properties(s)
        } for s in lief_binary.sections]
        return raw_obj

    def process_raw_features(self, raw_obj):
        ''' Return a float32 vector: 5 general counters followed by the hashed section
        sizes, entropies, virtual sizes, entry-section name and entry-section flags. '''
        sections = raw_obj['sections']
        general = [
            len(sections),  # total number of sections
            # number of sections with zero size
            sum(1 for s in sections if s['size'] == 0),
            # number of sections with an empty name
            sum(1 for s in sections if s['name'] == ""),
            # number of RX
            sum(1 for s in sections if 'MEM_READ' in s['props'] and 'MEM_EXECUTE' in s['props']),
            # number of W
            sum(1 for s in sections if 'MEM_WRITE' in s['props'])
        ]
        # gross characteristics of each section
        section_sizes = [(s['name'], s['size']) for s in sections]
        section_sizes_hashed = FeatureHasher(50, input_type="pair").transform([section_sizes]).toarray()[0]
        section_entropy = [(s['name'], s['entropy']) for s in sections]
        section_entropy_hashed = FeatureHasher(50, input_type="pair").transform([section_entropy]).toarray()[0]
        section_vsize = [(s['name'], s['vsize']) for s in sections]
        section_vsize_hashed = FeatureHasher(50, input_type="pair").transform([section_vsize]).toarray()[0]
        # NOTE(review): the sample passed here is the bare entry-name string, not a list of
        # strings. Left unchanged because altering it would change the feature values;
        # confirm this is accepted by the scikit-learn version in use.
        entry_name_hashed = FeatureHasher(50, input_type="string").transform([raw_obj['entry']]).toarray()[0]
        # flags of every section whose name equals the entry section's name
        characteristics = [p for s in sections for p in s['props'] if s['name'] == raw_obj['entry']]
        characteristics_hashed = FeatureHasher(50, input_type="string").transform([characteristics]).toarray()[0]

        return np.hstack([
            general, section_sizes_hashed, section_entropy_hashed, section_vsize_hashed, entry_name_hashed,
            characteristics_hashed
        ]).astype(np.float32)


class ImportsInfo(FeatureType):
    ''' Information about imported libraries and functions from the
    import address table. Note that the total number of imported
    functions is contained in GeneralFileInfo.
    '''

    name = 'imports'
    dim = 1280  # 256 hashed library names + 1024 hashed "library:function" names

    def __init__(self):
        super(ImportsInfo, self).__init__()

    def raw_features(self, bytez, lief_binary):
        ''' Return a dict mapping each imported library name to the list of its imported
        entry names (empty dict when ``lief_binary`` is None). '''
        imports = {}
        if lief_binary is None:
            return imports

        for lib in lief_binary.imports:
            if lib.name not in imports:
                imports[lib.name] = []  # libraries can be duplicated in listing, extend instead of overwrite

            # Clipping assumes there are diminishing returns on the discriminatory power of imported functions
            # beyond the first 10000 characters, and this will help limit the dataset size
            imports[lib.name].extend([entry.name[:10000] for entry in lib.entries])

        return imports

    def process_raw_features(self, raw_obj):
        ''' Return a float32 vector of length 1280: hashed library names followed by
        hashed fully-qualified import names. '''
        # unique libraries (case-insensitive)
        libraries = list({l.lower() for l in raw_obj.keys()})
        libraries_hashed = FeatureHasher(256, input_type="string").transform([libraries]).toarray()[0]

        # A string like "kernel32.dll:CreateFileMappingA" for each imported function
        imports = [lib.lower() + ':' + e for lib, elist in raw_obj.items() for e in elist]
        imports_hashed = FeatureHasher(1024, input_type="string").transform([imports]).toarray()[0]

        # Two separate elements: libraries (alone) and fully-qualified names of imported functions
        return np.hstack([libraries_hashed, imports_hashed]).astype(np.float32)


class ExportsInfo(FeatureType):
    ''' Information about exported functions. Note that the total number of exported
    functions is contained in GeneralFileInfo.
    '''

    name = 'exports'
    dim = 128

    def __init__(self):
        super(ExportsInfo, self).__init__()

    def raw_features(self, bytez, lief_binary):
        ''' Return the list of exported function names (empty list when ``lief_binary`` is None).
        Unlike the import names, these are not length-clipped. '''
        if lief_binary is None:
            return []

        return [export.name for export in lief_binary.exported_functions]

    def process_raw_features(self, raw_obj):
        ''' Return the export names hashed into a float32 vector of length 128. '''
        exports_hashed = FeatureHasher(128, input_type="string").transform([raw_obj]).toarray()[0]
        return exports_hashed.astype(np.float32)


class GeneralFileInfo(FeatureType):
    ''' General information about the file '''

    name = 'general'
    dim = 10

    def __init__(self):
        super(GeneralFileInfo, self).__init__()

    def raw_features(self, bytez, lief_binary):
        ''' Return a dict of ten scalar properties. When ``lief_binary`` is None only
        ``size`` (the length of ``bytez``) is populated and everything else is 0. '''
        if lief_binary is None:
            return {
                'size': len(bytez),
                'vsize': 0,
                'has_debug': 0,
                'exports': 0,
                'imports': 0,
                'has_relocations': 0,
                'has_resources': 0,
                'has_signature': 0,
                'has_tls': 0,
                'symbols': 0
            }

        return {
            'size': len(bytez),
            'vsize': lief_binary.virtual_size,
            'has_debug': int(lief_binary.has_debug),
            'exports': len(lief_binary.exported_functions),
            'imports': len(lief_binary.imported_functions),
            'has_relocations': int(lief_binary.has_relocations),
            'has_resources': int(lief_binary.has_resources),
            'has_signature': int(lief_binary.has_signature),
            'has_tls': int(lief_binary.has_tls),
            'symbols': len(lief_binary.symbols),
        }

    def process_raw_features(self, raw_obj):
        ''' Return the ten properties as a float32 vector in a fixed order. '''
        return np.asarray(
            [
                raw_obj['size'], raw_obj['vsize'], raw_obj['has_debug'], raw_obj['exports'], raw_obj['imports'],
                raw_obj['has_relocations'], raw_obj['has_resources'], raw_obj['has_signature'], raw_obj['has_tls'],
                raw_obj['symbols']
            ],
            dtype=np.float32)
HeaderFileInfo(FeatureType): 291 | ''' Machine, architecture, OS, linker and other information extracted from header ''' 292 | 293 | name = 'header' 294 | dim = 62 295 | 296 | def __init__(self): 297 | super(FeatureType, self).__init__() 298 | 299 | def raw_features(self, bytez, lief_binary): 300 | raw_obj = {} 301 | raw_obj['coff'] = {'timestamp': 0, 'machine': "", 'characteristics': []} 302 | raw_obj['optional'] = { 303 | 'subsystem': "", 304 | 'dll_characteristics': [], 305 | 'magic': "", 306 | 'major_image_version': 0, 307 | 'minor_image_version': 0, 308 | 'major_linker_version': 0, 309 | 'minor_linker_version': 0, 310 | 'major_operating_system_version': 0, 311 | 'minor_operating_system_version': 0, 312 | 'major_subsystem_version': 0, 313 | 'minor_subsystem_version': 0, 314 | 'sizeof_code': 0, 315 | 'sizeof_headers': 0, 316 | 'sizeof_heap_commit': 0 317 | } 318 | if lief_binary is None: 319 | return raw_obj 320 | 321 | raw_obj['coff']['timestamp'] = lief_binary.header.time_date_stamps 322 | raw_obj['coff']['machine'] = str(lief_binary.header.machine).split('.')[-1] 323 | raw_obj['coff']['characteristics'] = [str(c).split('.')[-1] for c in lief_binary.header.characteristics_list] 324 | raw_obj['optional']['subsystem'] = str(lief_binary.optional_header.subsystem).split('.')[-1] 325 | raw_obj['optional']['dll_characteristics'] = [ 326 | str(c).split('.')[-1] for c in lief_binary.optional_header.dll_characteristics_lists 327 | ] 328 | raw_obj['optional']['magic'] = str(lief_binary.optional_header.magic).split('.')[-1] 329 | raw_obj['optional']['major_image_version'] = lief_binary.optional_header.major_image_version 330 | raw_obj['optional']['minor_image_version'] = lief_binary.optional_header.minor_image_version 331 | raw_obj['optional']['major_linker_version'] = lief_binary.optional_header.major_linker_version 332 | raw_obj['optional']['minor_linker_version'] = lief_binary.optional_header.minor_linker_version 333 | raw_obj['optional'][ 334 | 
'major_operating_system_version'] = lief_binary.optional_header.major_operating_system_version 335 | raw_obj['optional'][ 336 | 'minor_operating_system_version'] = lief_binary.optional_header.minor_operating_system_version 337 | raw_obj['optional']['major_subsystem_version'] = lief_binary.optional_header.major_subsystem_version 338 | raw_obj['optional']['minor_subsystem_version'] = lief_binary.optional_header.minor_subsystem_version 339 | raw_obj['optional']['sizeof_code'] = lief_binary.optional_header.sizeof_code 340 | raw_obj['optional']['sizeof_headers'] = lief_binary.optional_header.sizeof_headers 341 | raw_obj['optional']['sizeof_heap_commit'] = lief_binary.optional_header.sizeof_heap_commit 342 | return raw_obj 343 | 344 | def process_raw_features(self, raw_obj): 345 | return np.hstack([ 346 | raw_obj['coff']['timestamp'], 347 | FeatureHasher(10, input_type="string").transform([[raw_obj['coff']['machine']]]).toarray()[0], 348 | FeatureHasher(10, input_type="string").transform([raw_obj['coff']['characteristics']]).toarray()[0], 349 | FeatureHasher(10, input_type="string").transform([[raw_obj['optional']['subsystem']]]).toarray()[0], 350 | FeatureHasher(10, input_type="string").transform([raw_obj['optional']['dll_characteristics']]).toarray()[0], 351 | FeatureHasher(10, input_type="string").transform([[raw_obj['optional']['magic']]]).toarray()[0], 352 | raw_obj['optional']['major_image_version'], 353 | raw_obj['optional']['minor_image_version'], 354 | raw_obj['optional']['major_linker_version'], 355 | raw_obj['optional']['minor_linker_version'], 356 | raw_obj['optional']['major_operating_system_version'], 357 | raw_obj['optional']['minor_operating_system_version'], 358 | raw_obj['optional']['major_subsystem_version'], 359 | raw_obj['optional']['minor_subsystem_version'], 360 | raw_obj['optional']['sizeof_code'], 361 | raw_obj['optional']['sizeof_headers'], 362 | raw_obj['optional']['sizeof_heap_commit'], 363 | ]).astype(np.float32) 364 | 365 | 366 | class 
StringExtractor(FeatureType): 367 | ''' Extracts strings from raw byte stream ''' 368 | 369 | name = 'strings' 370 | dim = 1 + 1 + 1 + 96 + 1 + 1 + 1 + 1 + 1 371 | 372 | def __init__(self): 373 | super(FeatureType, self).__init__() 374 | # all consecutive runs of 0x20 - 0x7f that are 5+ characters 375 | self._allstrings = re.compile(b'[\x20-\x7f]{5,}') 376 | # occurrences of the string 'C:\'. Not actually extracting the path 377 | self._paths = re.compile(b'c:\\\\', re.IGNORECASE) 378 | # occurrences of http:// or https://. Not actually extracting the URLs 379 | self._urls = re.compile(b'https?://', re.IGNORECASE) 380 | # occurrences of the string prefix HKEY_. Not actually extracting registry names 381 | self._registry = re.compile(b'HKEY_') 382 | # crude evidence of an MZ header (dropper?) somewhere in the byte stream 383 | self._mz = re.compile(b'MZ') 384 | 385 | def raw_features(self, bytez, lief_binary): 386 | allstrings = self._allstrings.findall(bytez) 387 | if allstrings: 388 | # statistics about strings: 389 | string_lengths = [len(s) for s in allstrings] 390 | avlength = sum(string_lengths) / len(string_lengths) 391 | # map printable characters 0x20 - 0x7f to an int array consisting of 0-95, inclusive 392 | as_shifted_string = [b - ord(b'\x20') for b in b''.join(allstrings)] 393 | c = np.bincount(as_shifted_string, minlength=96) # histogram count 394 | # distribution of characters in printable strings 395 | csum = c.sum() 396 | p = c.astype(np.float32) / csum 397 | wh = np.where(c)[0] 398 | H = np.sum(-p[wh] * np.log2(p[wh])) # entropy 399 | else: 400 | avlength = 0 401 | c = np.zeros((96,), dtype=np.float32) 402 | H = 0 403 | csum = 0 404 | 405 | return { 406 | 'numstrings': len(allstrings), 407 | 'avlength': avlength, 408 | 'printabledist': c.tolist(), # store non-normalized histogram 409 | 'printables': int(csum), 410 | 'entropy': float(H), 411 | 'paths': len(self._paths.findall(bytez)), 412 | 'urls': len(self._urls.findall(bytez)), 413 | 'registry': 
len(self._registry.findall(bytez)), 414 | 'MZ': len(self._mz.findall(bytez)) 415 | } 416 | 417 | def process_raw_features(self, raw_obj): 418 | hist_divisor = float(raw_obj['printables']) if raw_obj['printables'] > 0 else 1.0 419 | return np.hstack([ 420 | raw_obj['numstrings'], raw_obj['avlength'], raw_obj['printables'], 421 | np.asarray(raw_obj['printabledist']) / hist_divisor, raw_obj['entropy'], raw_obj['paths'], raw_obj['urls'], 422 | raw_obj['registry'], raw_obj['MZ'] 423 | ]).astype(np.float32) 424 | 425 | 426 | class PEFeatureExtractor(object): 427 | ''' Extract useful features from a PE file, and return as a vector of fixed size. ''' 428 | 429 | features = [ 430 | ByteHistogram(), ByteEntropyHistogram(), GeneralFileInfo(), 431 | HeaderFileInfo(), ExportsInfo(), SectionInfo() 432 | ] # ImportsInfo(), StringExtractor() 433 | dim = sum([fe.dim for fe in features]) 434 | 435 | def raw_features(self, bytez): 436 | try: 437 | lief_binary = lief.PE.parse(list(bytez)) 438 | except (lief.bad_format, lief.bad_file, lief.pe_error, lief.parser_error, RuntimeError) as e: 439 | print("lief error: ", str(e)) 440 | lief_binary = None 441 | except Exception: # everything else (KeyboardInterrupt, SystemExit, ValueError): 442 | raise 443 | 444 | features = {"sha256": hashlib.sha256(bytez).hexdigest()} 445 | features.update({fe.name: fe.raw_features(bytez, lief_binary) for fe in self.features}) 446 | return features 447 | 448 | def process_raw_features(self, raw_obj): 449 | feature_vectors = [fe.process_raw_features(raw_obj[fe.name]) for fe in self.features] 450 | return np.hstack(feature_vectors).astype(np.float32) 451 | 452 | def feature_vector(self, bytez): 453 | return self.process_raw_features(self.raw_features(bytez)) -------------------------------------------------------------------------------- /data/OPCODE.txt: -------------------------------------------------------------------------------- 1 | aaa 2 | aad 3 | aam 4 | aas 5 | fabs 6 | adc 7 | adcx 8 | add 9 | addpd 
10 | addps 11 | addsd 12 | addss 13 | addsubpd 14 | addsubps 15 | fadd 16 | fiadd 17 | faddp 18 | adox 19 | aesdeclast 20 | aesdec 21 | aesenclast 22 | aesenc 23 | aesimc 24 | aeskeygenassist 25 | and 26 | andn 27 | andnpd 28 | andnps 29 | andpd 30 | andps 31 | arpl 32 | bextr 33 | blcfill 34 | blci 35 | blcic 36 | blcmsk 37 | blcs 38 | blendpd 39 | blendps 40 | blendvpd 41 | blendvps 42 | blsfill 43 | blsi 44 | blsic 45 | blsmsk 46 | blsr 47 | bound 48 | bsf 49 | bsr 50 | bswap 51 | bt 52 | btc 53 | btr 54 | bts 55 | bzhi 56 | call 57 | cbw 58 | cdq 59 | cdqe 60 | fchs 61 | clac 62 | clc 63 | cld 64 | clflush 65 | clflushopt 66 | clgi 67 | cli 68 | clts 69 | clwb 70 | cmc 71 | cmova 72 | cmovae 73 | cmovb 74 | cmovbe 75 | fcmovbe 76 | fcmovb 77 | cmove 78 | fcmove 79 | cmovg 80 | cmovge 81 | cmovl 82 | cmovle 83 | fcmovnbe 84 | fcmovnb 85 | cmovne 86 | fcmovne 87 | cmovno 88 | cmovnp 89 | fcmovnu 90 | cmovns 91 | cmovo 92 | cmovp 93 | fcmovu 94 | cmovs 95 | cmp 96 | cmpsb 97 | cmpsq 98 | cmpsw 99 | cmpxchg16b 100 | cmpxchg 101 | cmpxchg8b 102 | comisd 103 | comiss 104 | fcomp 105 | fcomip 106 | fcomi 107 | fcom 108 | fcos 109 | cpuid 110 | cqo 111 | crc32 112 | cvtdq2pd 113 | cvtdq2ps 114 | cvtpd2dq 115 | cvtpd2ps 116 | cvtps2dq 117 | cvtps2pd 118 | cvtsd2si 119 | cvtsd2ss 120 | cvtsi2sd 121 | cvtsi2ss 122 | cvtss2sd 123 | cvtss2si 124 | cvttpd2dq 125 | cvttps2dq 126 | cvttsd2si 127 | cvttss2si 128 | cwd 129 | cwde 130 | daa 131 | das 132 | data16 133 | dec 134 | div 135 | divpd 136 | divps 137 | fdivr 138 | fidivr 139 | fdivrp 140 | divsd 141 | divss 142 | fdiv 143 | fidiv 144 | fdivp 145 | dppd 146 | dpps 147 | ret 148 | encls 149 | enclu 150 | enter 151 | extractps 152 | extrq 153 | f2xm1 154 | lcall 155 | ljmp 156 | fbld 157 | fbstp 158 | fcompp 159 | fdecstp 160 | femms 161 | ffree 162 | ficom 163 | ficomp 164 | fincstp 165 | fldcw 166 | fldenv 167 | fldl2e 168 | fldl2t 169 | fldlg2 170 | fldln2 171 | fldpi 172 | fnclex 173 | fninit 174 | fnop 175 | fnstcw 
176 | fnstsw 177 | fpatan 178 | fprem 179 | fprem1 180 | fptan 181 | ffreep 182 | frndint 183 | frstor 184 | fnsave 185 | fscale 186 | fsetpm 187 | fsincos 188 | fnstenv 189 | fxam 190 | fxrstor 191 | fxrstor64 192 | fxsave 193 | fxsave64 194 | fxtract 195 | fyl2x 196 | fyl2xp1 197 | movapd 198 | movaps 199 | orpd 200 | orps 201 | vmovapd 202 | vmovaps 203 | xorpd 204 | xorps 205 | getsec 206 | haddpd 207 | haddps 208 | hlt 209 | hsubpd 210 | hsubps 211 | idiv 212 | fild 213 | imul 214 | in 215 | inc 216 | insb 217 | insertps 218 | insertq 219 | insd 220 | insw 221 | int 222 | int1 223 | int3 224 | into 225 | invd 226 | invept 227 | invlpg 228 | invlpga 229 | invpcid 230 | invvpid 231 | iret 232 | iretd 233 | iretq 234 | fisttp 235 | fist 236 | fistp 237 | ucomisd 238 | ucomiss 239 | vcomisd 240 | vcomiss 241 | vcvtsd2ss 242 | vcvtsi2sd 243 | vcvtsi2ss 244 | vcvtss2sd 245 | vcvttsd2si 246 | vcvttsd2usi 247 | vcvttss2si 248 | vcvttss2usi 249 | vcvtusi2sd 250 | vcvtusi2ss 251 | vucomisd 252 | vucomiss 253 | jae 254 | ja 255 | jbe 256 | jb 257 | jcxz 258 | jecxz 259 | je 260 | jge 261 | jg 262 | jle 263 | jl 264 | jmp 265 | jne 266 | jno 267 | jnp 268 | jns 269 | jo 270 | jp 271 | jrcxz 272 | js 273 | kandb 274 | kandd 275 | kandnb 276 | kandnd 277 | kandnq 278 | kandnw 279 | kandq 280 | kandw 281 | kmovb 282 | kmovd 283 | kmovq 284 | kmovw 285 | knotb 286 | knotd 287 | knotq 288 | knotw 289 | korb 290 | kord 291 | korq 292 | kortestb 293 | kortestd 294 | kortestq 295 | kortestw 296 | korw 297 | kshiftlb 298 | kshiftld 299 | kshiftlq 300 | kshiftlw 301 | kshiftrb 302 | kshiftrd 303 | kshiftrq 304 | kshiftrw 305 | kunpckbw 306 | kxnorb 307 | kxnord 308 | kxnorq 309 | kxnorw 310 | kxorb 311 | kxord 312 | kxorq 313 | kxorw 314 | lahf 315 | lar 316 | lddqu 317 | ldmxcsr 318 | lds 319 | fldz 320 | fld1 321 | fld 322 | lea 323 | leave 324 | les 325 | lfence 326 | lfs 327 | lgdt 328 | lgs 329 | lidt 330 | lldt 331 | lmsw 332 | or 333 | sub 334 | xor 335 | lodsb 336 | lodsd 
337 | lodsq 338 | lodsw 339 | loop 340 | loope 341 | loopne 342 | retf 343 | retfq 344 | lsl 345 | lss 346 | ltr 347 | xadd 348 | lzcnt 349 | maskmovdqu 350 | maxpd 351 | maxps 352 | maxsd 353 | maxss 354 | mfence 355 | minpd 356 | minps 357 | minsd 358 | minss 359 | cvtpd2pi 360 | cvtpi2pd 361 | cvtpi2ps 362 | cvtps2pi 363 | cvttpd2pi 364 | cvttps2pi 365 | emms 366 | maskmovq 367 | movd 368 | movdq2q 369 | movntq 370 | movq2dq 371 | movq 372 | pabsb 373 | pabsd 374 | pabsw 375 | packssdw 376 | packsswb 377 | packuswb 378 | paddb 379 | paddd 380 | paddq 381 | paddsb 382 | paddsw 383 | paddusb 384 | paddusw 385 | paddw 386 | palignr 387 | pandn 388 | pand 389 | pavgb 390 | pavgw 391 | pcmpeqb 392 | pcmpeqd 393 | pcmpeqw 394 | pcmpgtb 395 | pcmpgtd 396 | pcmpgtw 397 | pextrw 398 | phaddsw 399 | phaddw 400 | phaddd 401 | phsubd 402 | phsubsw 403 | phsubw 404 | pinsrw 405 | pmaddubsw 406 | pmaddwd 407 | pmaxsw 408 | pmaxub 409 | pminsw 410 | pminub 411 | pmovmskb 412 | pmulhrsw 413 | pmulhuw 414 | pmulhw 415 | pmullw 416 | pmuludq 417 | por 418 | psadbw 419 | pshufb 420 | pshufw 421 | psignb 422 | psignd 423 | psignw 424 | pslld 425 | psllq 426 | psllw 427 | psrad 428 | psraw 429 | psrld 430 | psrlq 431 | psrlw 432 | psubb 433 | psubd 434 | psubq 435 | psubsb 436 | psubsw 437 | psubusb 438 | psubusw 439 | psubw 440 | punpckhbw 441 | punpckhdq 442 | punpckhwd 443 | punpcklbw 444 | punpckldq 445 | punpcklwd 446 | pxor 447 | monitor 448 | montmul 449 | mov 450 | movabs 451 | movbe 452 | movddup 453 | movdqa 454 | movdqu 455 | movhlps 456 | movhpd 457 | movhps 458 | movlhps 459 | movlpd 460 | movlps 461 | movmskpd 462 | movmskps 463 | movntdqa 464 | movntdq 465 | movnti 466 | movntpd 467 | movntps 468 | movntsd 469 | movntss 470 | movsb 471 | movsd 472 | movshdup 473 | movsldup 474 | movsq 475 | movss 476 | movsw 477 | movsx 478 | movsxd 479 | movupd 480 | movups 481 | movzx 482 | mpsadbw 483 | mul 484 | mulpd 485 | mulps 486 | mulsd 487 | mulss 488 | mulx 489 | fmul 490 | 
fimul 491 | fmulp 492 | mwait 493 | neg 494 | nop 495 | not 496 | out 497 | outsb 498 | outsd 499 | outsw 500 | packusdw 501 | pause 502 | pavgusb 503 | pblendvb 504 | pblendw 505 | pclmulqdq 506 | pcmpeqq 507 | pcmpestri 508 | pcmpestrm 509 | pcmpgtq 510 | pcmpistri 511 | pcmpistrm 512 | pcommit 513 | pdep 514 | pext 515 | pextrb 516 | pextrd 517 | pextrq 518 | pf2id 519 | pf2iw 520 | pfacc 521 | pfadd 522 | pfcmpeq 523 | pfcmpge 524 | pfcmpgt 525 | pfmax 526 | pfmin 527 | pfmul 528 | pfnacc 529 | pfpnacc 530 | pfrcpit1 531 | pfrcpit2 532 | pfrcp 533 | pfrsqit1 534 | pfrsqrt 535 | pfsubr 536 | pfsub 537 | phminposuw 538 | pi2fd 539 | pi2fw 540 | pinsrb 541 | pinsrd 542 | pinsrq 543 | pmaxsb 544 | pmaxsd 545 | pmaxud 546 | pmaxuw 547 | pminsb 548 | pminsd 549 | pminud 550 | pminuw 551 | pmovsxbd 552 | pmovsxbq 553 | pmovsxbw 554 | pmovsxdq 555 | pmovsxwd 556 | pmovsxwq 557 | pmovzxbd 558 | pmovzxbq 559 | pmovzxbw 560 | pmovzxdq 561 | pmovzxwd 562 | pmovzxwq 563 | pmuldq 564 | pmulhrw 565 | pmulld 566 | pop 567 | popaw 568 | popal 569 | popcnt 570 | popf 571 | popfd 572 | popfq 573 | prefetch 574 | prefetchnta 575 | prefetcht0 576 | prefetcht1 577 | prefetcht2 578 | prefetchw 579 | pshufd 580 | pshufhw 581 | pshuflw 582 | pslldq 583 | psrldq 584 | pswapd 585 | ptest 586 | punpckhqdq 587 | punpcklqdq 588 | push 589 | pushaw 590 | pushal 591 | pushf 592 | pushfd 593 | pushfq 594 | rcl 595 | rcpps 596 | rcpss 597 | rcr 598 | rdfsbase 599 | rdgsbase 600 | rdmsr 601 | rdpmc 602 | rdrand 603 | rdseed 604 | rdtsc 605 | rdtscp 606 | rol 607 | ror 608 | rorx 609 | roundpd 610 | roundps 611 | roundsd 612 | roundss 613 | rsm 614 | rsqrtps 615 | rsqrtss 616 | sahf 617 | sal 618 | salc 619 | sar 620 | sarx 621 | sbb 622 | scasb 623 | scasd 624 | scasq 625 | scasw 626 | setae 627 | seta 628 | setbe 629 | setb 630 | sete 631 | setge 632 | setg 633 | setle 634 | setl 635 | setne 636 | setno 637 | setnp 638 | setns 639 | seto 640 | setp 641 | sets 642 | sfence 643 | sgdt 644 | 
sha1msg1 645 | sha1msg2 646 | sha1nexte 647 | sha1rnds4 648 | sha256msg1 649 | sha256msg2 650 | sha256rnds2 651 | shl 652 | shld 653 | shlx 654 | shr 655 | shrd 656 | shrx 657 | shufpd 658 | shufps 659 | sidt 660 | fsin 661 | skinit 662 | sldt 663 | smsw 664 | sqrtpd 665 | sqrtps 666 | sqrtsd 667 | sqrtss 668 | fsqrt 669 | stac 670 | stc 671 | std 672 | stgi 673 | sti 674 | stmxcsr 675 | stosb 676 | stosd 677 | stosq 678 | stosw 679 | str 680 | fst 681 | fstp 682 | fstpnce 683 | fxch 684 | subpd 685 | subps 686 | fsubr 687 | fisubr 688 | fsubrp 689 | subsd 690 | subss 691 | fsub 692 | fisub 693 | fsubp 694 | swapgs 695 | syscall 696 | sysenter 697 | sysexit 698 | sysret 699 | t1mskc 700 | test 701 | ud2 702 | ftst 703 | tzcnt 704 | tzmsk 705 | fucomip 706 | fucomi 707 | fucompp 708 | fucomp 709 | fucom 710 | ud2b 711 | unpckhpd 712 | unpckhps 713 | unpcklpd 714 | unpcklps 715 | vaddpd 716 | vaddps 717 | vaddsd 718 | vaddss 719 | vaddsubpd 720 | vaddsubps 721 | vaesdeclast 722 | vaesdec 723 | vaesenclast 724 | vaesenc 725 | vaesimc 726 | vaeskeygenassist 727 | valignd 728 | valignq 729 | vandnpd 730 | vandnps 731 | vandpd 732 | vandps 733 | vblendmpd 734 | vblendmps 735 | vblendpd 736 | vblendps 737 | vblendvpd 738 | vblendvps 739 | vbroadcastf128 740 | vbroadcasti32x4 741 | vbroadcasti64x4 742 | vbroadcastsd 743 | vbroadcastss 744 | vcompresspd 745 | vcompressps 746 | vcvtdq2pd 747 | vcvtdq2ps 748 | vcvtpd2dqx 749 | vcvtpd2dq 750 | vcvtpd2psx 751 | vcvtpd2ps 752 | vcvtpd2udq 753 | vcvtph2ps 754 | vcvtps2dq 755 | vcvtps2pd 756 | vcvtps2ph 757 | vcvtps2udq 758 | vcvtsd2si 759 | vcvtsd2usi 760 | vcvtss2si 761 | vcvtss2usi 762 | vcvttpd2dqx 763 | vcvttpd2dq 764 | vcvttpd2udq 765 | vcvttps2dq 766 | vcvttps2udq 767 | vcvtudq2pd 768 | vcvtudq2ps 769 | vdivpd 770 | vdivps 771 | vdivsd 772 | vdivss 773 | vdppd 774 | vdpps 775 | verr 776 | verw 777 | vexp2pd 778 | vexp2ps 779 | vexpandpd 780 | vexpandps 781 | vextractf128 782 | vextractf32x4 783 | vextractf64x4 784 | 
vextracti128 785 | vextracti32x4 786 | vextracti64x4 787 | vextractps 788 | vfmadd132pd 789 | vfmadd132ps 790 | vfmaddpd 791 | vfmadd213pd 792 | vfmadd231pd 793 | vfmaddps 794 | vfmadd213ps 795 | vfmadd231ps 796 | vfmaddsd 797 | vfmadd213sd 798 | vfmadd132sd 799 | vfmadd231sd 800 | vfmaddss 801 | vfmadd213ss 802 | vfmadd132ss 803 | vfmadd231ss 804 | vfmaddsub132pd 805 | vfmaddsub132ps 806 | vfmaddsubpd 807 | vfmaddsub213pd 808 | vfmaddsub231pd 809 | vfmaddsubps 810 | vfmaddsub213ps 811 | vfmaddsub231ps 812 | vfmsub132pd 813 | vfmsub132ps 814 | vfmsubadd132pd 815 | vfmsubadd132ps 816 | vfmsubaddpd 817 | vfmsubadd213pd 818 | vfmsubadd231pd 819 | vfmsubaddps 820 | vfmsubadd213ps 821 | vfmsubadd231ps 822 | vfmsubpd 823 | vfmsub213pd 824 | vfmsub231pd 825 | vfmsubps 826 | vfmsub213ps 827 | vfmsub231ps 828 | vfmsubsd 829 | vfmsub213sd 830 | vfmsub132sd 831 | vfmsub231sd 832 | vfmsubss 833 | vfmsub213ss 834 | vfmsub132ss 835 | vfmsub231ss 836 | vfnmadd132pd 837 | vfnmadd132ps 838 | vfnmaddpd 839 | vfnmadd213pd 840 | vfnmadd231pd 841 | vfnmaddps 842 | vfnmadd213ps 843 | vfnmadd231ps 844 | vfnmaddsd 845 | vfnmadd213sd 846 | vfnmadd132sd 847 | vfnmadd231sd 848 | vfnmaddss 849 | vfnmadd213ss 850 | vfnmadd132ss 851 | vfnmadd231ss 852 | vfnmsub132pd 853 | vfnmsub132ps 854 | vfnmsubpd 855 | vfnmsub213pd 856 | vfnmsub231pd 857 | vfnmsubps 858 | vfnmsub213ps 859 | vfnmsub231ps 860 | vfnmsubsd 861 | vfnmsub213sd 862 | vfnmsub132sd 863 | vfnmsub231sd 864 | vfnmsubss 865 | vfnmsub213ss 866 | vfnmsub132ss 867 | vfnmsub231ss 868 | vfrczpd 869 | vfrczps 870 | vfrczsd 871 | vfrczss 872 | vorpd 873 | vorps 874 | vxorpd 875 | vxorps 876 | vgatherdpd 877 | vgatherdps 878 | vgatherpf0dpd 879 | vgatherpf0dps 880 | vgatherpf0qpd 881 | vgatherpf0qps 882 | vgatherpf1dpd 883 | vgatherpf1dps 884 | vgatherpf1qpd 885 | vgatherpf1qps 886 | vgatherqpd 887 | vgatherqps 888 | vhaddpd 889 | vhaddps 890 | vhsubpd 891 | vhsubps 892 | vinsertf128 893 | vinsertf32x4 894 | vinsertf32x8 895 | vinsertf64x2 896 
| vinsertf64x4 897 | vinserti128 898 | vinserti32x4 899 | vinserti32x8 900 | vinserti64x2 901 | vinserti64x4 902 | vinsertps 903 | vlddqu 904 | vldmxcsr 905 | vmaskmovdqu 906 | vmaskmovpd 907 | vmaskmovps 908 | vmaxpd 909 | vmaxps 910 | vmaxsd 911 | vmaxss 912 | vmcall 913 | vmclear 914 | vmfunc 915 | vminpd 916 | vminps 917 | vminsd 918 | vminss 919 | vmlaunch 920 | vmload 921 | vmmcall 922 | vmovq 923 | vmovddup 924 | vmovd 925 | vmovdqa32 926 | vmovdqa64 927 | vmovdqa 928 | vmovdqu16 929 | vmovdqu32 930 | vmovdqu64 931 | vmovdqu8 932 | vmovdqu 933 | vmovhlps 934 | vmovhpd 935 | vmovhps 936 | vmovlhps 937 | vmovlpd 938 | vmovlps 939 | vmovmskpd 940 | vmovmskps 941 | vmovntdqa 942 | vmovntdq 943 | vmovntpd 944 | vmovntps 945 | vmovsd 946 | vmovshdup 947 | vmovsldup 948 | vmovss 949 | vmovupd 950 | vmovups 951 | vmpsadbw 952 | vmptrld 953 | vmptrst 954 | vmread 955 | vmresume 956 | vmrun 957 | vmsave 958 | vmulpd 959 | vmulps 960 | vmulsd 961 | vmulss 962 | vmwrite 963 | vmxoff 964 | vmxon 965 | vpabsb 966 | vpabsd 967 | vpabsq 968 | vpabsw 969 | vpackssdw 970 | vpacksswb 971 | vpackusdw 972 | vpackuswb 973 | vpaddb 974 | vpaddd 975 | vpaddq 976 | vpaddsb 977 | vpaddsw 978 | vpaddusb 979 | vpaddusw 980 | vpaddw 981 | vpalignr 982 | vpandd 983 | vpandnd 984 | vpandnq 985 | vpandn 986 | vpandq 987 | vpand 988 | vpavgb 989 | vpavgw 990 | vpblendd 991 | vpblendmb 992 | vpblendmd 993 | vpblendmq 994 | vpblendmw 995 | vpblendvb 996 | vpblendw 997 | vpbroadcastb 998 | vpbroadcastd 999 | vpbroadcastmb2q 1000 | vpbroadcastmw2d 1001 | vpbroadcastq 1002 | vpbroadcastw 1003 | vpclmulqdq 1004 | vpcmov 1005 | vpcmpb 1006 | vpcmpd 1007 | vpcmpeqb 1008 | vpcmpeqd 1009 | vpcmpeqq 1010 | vpcmpeqw 1011 | vpcmpestri 1012 | vpcmpestrm 1013 | vpcmpgtb 1014 | vpcmpgtd 1015 | vpcmpgtq 1016 | vpcmpgtw 1017 | vpcmpistri 1018 | vpcmpistrm 1019 | vpcmpq 1020 | vpcmpub 1021 | vpcmpud 1022 | vpcmpuq 1023 | vpcmpuw 1024 | vpcmpw 1025 | vpcomb 1026 | vpcomd 1027 | vpcompressd 1028 | vpcompressq 
1029 | vpcomq 1030 | vpcomub 1031 | vpcomud 1032 | vpcomuq 1033 | vpcomuw 1034 | vpcomw 1035 | vpconflictd 1036 | vpconflictq 1037 | vperm2f128 1038 | vperm2i128 1039 | vpermd 1040 | vpermi2d 1041 | vpermi2pd 1042 | vpermi2ps 1043 | vpermi2q 1044 | vpermil2pd 1045 | vpermil2ps 1046 | vpermilpd 1047 | vpermilps 1048 | vpermpd 1049 | vpermps 1050 | vpermq 1051 | vpermt2d 1052 | vpermt2pd 1053 | vpermt2ps 1054 | vpermt2q 1055 | vpexpandd 1056 | vpexpandq 1057 | vpextrb 1058 | vpextrd 1059 | vpextrq 1060 | vpextrw 1061 | vpgatherdd 1062 | vpgatherdq 1063 | vpgatherqd 1064 | vpgatherqq 1065 | vphaddbd 1066 | vphaddbq 1067 | vphaddbw 1068 | vphadddq 1069 | vphaddd 1070 | vphaddsw 1071 | vphaddubd 1072 | vphaddubq 1073 | vphaddubw 1074 | vphaddudq 1075 | vphadduwd 1076 | vphadduwq 1077 | vphaddwd 1078 | vphaddwq 1079 | vphaddw 1080 | vphminposuw 1081 | vphsubbw 1082 | vphsubdq 1083 | vphsubd 1084 | vphsubsw 1085 | vphsubwd 1086 | vphsubw 1087 | vpinsrb 1088 | vpinsrd 1089 | vpinsrq 1090 | vpinsrw 1091 | vplzcntd 1092 | vplzcntq 1093 | vpmacsdd 1094 | vpmacsdqh 1095 | vpmacsdql 1096 | vpmacssdd 1097 | vpmacssdqh 1098 | vpmacssdql 1099 | vpmacsswd 1100 | vpmacssww 1101 | vpmacswd 1102 | vpmacsww 1103 | vpmadcsswd 1104 | vpmadcswd 1105 | vpmaddubsw 1106 | vpmaddwd 1107 | vpmaskmovd 1108 | vpmaskmovq 1109 | vpmaxsb 1110 | vpmaxsd 1111 | vpmaxsq 1112 | vpmaxsw 1113 | vpmaxub 1114 | vpmaxud 1115 | vpmaxuq 1116 | vpmaxuw 1117 | vpminsb 1118 | vpminsd 1119 | vpminsq 1120 | vpminsw 1121 | vpminub 1122 | vpminud 1123 | vpminuq 1124 | vpminuw 1125 | vpmovdb 1126 | vpmovdw 1127 | vpmovm2b 1128 | vpmovm2d 1129 | vpmovm2q 1130 | vpmovm2w 1131 | vpmovmskb 1132 | vpmovqb 1133 | vpmovqd 1134 | vpmovqw 1135 | vpmovsdb 1136 | vpmovsdw 1137 | vpmovsqb 1138 | vpmovsqd 1139 | vpmovsqw 1140 | vpmovsxbd 1141 | vpmovsxbq 1142 | vpmovsxbw 1143 | vpmovsxdq 1144 | vpmovsxwd 1145 | vpmovsxwq 1146 | vpmovusdb 1147 | vpmovusdw 1148 | vpmovusqb 1149 | vpmovusqd 1150 | vpmovusqw 1151 | vpmovzxbd 1152 | 
vpmovzxbq 1153 | vpmovzxbw 1154 | vpmovzxdq 1155 | vpmovzxwd 1156 | vpmovzxwq 1157 | vpmuldq 1158 | vpmulhrsw 1159 | vpmulhuw 1160 | vpmulhw 1161 | vpmulld 1162 | vpmullq 1163 | vpmullw 1164 | vpmuludq 1165 | vpord 1166 | vporq 1167 | vpor 1168 | vpperm 1169 | vprotb 1170 | vprotd 1171 | vprotq 1172 | vprotw 1173 | vpsadbw 1174 | vpscatterdd 1175 | vpscatterdq 1176 | vpscatterqd 1177 | vpscatterqq 1178 | vpshab 1179 | vpshad 1180 | vpshaq 1181 | vpshaw 1182 | vpshlb 1183 | vpshld 1184 | vpshlq 1185 | vpshlw 1186 | vpshufb 1187 | vpshufd 1188 | vpshufhw 1189 | vpshuflw 1190 | vpsignb 1191 | vpsignd 1192 | vpsignw 1193 | vpslldq 1194 | vpslld 1195 | vpsllq 1196 | vpsllvd 1197 | vpsllvq 1198 | vpsllw 1199 | vpsrad 1200 | vpsraq 1201 | vpsravd 1202 | vpsravq 1203 | vpsraw 1204 | vpsrldq 1205 | vpsrld 1206 | vpsrlq 1207 | vpsrlvd 1208 | vpsrlvq 1209 | vpsrlw 1210 | vpsubb 1211 | vpsubd 1212 | vpsubq 1213 | vpsubsb 1214 | vpsubsw 1215 | vpsubusb 1216 | vpsubusw 1217 | vpsubw 1218 | vptestmd 1219 | vptestmq 1220 | vptestnmd 1221 | vptestnmq 1222 | vptest 1223 | vpunpckhbw 1224 | vpunpckhdq 1225 | vpunpckhqdq 1226 | vpunpckhwd 1227 | vpunpcklbw 1228 | vpunpckldq 1229 | vpunpcklqdq 1230 | vpunpcklwd 1231 | vpxord 1232 | vpxorq 1233 | vpxor 1234 | vrcp14pd 1235 | vrcp14ps 1236 | vrcp14sd 1237 | vrcp14ss 1238 | vrcp28pd 1239 | vrcp28ps 1240 | vrcp28sd 1241 | vrcp28ss 1242 | vrcpps 1243 | vrcpss 1244 | vrndscalepd 1245 | vrndscaleps 1246 | vrndscalesd 1247 | vrndscaless 1248 | vroundpd 1249 | vroundps 1250 | vroundsd 1251 | vroundss 1252 | vrsqrt14pd 1253 | vrsqrt14ps 1254 | vrsqrt14sd 1255 | vrsqrt14ss 1256 | vrsqrt28pd 1257 | vrsqrt28ps 1258 | vrsqrt28sd 1259 | vrsqrt28ss 1260 | vrsqrtps 1261 | vrsqrtss 1262 | vscatterdpd 1263 | vscatterdps 1264 | vscatterpf0dpd 1265 | vscatterpf0dps 1266 | vscatterpf0qpd 1267 | vscatterpf0qps 1268 | vscatterpf1dpd 1269 | vscatterpf1dps 1270 | vscatterpf1qpd 1271 | vscatterpf1qps 1272 | vscatterqpd 1273 | vscatterqps 1274 | vshufpd 1275 | 
vshufps 1276 | vsqrtpd 1277 | vsqrtps 1278 | vsqrtsd 1279 | vsqrtss 1280 | vstmxcsr 1281 | vsubpd 1282 | vsubps 1283 | vsubsd 1284 | vsubss 1285 | vtestpd 1286 | vtestps 1287 | vunpckhpd 1288 | vunpckhps 1289 | vunpcklpd 1290 | vunpcklps 1291 | vzeroall 1292 | vzeroupper 1293 | wait 1294 | wbinvd 1295 | wrfsbase 1296 | wrgsbase 1297 | wrmsr 1298 | xabort 1299 | xacquire 1300 | xbegin 1301 | xchg 1302 | xcryptcbc 1303 | xcryptcfb 1304 | xcryptctr 1305 | xcryptecb 1306 | xcryptofb 1307 | xend 1308 | xgetbv 1309 | xlatb 1310 | xrelease 1311 | xrstor 1312 | xrstor64 1313 | xrstors 1314 | xrstors64 1315 | xsave 1316 | xsave64 1317 | xsavec 1318 | xsavec64 1319 | xsaveopt 1320 | xsaveopt64 1321 | xsaves 1322 | xsaves64 1323 | xsetbv 1324 | xsha1 1325 | xsha256 1326 | xstore 1327 | xtest 1328 | cmpss 1329 | cmpeqss 1330 | cmpltss 1331 | cmpless 1332 | cmpunordss 1333 | cmpneqss 1334 | cmpnltss 1335 | cmpnless 1336 | cmpordss 1337 | cmpsd 1338 | cmpeqsd 1339 | cmpltsd 1340 | cmplesd 1341 | cmpunordsd 1342 | cmpneqsd 1343 | cmpnltsd 1344 | cmpnlesd 1345 | cmpordsd 1346 | cmpps 1347 | cmpeqps 1348 | cmpltps 1349 | cmpleps 1350 | cmpunordps 1351 | cmpneqps 1352 | cmpnltps 1353 | cmpnleps 1354 | cmpordps 1355 | cmppd 1356 | cmpeqpd 1357 | cmpltpd 1358 | cmplepd 1359 | cmpunordpd 1360 | cmpneqpd 1361 | cmpnltpd 1362 | cmpnlepd 1363 | cmpordpd 1364 | vcmpss 1365 | vcmpeqss 1366 | vcmpltss 1367 | vcmpless 1368 | vcmpunordss 1369 | vcmpneqss 1370 | vcmpnltss 1371 | vcmpnless 1372 | vcmpordss 1373 | vcmpngess 1374 | vcmpngtss 1375 | vcmpfalsess 1376 | vcmpgess 1377 | vcmpgtss 1378 | vcmptruess 1379 | vcmpsd 1380 | vcmpeqsd 1381 | vcmpltsd 1382 | vcmplesd 1383 | vcmpunordsd 1384 | vcmpneqsd 1385 | vcmpnltsd 1386 | vcmpnlesd 1387 | vcmpordsd 1388 | vcmpngesd 1389 | vcmpngtsd 1390 | vcmpfalsesd 1391 | vcmpgesd 1392 | vcmpgtsd 1393 | vcmptruesd 1394 | vcmpps 1395 | vcmpeqps 1396 | vcmpltps 1397 | vcmpleps 1398 | vcmpunordps 1399 | vcmpneqps 1400 | vcmpnltps 1401 | vcmpnleps 1402 | 
vcmpordps 1403 | vcmpngeps 1404 | vcmpngtps 1405 | vcmpfalseps 1406 | vcmpgeps 1407 | vcmpgtps 1408 | vcmptrueps 1409 | vcmppd 1410 | vcmpeqpd 1411 | vcmpltpd 1412 | vcmplepd 1413 | vcmpunordpd 1414 | vcmpneqpd 1415 | vcmpnltpd 1416 | vcmpnlepd 1417 | vcmpordpd 1418 | vcmpngepd 1419 | vcmpngtpd 1420 | vcmpfalsepd 1421 | vcmpgepd 1422 | vcmpgtpd 1423 | vcmptruepd 1424 | ud0 1425 | endbr32 1426 | endbr64 -------------------------------------------------------------------------------- /old/gray.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import numpy\n", 11 | "import time\n", 12 | "import signal\n", 13 | "import binascii\n", 14 | "import threading\n", 15 | "import subprocess\n", 16 | "from PIL import Image\n", 17 | "from tqdm import tqdm\n", 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import pickle\n", 22 | "\n", 23 | "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score\n", 24 | "from sklearn.model_selection import train_test_split\n", 25 | "\n", 26 | "import tensorflow as tf\n", 27 | "from tensorflow.keras import layers, models\n", 28 | "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n", 29 | "\n", 30 | "from raw_features import ByteHistogram, ByteEntropyHistogram\n", 31 | "\n", 32 | "%matplotlib inline" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## 读取数据地址" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "data_dir = \"../data_rec_mzpe\"\n", 49 | "os.environ[\"CUDA_VISIBLE_DEVICES\"] = '0'" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 3, 55 | "metadata": {}, 56 | "outputs": [], 57 | 
"source": [ 58 | "# train_black_path = []\n", 59 | "# for parent, dirnames, filenames in os.walk(os.path.join(data_dir, \"train\")):\n", 60 | "# if \"black\" in parent:\n", 61 | "# for filename in filenames:\n", 62 | "# fp = os.path.join(parent, filename)\n", 63 | "# train_black_path.append(os.path.abspath(fp))\n", 64 | "\n", 65 | "# train_white_path = []\n", 66 | "# for parent, dirnames, filenames in os.walk(os.path.join(data_dir, \"train\")):\n", 67 | "# if \"white\" in parent:\n", 68 | "# for filename in filenames:\n", 69 | "# fp = os.path.join(parent, filename)\n", 70 | "# train_white_path.append(os.path.abspath(fp))\n", 71 | "\n", 72 | "# test_path = []\n", 73 | "# for parent, dirnames, filenames in os.walk(os.path.join(data_dir, \"test\")):\n", 74 | "# for filename in filenames:\n", 75 | "# fp = os.path.join(parent, filename)\n", 76 | "# test_path.append(os.path.abspath(fp))\n", 77 | "\n", 78 | "# real_path = []\n", 79 | "# for parent, dirnames, filenames in os.walk(os.path.join(data_dir, \"real\")):\n", 80 | "# if \"1_data_real\" in parent:\n", 81 | "# for filename in filenames:\n", 82 | "# fp = os.path.join(parent, filename)\n", 83 | "# real_path.append(os.path.abspath(fp))\n", 84 | "\n", 85 | "# with open(\"train_black_path.pkl\", 'wb') as f:\n", 86 | "# pickle.dump(train_black_path, f)\n", 87 | "\n", 88 | "# with open(\"train_white_path.pkl\", 'wb') as f:\n", 89 | "# pickle.dump(train_white_path, f)\n", 90 | "\n", 91 | "# with open(\"test_path.pkl\", 'wb') as f:\n", 92 | "# pickle.dump(test_path, f)\n", 93 | "\n", 94 | "# with open(\"real_path.pkl\", 'wb') as f:\n", 95 | "# pickle.dump(real_path, f)\n", 96 | "\n", 97 | "with open(\"train_black_path.pkl\", 'rb') as f:\n", 98 | " train_black_path = pickle.load(f)\n", 99 | "\n", 100 | "with open(\"train_white_path.pkl\", 'rb') as f:\n", 101 | " train_white_path = pickle.load(f)\n", 102 | "\n", 103 | "with open(\"test_path.pkl\", 'rb') as f:\n", 104 | " test_path = pickle.load(f)\n", 105 | "\n", 106 | "with 
open(\"real_path.pkl\", 'rb') as f:\n", 107 | " real_path = pickle.load(f)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "拷贝一份" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 4, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "# if not os.path.exists(\"gray\"):\n", 124 | "# os.makedirs(\"gray/black\")\n", 125 | "# os.makedirs(\"gray/white\")\n", 126 | "# os.makedirs(\"gray/test\")\n", 127 | "# os.makedirs(\"gray/real\")\n", 128 | "\n", 129 | "# for pt in train_black_path:\n", 130 | "# os.system(\"cp {0} {1}\".format(pt, \"gray/black/\"))\n", 131 | "# for pt in train_white_path:\n", 132 | "# os.system(\"cp {0} {1}\".format(pt, \"gray/white/\"))\n", 133 | "# for pt in test_path:\n", 134 | "# os.system(\"cp {0} {1}\".format(pt, \"gray/test/\"))\n", 135 | "# for pt in real_path[6007:]:\n", 136 | "# os.system(\"cp {0} {1}\".format(pt, \"gray/real/\"))\n", 137 | "\n", 138 | "train_black_path = [pt.replace(\"data_rec_mzpe/train/1_2000_\", \"jupyterlab/gray/\") for pt in train_black_path]\n", 139 | "train_white_path = [pt.replace(\"data_rec_mzpe/train/1_4000_\", \"jupyterlab/gray/\") for pt in train_white_path]\n", 140 | "test_path = [pt.replace(\"data_rec_mzpe/test/data\", \"jupyterlab/gray/test\") for pt in test_path]\n", 141 | "real_path = [pt.replace(\"data_rec_mzpe/real/1_data_real\", \"jupyterlab/gray/real\") for pt in real_path]\n", 142 | "real_path = [pt.replace(\"data_rec_mzpe/real/2_data_real\", \"jupyterlab/gray/real\") for pt in real_path]\n", 143 | "real_path = [pt.replace(\"data_rec_mzpe/real/3_data_real\", \"jupyterlab/gray/real\") for pt in real_path]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "## 提取灰度图信息" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 6, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "empty = threading.Semaphore(value=20)\n", 
def getMatrixfrom_bin(filename, width):
    """Read a file as raw bytes and arrange them into a 2-D grayscale matrix.

    Args:
        filename: Path of the file to read in binary mode.
        width: Number of bytes (pixels) per matrix row.

    Returns:
        A writable ``numpy.uint8`` array of shape ``(len(data) // width, width)``.
        Trailing bytes that do not fill a whole row are dropped, so a file
        shorter than ``width`` yields a matrix with zero rows.
    """
    with open(filename, 'rb') as f:
        content = f.read()
    # View the bytes directly as uint8 instead of hex-encoding the file and
    # parsing two characters at a time; the values are identical.
    pixels = numpy.frombuffer(content, dtype=numpy.uint8)
    rows = len(pixels) // width
    # frombuffer() returns a read-only view of `content`; copy() hands the
    # caller an independent, writable array.
    return pixels[:rows * width].reshape(rows, width).copy()


def generate_gray(fp, size=512):
    """Convert the file ``fp`` into ``fp + '.png'`` and delete ``fp``.

    The file's bytes are laid out ``size`` per row, turned into an image,
    resized to ``size`` x ``size`` and saved next to the input. One permit of
    the module-level semaphore ``empty`` is always released on exit, so a
    failing worker can no longer leak a permit and stall the dispatcher.

    NOTE(review): the source dump lost its indentation; ``os.remove(fp)`` is
    assumed to run whether or not the conversion succeeded -- confirm against
    the original notebook.

    Args:
        fp: Path of the input file.
        size: Row width used for the byte matrix and edge length of the saved
            image. Defaults to 512.
    """
    import sys  # local import: `sys` is not among the notebook's imports

    try:
        try:
            im = Image.fromarray(getMatrixfrom_bin(fp, size))
            im = im.resize((size, size))
            im.save(fp + '.png')
        except Exception as e:
            # Best effort: report the failure instead of hiding it, keep going.
            print("generate_gray: could not convert {0}: {1!r}".format(fp, e),
                  file=sys.stderr)
        try:
            os.remove(fp)
        except OSError as e:
            print("generate_gray: could not remove {0}: {1!r}".format(fp, e),
                  file=sys.stderr)
    finally:
        empty.release()


# Recorded output of the original run of the dispatch cell:
#   gray: 100%|...| 3010/3010 [41:53<00:00, 1.20it/s]
#
# Disabled dispatch loops for the training and test sets, kept from the notebook:
#
# with tqdm(total=6000, ncols=80, desc="gray") as pbar:
#     for fp in train_black_path:
#         empty.acquire()
#         t = threading.Thread(target=generate_gray, args=(fp, ), daemon=True)
#         t.start()
#         pbar.update(1)
#     for fp in train_white_path:
#         empty.acquire()
#         t = threading.Thread(target=generate_gray, args=(fp, ), daemon=True)
#         t.start()
#         pbar.update(1)
#
# with tqdm(total=6000, ncols=80, desc="gray") as pbar:
#     for fp in test_path:
#         empty.acquire()
#         t = threading.Thread(target=generate_gray, args=(fp, ), daemon=True)
#         t.start()
#         pbar.update(1)
# --- Convert the remaining "real" samples to grayscale PNGs ------------------
# NOTE(review): the 6007 offset is hard-coded; presumably the earlier entries
# were already converted in a previous run -- confirm before re-running.
pending = real_path[6007:]
workers = []
with tqdm(total=len(pending), ncols=80, desc="gray") as pbar:
    for fp in pending:
        # Blocks until a permit is free; generate_gray() releases it when the
        # worker finishes, which bounds the number of concurrent workers.
        empty.acquire()
        t = threading.Thread(target=generate_gray, args=(fp, ), daemon=True)
        t.start()
        workers.append(t)
        pbar.update(1)
# Wait for the last workers so that every PNG exists before it is read below.
for t in workers:
    t.join()

# --- Read the image data and attach labels -----------------------------------
# Recorded output of the original run of this cell:
#   gray: 100%|...| 21017/21017 [01:09<00:00, 302.95it/s]


def _load_gray_png(fp):
    """Return the pixels of ``fp + '.png'`` as a numpy array and close the file."""
    with Image.open(fp + '.png') as image:
        return np.array(image)


raw_data, raw_labels = [], []

test_data_feature, real_data_feature = [], []

# Derive the progress total from the lists instead of assuming 6000 * 2.
total_images = (len(train_black_path) + len(train_white_path)
                + len(test_path) + len(real_path))
with tqdm(total=total_images, ncols=80, desc="gray") as pbar:
    # Training samples: label 1 for the "black" list, 0 for the "white" list.
    for fp in train_black_path:
        raw_data.append(_load_gray_png(fp))
        raw_labels.append(1)
        pbar.update(1)
    for fp in train_white_path:
        raw_data.append(_load_gray_png(fp))
        raw_labels.append(0)
        pbar.update(1)
    # Unlabelled sets: only the pixel data is kept.
    for fp in test_path:
        test_data_feature.append(_load_gray_png(fp))
        pbar.update(1)
    for fp in real_path:
        real_data_feature.append(_load_gray_png(fp))
        pbar.update(1)
"source": [ 290 | "TRAIN_SIZE = 0.8\n", 291 | "VAL_SIZE = 0.1\n", 292 | "TEST_SIZE = 0.1\n", 293 | "SEED = 4396\n", 294 | "\n", 295 | "WIDTH, HEIGHT = 512, 512\n", 296 | "BATCH_SIZE = 32\n", 297 | "EPOCH = 300\n", 298 | "SHUFFLE = False\n", 299 | "CLASSES = 2\n", 300 | "\n", 301 | "LR = 1e-5" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "## 打乱顺序" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 7, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "np.random.seed(SEED)\n", 318 | "tf.random.set_seed(SEED)\n", 319 | "\n", 320 | "img_data, labels = np.array(raw_data, dtype=np.float32), np.array(raw_labels, dtype=np.int32)\n", 321 | "\n", 322 | "index = list(range(len(labels)))\n", 323 | "np.random.shuffle(index)\n", 324 | "\n", 325 | "img_data = img_data[index]\n", 326 | "labels = labels[index]" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "## 划分数据集" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 14, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "train_image, test_image, train_label, test_label = train_test_split(\n", 343 | " img_data,\n", 344 | " labels,\n", 345 | " test_size=TEST_SIZE,\n", 346 | " stratify=labels,\n", 347 | " random_state=SEED)\n", 348 | "train_image, valid_image, train_label, valid_label = train_test_split(\n", 349 | " train_image,\n", 350 | " train_label,\n", 351 | " test_size=VAL_SIZE,\n", 352 | " stratify=train_label,\n", 353 | " random_state=SEED)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "## 加载Dataset" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 15, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "train_ds = tf.data.Dataset.from_tensor_slices((train_image, train_label)) \\\n", 370 | " .batch(BATCH_SIZE) \\\n", 371 | " 
.prefetch(buffer_size = tf.data.experimental.AUTOTUNE)\n", 372 | "\n", 373 | "valid_ds = tf.data.Dataset.from_tensor_slices((valid_image, valid_label)) \\\n", 374 | " .batch(BATCH_SIZE) \\\n", 375 | " .prefetch(buffer_size = tf.data.experimental.AUTOTUNE)\n", 376 | "\n", 377 | "test_ds = tf.data.Dataset.from_tensor_slices((test_image, test_label)) \\\n", 378 | " .batch(BATCH_SIZE) \\\n", 379 | " .prefetch(buffer_size = tf.data.experimental.AUTOTUNE)" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "## 构建模型" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 29, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "# 构建模型\n", 396 | "inputs = layers.Input(shape=(WIDTH, HEIGHT), dtype='float32')\n", 397 | "inputs_re = tf.reshape(inputs, (-1, WIDTH, HEIGHT, 1))\n", 398 | "Conv_1 = layers.Convolution2D(30, (3, 3),\n", 399 | " strides=2,\n", 400 | " padding='same',\n", 401 | " activation='relu')(inputs_re)\n", 402 | "Max_pool_1 = layers.MaxPooling2D((2, 2), strides=2)(Conv_1)\n", 403 | "Conv_2 = layers.Convolution2D(50, (3, 3),\n", 404 | " strides=2,\n", 405 | " padding='same',\n", 406 | " activation='relu')(Max_pool_1)\n", 407 | "Max_pool_2 = layers.MaxPooling2D((2, 2), strides=2)(Conv_2)\n", 408 | "Conv_3 = layers.Convolution2D(100, (3, 3),\n", 409 | " strides=2,\n", 410 | " padding='same',\n", 411 | " activation='relu')(Max_pool_2)\n", 412 | "Max_pool_3 = layers.MaxPooling2D((2, 2), strides=2)(Conv_3)\n", 413 | "Flat = layers.Flatten()(Max_pool_3)\n", 414 | "Dense_1 = layers.Dense(500, activation='tanh')(Flat)\n", 415 | "dropout = layers.Dropout(0.5)(Dense_1)\n", 416 | "outputs = layers.Dense(1, activation='sigmoid')(dropout)\n", 417 | "\n", 418 | "\n", 419 | "model = models.Model(inputs=inputs, outputs=outputs)" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 30, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [ 428 | "# 自定义评估指标\n", 
# Custom evaluation metric
def score(y_true, y_pred):
    """Return recall minus 0.9 times the false-positive rate for one batch.

    Both inputs are flattened; ``y_pred`` is rounded to 0/1 before counting.
    A ratio whose denominator is zero (a batch with no positive or no
    negative labels) contributes 0 instead of NaN.

    Args:
        y_true: Tensor of ground-truth labels (0 or 1).
        y_pred: Tensor of predicted probabilities.

    Returns:
        Scalar float32 tensor: ``tp / (tp + fn) - 0.9 * fp / (fp + tn)``.
    """
    y_true = tf.cast(tf.reshape(y_true, (-1,)), tf.int32)
    y_pred = tf.cast(tf.math.round(tf.reshape(y_pred, (-1,))), tf.int32)

    def _count(true_value, pred_value):
        """Count samples whose label and rounded prediction equal the given values."""
        hits = tf.logical_and(tf.equal(y_true, true_value), tf.equal(y_pred, pred_value))
        return tf.reduce_sum(tf.cast(hits, dtype=tf.float32))

    tp = _count(1, 1)
    fn = _count(1, 0)
    fp = _count(0, 1)
    tn = _count(0, 0)

    # divide_no_nan yields 0 where the denominator is 0, avoiding NaN results.
    recall = tf.math.divide_no_nan(tp, tp + fn)
    false_positive_rate = tf.math.divide_no_nan(fp, fp + tn)

    return recall - 0.9 * false_positive_rate


# NOTE(review): `score` is defined above but is not passed to `metrics` here;
# only accuracy is tracked during training.
model.compile(optimizer=tf.keras.optimizers.Nadam(LR),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Recorded output of the notebook's model.summary() cell (first rows, as
# captured from the original run):
#   Model: "model_1"
#   _________________________________________________________________
#   Layer (type) Output Shape Param #
#   =================================================================
#   input_2 (InputLayer) [(None, 512, 512)] 0
#   _________________________________________________________________
#   tf_op_layer_Reshape_1 (Tenso [(None, 512, 512, 1)] 0
#   _________________________________________________________________
#   conv2d_3 (Conv2D) (None, 256, 256, 30) 300
#   _________________________________________________________________
#   max_pooling2d_3 (MaxPooling2 (None, 128, 128, 30) 0
#   _________________________________________________________________
#   conv2d_4 (Conv2D) (None, 64, 64, 50) 13550
"_________________________________________________________________\n", 470 | "max_pooling2d_4 (MaxPooling2 (None, 32, 32, 50) 0 \n", 471 | "_________________________________________________________________\n", 472 | "conv2d_5 (Conv2D) (None, 16, 16, 100) 45100 \n", 473 | "_________________________________________________________________\n", 474 | "max_pooling2d_5 (MaxPooling2 (None, 8, 8, 100) 0 \n", 475 | "_________________________________________________________________\n", 476 | "flatten_1 (Flatten) (None, 6400) 0 \n", 477 | "_________________________________________________________________\n", 478 | "dense_2 (Dense) (None, 500) 3200500 \n", 479 | "_________________________________________________________________\n", 480 | "dropout_1 (Dropout) (None, 500) 0 \n", 481 | "_________________________________________________________________\n", 482 | "dense_3 (Dense) (None, 1) 501 \n", 483 | "=================================================================\n", 484 | "Total params: 3,259,951\n", 485 | "Trainable params: 3,259,951\n", 486 | "Non-trainable params: 0\n", 487 | "_________________________________________________________________\n" 488 | ] 489 | } 490 | ], 491 | "source": [ 492 | "model.summary()" 493 | ] 494 | }, 495 | { 496 | "cell_type": "markdown", 497 | "metadata": {}, 498 | "source": [ 499 | "## 训练" 500 | ] 501 | }, 502 | { 503 | "cell_type": "code", 504 | "execution_count": 32, 505 | "metadata": {}, 506 | "outputs": [ 507 | { 508 | "name": "stdout", 509 | "output_type": "stream", 510 | "text": [ 511 | "Epoch 1/300\n", 512 | "152/152 [==============================] - 9s 56ms/step - loss: 0.2638 - accuracy: 0.9187 - val_loss: 0.1581 - val_accuracy: 0.9574 - lr: 1.0000e-05\n", 513 | "Epoch 2/300\n", 514 | "152/152 [==============================] - 8s 52ms/step - loss: 0.2052 - accuracy: 0.9426 - val_loss: 0.1491 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 515 | "Epoch 3/300\n", 516 | "152/152 [==============================] - 8s 52ms/step - loss: 
0.2020 - accuracy: 0.9467 - val_loss: 0.1443 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 517 | "Epoch 4/300\n", 518 | "152/152 [==============================] - 8s 54ms/step - loss: 0.1973 - accuracy: 0.9453 - val_loss: 0.1392 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 519 | "Epoch 5/300\n", 520 | "152/152 [==============================] - 8s 52ms/step - loss: 0.1793 - accuracy: 0.9473 - val_loss: 0.1317 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 521 | "Epoch 6/300\n", 522 | "152/152 [==============================] - 9s 59ms/step - loss: 0.1791 - accuracy: 0.9467 - val_loss: 0.1287 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 523 | "Epoch 7/300\n", 524 | "152/152 [==============================] - 8s 52ms/step - loss: 0.1794 - accuracy: 0.9484 - val_loss: 0.1276 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 525 | "Epoch 8/300\n", 526 | "152/152 [==============================] - 8s 53ms/step - loss: 0.1693 - accuracy: 0.9479 - val_loss: 0.1227 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 527 | "Epoch 9/300\n", 528 | "152/152 [==============================] - 9s 57ms/step - loss: 0.1706 - accuracy: 0.9506 - val_loss: 0.1225 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 529 | "Epoch 10/300\n", 530 | "152/152 [==============================] - 8s 52ms/step - loss: 0.1649 - accuracy: 0.9525 - val_loss: 0.1184 - val_accuracy: 0.9593 - lr: 1.0000e-05\n", 531 | "Epoch 11/300\n", 532 | "152/152 [==============================] - 8s 55ms/step - loss: 0.1611 - accuracy: 0.9514 - val_loss: 0.1225 - val_accuracy: 0.9611 - lr: 1.0000e-05\n", 533 | "Epoch 12/300\n", 534 | "152/152 [==============================] - 9s 57ms/step - loss: 0.1611 - accuracy: 0.9510 - val_loss: 0.1192 - val_accuracy: 0.9611 - lr: 1.0000e-05\n", 535 | "Epoch 13/300\n", 536 | "152/152 [==============================] - 8s 54ms/step - loss: 0.1603 - accuracy: 0.9539 - val_loss: 0.1198 - val_accuracy: 0.9667 - lr: 1.0000e-05\n", 537 | "Epoch 14/300\n", 538 | "152/152 [==============================] - 8s 
52ms/step - loss: 0.1580 - accuracy: 0.9535 - val_loss: 0.1182 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 539 | "Epoch 15/300\n", 540 | "152/152 [==============================] - 8s 51ms/step - loss: 0.1456 - accuracy: 0.9535 - val_loss: 0.1201 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 541 | "Epoch 16/300\n", 542 | "152/152 [==============================] - 8s 53ms/step - loss: 0.1491 - accuracy: 0.9547 - val_loss: 0.1138 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 543 | "Epoch 17/300\n", 544 | "152/152 [==============================] - 8s 51ms/step - loss: 0.1364 - accuracy: 0.9570 - val_loss: 0.1191 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 545 | "Epoch 18/300\n", 546 | "152/152 [==============================] - 8s 52ms/step - loss: 0.1456 - accuracy: 0.9553 - val_loss: 0.1175 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 547 | "Epoch 19/300\n", 548 | "152/152 [==============================] - 9s 58ms/step - loss: 0.1448 - accuracy: 0.9568 - val_loss: 0.1132 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 549 | "Epoch 20/300\n", 550 | "152/152 [==============================] - 8s 54ms/step - loss: 0.1383 - accuracy: 0.9586 - val_loss: 0.1156 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 551 | "Epoch 21/300\n", 552 | "152/152 [==============================] - 8s 55ms/step - loss: 0.1339 - accuracy: 0.9553 - val_loss: 0.1117 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 553 | "Epoch 22/300\n", 554 | "152/152 [==============================] - 8s 56ms/step - loss: 0.1351 - accuracy: 0.9562 - val_loss: 0.1125 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 555 | "Epoch 23/300\n", 556 | "152/152 [==============================] - 9s 57ms/step - loss: 0.1341 - accuracy: 0.9566 - val_loss: 0.1128 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 557 | "Epoch 24/300\n", 558 | "152/152 [==============================] - 8s 54ms/step - loss: 0.1373 - accuracy: 0.9556 - val_loss: 0.1132 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 559 | "Epoch 25/300\n", 560 | "152/152 
[==============================] - 8s 51ms/step - loss: 0.1273 - accuracy: 0.9605 - val_loss: 0.1127 - val_accuracy: 0.9704 - lr: 1.0000e-05\n", 561 | "Epoch 26/300\n", 562 | "152/152 [==============================] - 8s 52ms/step - loss: 0.1265 - accuracy: 0.9605 - val_loss: 0.1114 - val_accuracy: 0.9704 - lr: 5.0000e-06\n", 563 | "Epoch 27/300\n", 564 | "152/152 [==============================] - 9s 57ms/step - loss: 0.1256 - accuracy: 0.9599 - val_loss: 0.1097 - val_accuracy: 0.9704 - lr: 5.0000e-06\n", 565 | "Epoch 28/300\n", 566 | "152/152 [==============================] - 8s 50ms/step - loss: 0.1150 - accuracy: 0.9630 - val_loss: 0.1101 - val_accuracy: 0.9704 - lr: 5.0000e-06\n", 567 | "Epoch 29/300\n", 568 | "152/152 [==============================] - 8s 52ms/step - loss: 0.1170 - accuracy: 0.9593 - val_loss: 0.1106 - val_accuracy: 0.9704 - lr: 5.0000e-06\n", 569 | "Epoch 30/300\n", 570 | "152/152 [==============================] - 8s 55ms/step - loss: 0.1168 - accuracy: 0.9623 - val_loss: 0.1120 - val_accuracy: 0.9704 - lr: 5.0000e-06\n", 571 | "Epoch 31/300\n", 572 | "152/152 [==============================] - 8s 51ms/step - loss: 0.1165 - accuracy: 0.9619 - val_loss: 0.1103 - val_accuracy: 0.9704 - lr: 5.0000e-06\n", 573 | "Epoch 32/300\n", 574 | "152/152 [==============================] - 8s 52ms/step - loss: 0.1105 - accuracy: 0.9638 - val_loss: 0.1111 - val_accuracy: 0.9704 - lr: 2.5000e-06\n", 575 | "Epoch 33/300\n", 576 | "152/152 [==============================] - 8s 55ms/step - loss: 0.1097 - accuracy: 0.9621 - val_loss: 0.1128 - val_accuracy: 0.9704 - lr: 2.5000e-06\n", 577 | "Epoch 34/300\n", 578 | "152/152 [==============================] - 8s 54ms/step - loss: 0.1052 - accuracy: 0.9646 - val_loss: 0.1124 - val_accuracy: 0.9704 - lr: 2.5000e-06\n" 579 | ] 580 | } 581 | ], 582 | "source": [ 583 | "history = model.fit(\n", 584 | " train_ds,\n", 585 | " validation_data=valid_ds,\n", 586 | " # class_weight=class_weight_dict,\n", 587 | " 
epochs=EPOCH,\n", 588 | " workers=4,\n", 589 | " callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=7, min_delta=1e-4, mode='min'),\n", 590 | " tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.5, verbose=0)])" 591 | ] 592 | }, 593 | { 594 | "cell_type": "code", 595 | "execution_count": 33, 596 | "metadata": {}, 597 | "outputs": [], 598 | "source": [ 599 | "dfhistory = pd.DataFrame(history.history)" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": 34, 605 | "metadata": {}, 606 | "outputs": [ 607 | { 608 | "data": { 609 | "text/plain": [ 610 | "" 611 | ] 612 | }, 613 | "execution_count": 34, 614 | "metadata": {}, 615 | "output_type": "execute_result" 616 | }, 617 | { 618 | "data": { 619 | "image/png": "\n", 620 | "text/plain": [ 621 | "
" 622 | ] 623 | }, 624 | "metadata": { 625 | "needs_background": "light" 626 | }, 627 | "output_type": "display_data" 628 | } 629 | ], 630 | "source": [ 631 | "dfhistory[['val_loss', 'loss']].plot()" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": 35, 637 | "metadata": {}, 638 | "outputs": [ 639 | { 640 | "data": { 641 | "text/plain": [ 642 | "" 643 | ] 644 | }, 645 | "execution_count": 35, 646 | "metadata": {}, 647 | "output_type": "execute_result" 648 | }, 649 | { 650 | "data": { 651 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXiU1fXA8e8hBMJOgLAGCCLIjkAEhLogLmARhIKCioBSpSpW/LVqaVXaYkutS3FFVBQUihSkRYqoCBIXVEB2AoiAJaxhCwkYsp3fH3cShpBlEpLMZOZ8nmeeZN73zvueGeXMzX3ve66oKsYYY4JXBX8HYIwxpnRZojfGmCBnid4YY4KcJXpjjAlyluiNMSbIVfR3AHmpV6+exsTE+DsMY4wpN9auXXtEVaPy2heQiT4mJoY1a9b4OwxjjCk3ROTH/PbZ0I0xxgQ5S/TGGBPkLNEbY0yQC8gx+rykp6eTkJBAamqqv0MxQEREBNHR0YSHh/s7FGNMIcpNok9ISKBGjRrExMQgIv4OJ6SpKkePHiUhIYEWLVr4OxxjTCHKzdBNamoqdevWtSQfAESEunXr2l9XxpQT5SbRA5bkA4j9tzCm/PBp6EZE+gFTgTDgDVWdkmt/JDADaAmkAnep6mYRuQR4z6vpRcATqvqPkgjehA5VZf7aBPYeO+3vUIwpNVUrV2TcVS1L/LiFJnoRCQNeBq4DEoDVIrJIVbd6NZsIrFfVwSLSxtO+r6puBy71Os4+YGEJvwcTAt7/bh+/nb8RAPtjwgSretUr+yfRA92Bnaq6C0BE5gKDAO9E3w74K4CqbhORGBFpoKqHvNr0BX5Q1Xzv3gom1atXJyUlxd9hBIXDyan8afFWYptHMu/ey6lQwTK9MUXhyxh9E2Cv1/MEzzZvG4AhACLSHWgOROdqMxz4Z34nEZF7RGSNiKxJTEz0ISzji4yMDH+HcMEmLdrCT+mZ/G1oJ0vyxhSDLz36vP5l5V5/cAowVUTWA5uAdUBOhhGRSsBA4Hf5nURVpwPTAWJjYwtc3/CPH2xh6/6TPoTuu3aNa/LkTe3z3f/oo4/SvHlz7rvvPgAmTZqEiBAXF8fx48dJT09n8uTJDBo0qNBzpaSkMGjQoDxfN2vWLJ555hlEhE6dOvHOO+9w6NAhxo0bx65duwB49dVXady4MQMGDGDz5s0APPPMM6SkpDBp0iSuvvpqevXqxZdffsnAgQNp3bo1kydPJi0tjbp16zJ79mwaNGhASkoK48ePZ82aNYgITz75JCdOnGDz5s08//zzALz++uvEx8fz3HPPXdDnW1xLNx9gyaaDPNLvElpGVfdLDMaUd74k+gSgqdfzaGC/dwNVPQmMARA3HWO355GtP/BdrqGccmX48OE89NBDOYl+3rx5LF26lAkTJlCzZk2OHDlCz549GThwYKEzUiIiIli4
cOF5r9u6dStPPfUUX375JfXq1ePYsWMAPPjgg1x11VUsXLiQzMxMUlJSOH78eIHnOHHiBCtXrgTg+PHjfP3114gIb7zxBk8//TTPPvssf/7zn6lVqxabNm3KaVepUiU6derE008/TXh4OG+99RavvfbahX58xXLidBp/+PcW2jeuyS+vuMgvMRgTDHxJ9KuBViLSAncxdThwm3cDEakNnFbVNGAsEOdJ/tlGUMCwTVEV1PMuLV26dOHw4cPs37+fxMREIiMjadSoERMmTCAuLo4KFSqwb98+Dh06RMOGDQs8lqoyceLE8163fPlyhg4dSr169QCoU6cOAMuXL2fWrFkAhIWFUatWrUIT/a233prze0JCArfeeisHDhwgLS0t5yanZcuWMXfu3Jx2kZGRAFxzzTUsXryYtm3bkp6eTseOHYv4aZWMyf+N58TpNGbedRnhYeVqJrAxAaXQRK+qGSLyAPARbnrlDFXdIiLjPPunAW2BWSKSibtIe3f260WkKm7Gzr2lEH+ZGjp0KPPnz+fgwYMMHz6c2bNnk5iYyNq1awkPDycmJsanm4jye52q+jw/vWLFimRlZeU8z33eatWq5fw+fvx4Hn74YQYOHMhnn33GpEmTAPI939ixY/nLX/5CmzZtGDNmjE/xlLSVOxKZvzaBB/pcTPvGtfwSgzHBwqdukqouUdXWqtpSVZ/ybJvmSfKo6ipVbaWqbVR1iKoe93rtaVWtq6pJpfMWys7w4cOZO3cu8+fPZ+jQoSQlJVG/fn3Cw8NZsWIFP/7o24Si/F7Xt29f5s2bx9GjRwFyhm769u3Lq6++CkBmZiYnT56kQYMGHD58mKNHj3LmzBkWL15c4PmaNHHXz2fOnJmz/frrr+ell17KeZ79V0KPHj3Yu3cvc+bMYcSIEb5+PCUm5UwGE9/fRMuoajxwzcVlfn5jgo39PVwE7du3Jzk5mSZNmtCoUSNuv/121qxZQ2xsLLNnz6ZNmzY+HSe/17Vv357f//73XHXVVXTu3JmHH34YgKlTp7JixQo6duxIt27d2LJlC+Hh4TzxxBP06NGDAQMGFHjuSZMmMWzYMK644oqcYSGAP/zhDxw/fpwOHTrQuXNnVqxYkbPvlltuoXfv3jnDOWXp70u3sT/pJ54e2omI8LAyP78xwUZUC5zg4hexsbGae4Wp+Ph42rZt66eIQs+AAQOYMGECffv2zbdNafw3Wb3nGLe8topRl8cwaWDZX4sxprwSkbWqGpvXPuvRm3OcOHGC1q1bU6VKlQKTfGlITc/k0QUbaVK7Cr+94ZIyPbcxwazclCkujzZt2sTIkSPP2Va5cmW++eYbP0VUuNq1a7Njxw6/nPuFT79nV+Ip3rm7O9Uq2/+axpQU+9dUijp27Mj69ev9HUa5sHlfEq/F7eKW2GiuaJXnQvbGmGKyoRvjd+mZWTwyfyN1qlXi9ze283c4xgQd69Ebv1v43T62HjjJtDu6UquqLU1oTEmzHr3xK1Vlxpe7adOwBje0L/iOYmNM8ViiN361atdRth1MZkxvWwvYmNJiiT7ABENZ4aJ468s9RFYNZ9CluStfG2NKiiX6Irj55pvp1q0b7du3Z/r06QAsXbqUrl270rlz55x55ykpKYwZM4aOHTvSqVMnFixYALjFSLLNnz+f0aNHAzB69Ggefvhh+vTpw6OPPsq3335Lr1696NKlC7169WL79u2AK3/wm9/8Jue4L774Ip9++imDBw/OOe4nn3zCkCFDyuLjuGD/O3qaZfGHuK1HM7sD1phSVD4vxn74GBzcVLLHbNgR+k8psMmMGTOoU6cOP/30E5dddhmDBg3il7/8JXFxcbRo0SKnNk1e5X8Ls2PHDpYtW0ZYWBgnT54kLi6OihUrsmzZMiZOnMiCBQuYPn06u3fvZt26dVSsWJFjx44RGRnJ/fffT2JiIlFRUbz11lt+K0RWVDNX7SFMhJE9Y/wdijFBrXwmej954YUXWLjQLXm7d+9e
pk+fzpVXXplT9je7rHB+5X8LMmzYMMLCXK82KSmJUaNG8f333yMipKen5xx33LhxVKxY8ZzzjRw5knfffZcxY8awatWqnJLGgSzlTAbzVu+lf8dGNKwV4e9wjAlq5TPRF9LzLg2fffYZy5YtY9WqVVStWpWrr76azp075wyreMuv/K/3toLKCj/++OP06dOHhQsXsmfPHq6++uoCjztmzBhuuukmIiIiGDZsWM4XQSBbsDaB5DMZjOkd4+9QjAkMBzbAvrUQe1eJH9rG6H2UlJREZGQkVatWZdu2bXz99decOXOGlStXsnu3W0wre+gmv/K/DRo0ID4+nqysrJy/DPI7V3ZZ4bfffjtn+/XXX8+0adNyLthmn69x48Y0btyYyZMn54z7B7KsLOXtr/bQuWltujYr++qYxgScQ1tg1s3w+XNwJrnED2+J3kf9+vUjIyODTp068fjjj9OzZ0+ioqKYPn06Q4YMoXPnzjmrOuVX/nfKlCkMGDCAa665hkaNGuV7rkceeYTf/e539O7dm8zMzJztY8eOpVmzZnTq1InOnTszZ86cnH233347TZs2pV27wL+zdOWORHYfOcVd1ps3Bg5vg5kDoWJlGLUIKtco8VNYmeIg8cADD9ClSxfuvvvuwhuXkOL+Nxn55jdsP5jMF49eQ6WK1tcw5dz+9VC3ZfES9JGd8PaN7vfR/4V6rYodhpUpDnLdunVj48aN3HHHHf4OpVDfH0rm8++PMLJnc0vypnzLSIMlv4XpV8ErvWD350V7/bFdMPMmyMqEOxddUJIvTOBftTOFWrt2rb9D8NlbX+2hUsUK3Najmb9DMab4TuyFf42GfWug652w50uYOQB6/AqufRLCqxTy+v+54ZqMVBi9GOr7tjpdcZWrRF+UxbNN6SrOkN+J02m8/10Cgzo3pm71yqUQlQl6Z1IgvCpU8ONfgzuXwYJfQmY63DIL2g2CtFOwbBJ886rbP/g1iO6W9+uT9sHbA+DMSRj1ATQo/ZXUys3fzhERERw9erRYCcaULFXl6NGjREQUbf773NV7SU3PYkzvFqUUmQlqx3bDPzrCv3/ln/NnZcKKv8C7Q6FGI7jnM5fkASpVgxv/DiP/Dek/wZvXwfLJbnjH28kDbrjmp+MwciE06lwmoZebHn10dDQJCQkkJib6OxSD++KNjo72uX1GZhazvtpDz4vq0K5xzVKMzASltNPw3kiXIDfOhfY3wyX9y+78p47C+2Phh+XQeQT8/DmoVPX8di37wH1fubv34/4OOz5yvfsG7SDlMMwaCCmH4I73oUk+Pf5SUG4SfXh4eM4dqKb8+XjrIfYnpfKkLfhtikoV/vswHNoEt852verFE6B5L4ioVbxj7vjYDbPUbg7127kx8vrtoFq989vuXQ3/GgWnjsBNU6HrKChoCDmiFgx+FdoOgA9+7S7WXvkIbF4ASQlw+3xo1qN4cRdTuUn0pnx768vdNK1ThWvbNvB3KKa8WTMDNvwTrnrMJc+ajeCNa+Hjx2HgC0U/3uFt7kJq5eqw7ztY+9bZfdWiIMqT9Ou3hdQTsPwpd867P4LGXXw/T5ufQ9MesPghWDEZKkbAbfMgpnfRY75AluhNqdu8L4nVe47zh5+3JayCXUw3RZCwBj58FC6+Dq561G1r0g0ufwC+egE6/AIuusr346WehPfucGPq96yEGg0h+SAkxsPheDi81f1c9y6kn3Kvad0PBk+DKsW4i7taPbjlHdi+BKo3gOg8p7mXOkv0ptTN+HI3VSuFMSy2qb9DMeXJqSMw707Xmx4y/dyZNlf/DrYthg8ehF995RJ3YVThP/e5+eujFrnjgvtZsxG0vOZs26wsSNrrYmjc5cJm+Yi43r0flZtZNybwZGUpSafTC3zsOXKKxRsOMLRbNLWq2HqwxkeZGTB/jEu0t7wDVeucu79SVRj4Ehzf44ZWfPHVCxD/AVz3R4j5WcFtK1SAyOZuiqQ/p3KWEOvRm2LZlZjCfbO/
Y9tB3wowjeoVU7oBmeCyYjLsjoNBL0PjS/NuE9MbYu+Gr1+B9oOh6WX5H293nJvn3m6QG/YJMZboTZEt2XSAR+ZvJDxMeLRfGyoXUsqgWZ2qtIyqXmAbY3LEfwBfPA/dRkOXQsp6XDvJTWFc9ADcG+cKg+WWtA/+NQbqXuy+OELwpkufEr2I9AOmAmHAG6o6Jdf+SGAG0BJIBe5S1c2efbWBN4AOgHr2rSqxd2DKTHpmFn9dso0ZX+7m0qa1eeX2rjSuXcit3ia4bf8Q1rxVeLvwCDcDpcWVUL99/sMhR3bCwl9B467Q/+nCjxtREwY8D3OGQdwzcM3vz92fkeamRmakwq3vlkplyPKg0EQvImHAy8B1QAKwWkQWqepWr2YTgfWqOlhE2nja9/XsmwosVdWhIlIJyOMuAxPoDiT9xANz1rH2x+OM7hXDxBvbWlGyUJdxBhY/DJlpUKuQm+dST8DW/7jfq9RxY+QtrnSPeq1dL/tMipsRExbuSgvk1TvPS+vrodOt8MVzbmimYYez+z6aCAmrYdhMiLqkeO8zCPjSo+8O7FTVXQAiMhcYBHgn+nbAXwFUdZuIxIhIA+An4EpgtGdfGpDrnmAT6L74/gi/nruO1PRMXhzRhZs6N/Z3SCYQrJ8NyfvdXZ4X9y28fdI+2PO5Gy/fHQfxi9z26g0g5gr46RgkboOR70PtIs7Q6jfF3bX6n/th7KcQVhE2zIXVr0Ov8e5O2hDmS6JvAuz1ep4A5L6tawMwBPhCRLoDzYFoIBNIBN4Skc7AWuDXqnoq90lE5B7gHoBmzayyYSDIylJeWrGT55ft4OKo6rx6Rzcurm9j7QZX0OuL592cdu9piQWp1QQ6D3cPVTdjJifxfw4pB6HvE74fz1vVOq7WzL9Gw9cvQ8u+8MFD7guk76SiHy/I+JLo87pykbuy2BRgqoisBzYB64AMIBzoCoxX1W9EZCrwGPD4eQdUnQ5MB7fwiM/vwJSK46fSmDBvPZ9tT2TQpY35y+COVKts1+6Nx8b3XKnd/n8v3sVNEajTwj263ukSf8ohdwNTcbW7GdoMcCUSVr8BVWrD0Bmudx/ifPkEEgDvv6Oigf3eDVT1JDAGQFwd4d2eR1UgQVW/8TSdj0v0fnEwKZUJ761n9Z5j/gqh3MhUJbxCBf58cwfu6NHMykObszIz4PNnoWEnaH1DyRxT5MKSfPYxfv4svNwdTu6H0Uugev2Sia+c8yXRrwZaiUgLYB8wHLjNu4FnZs1pzxj8WCDOk/xPisheEblEVbfjLtBuxQ++2nmEB+eu43RaJqN7xVA53C4kFkQQ+nVoSIcmxSwaZYLXlvfd3aW3vBN4UxVrNHTXDNJSyrxwWCArNNGraoaIPAB8hJteOUNVt4jIOM/+aUBbYJaIZOISuffCpeOB2Z4ZN7vw9PzLSlaW8urKH3j24+1cFFWdufd05eL6oTnFygQoVTcUUqEidBzq72gKlpXlpjHWb+eGSQKRn+rJBDKfBq9UdQmwJNe2aV6/rwLyXPBQVdcDfvnkT5xO4+F5G1i+7TADOzfmr0NsnNkEmLRT7qLhpnkgFdyCFn6obuiz+EVwZDv84s2gKA0QKoL2v9TGhBMMePELPv8+kT8Nas/U4ZdakjeB5cj38Hpf2PQvV5kxsgUsGAunA/Qakqrrzde92JUcMOVG0CV6VWX2Nz8y9NVVZGUp8+69nDsvj7GLiSawbH4fpl8Npw67JeX6THQzRE4fcXPBA3HJzO0fusU/rvg/qBDm72hMEQRVoj+dlsH/zdvA7xdu5vKWdfnvg1fQpVkxakgbU1oy0lx99flj3Dj3vZ+75efAFe+67k+udvm30/0bZ26qEPe0W5Gp4zB/R2OKKGjGMk6cTuPW175mx+FkJlzbmvHXXEwFW+TCBJKkfe6GnoRvocevXFKvWOncNj3Gwa7P4OM/QLPLoVEnf0R6vp2fwv51bim9MCs3Xd4ETY++VpVwLmsRycwx3fn1ta0s
yZsLk54Ke78tuSGUH5bDa1e4FYyGvQ39p5yf5MFNVxz0ClStC/PvcvVf/C27N18zGjrfVnh7E3CCJtGLCJNv7siVraP8HYop71JPwrtD4M3r4J/DIflQ8Y+VcQZW/BXeGQLV6sM9nxV+IbNaXRjyOhzdCR8+Uvxzl5TdcbD3G/jZQ3l/OZmAFzRDN8aUiFNHXZI/tBm6jXGLUr/SA37+HHQYUrRj7VzmxuOP7oROw2HAc74teQfQ4gq46hFY+Te4qA90usBx8bRTrmDYYc/aqEd/gKbdoeso98VSkLi/Q/WG0GXkhcVg/MYSvTHZTu6Hdwa7YlvD57jb+3veBwvvdRdPty2GG585f1m73E78D5Z61jSt0xLuWAAXX1v0eK58xBX7WjwBmnSFui0Lf42qGx46tMWz0PU29/PEj2fbVIyAmk1gx4fui6TjUOh+b97XA35c5QqP3fAXV1PelEuiATiNKzY2VtesWePvMEwoObYbZg2C00dhxFzXo86WmeEqNa6cAlXrwcAXXQ303NJT4asXXR0YEbjyN27ZOl/rquclKQFe7e2Kf931cd5DJ6pwYIMrTbBlofuiAXenbd1WUL+tm+FTv617RMa46ZGHtrrZPRvfg/TT7uJvj3vdHa/ZF1zfGQwHNsJDm9w6rSZgichaVc3z5lRL9MYc2uoSWuYZ1/tu0i3vdgc2wMJxrofcdRTc8NTZFYt2fOzG04/vhrYDXQ+4qDXV8xO/GN673X1p3OC1EPbheNi8wM3JP/aDS+wX9XG115t0c39N+DKm/tNxWDfb1W4/vgdqNIbL7nJFy+bc4pbr+9mEknkvptRYojcmP/vWwru/gLDKcOe/XY+3IBlnXBncr16AWk3huj/Cxnlu7nvdVnDj08Wrp16Y//7GJeKBL7qLw5sXQGK8K5sQc4W7ftB2YOHDSgXJyoTvP4ZvXoNdK9y2KpGuNx+iS/CVJ5bojcnL7s/drJqqdeHO/7jhEV/97xs3dn98N4RXg6t+Cz3vL71ZKemp8EZfd5EY3DBLh1+4pfNKoxRv4g5Y+zY0vczKHZQTluiNyW37Uph3p0vuIxdCzWIsj5h2yo1vt7rBrZ5U2k7sdT3u1jcUvkarCTkFJXqbdWNCS8YZt/rQJ09Aw45w+4LCpxfmp1I1iL2rZOMrSO2mcNndhbczJhdL9CY0ZKbDundd9cWTCW6649C3IKKmvyMzptRZojfBLTPDDa+s/JubSx59GQx6CS66OvBWRzKmlFiiN/53Jhk2zHUzRuq3831aYEGyMt20w5VT3J2pjTq7m51aXWcJ3oQcS/TGvzIzXEXHncvObjvnRh/PI6qtu3BaWB30rCzY9oGbApm4zX1x3Dob2vzcErwJWZbojX998rhL8jc+A816nr1l/3C8m+O+5f2zbcMqQ3iVgo+XlQlpyVCvtVvIo91gW/LOhDxL9MZ/1rwFX7/iarN3/6Xb1rDjuW3OpLg1Sg/HQ+J2N2umMNGxbo65rYJkDGCJ3vjL7jhY8hs3++X6yfm3q1zd3c6fX1kCY0yh7G9aU/aO/gDvjXQXXYfOgDDrbxhTmizRm/MlrIFXesGM/q4aYmZ6yR37pxMw51ZXo+W2uRBRq+SObYzJkyV6c5aqK2g1o5+b8pi8382I+Ucnt/jEqSMXdvzsGTbH98Ct70Kdi0ogaGNMYexvZuOcSYEPHnRVEVv3g8HToHLNs9UMl0+GlU9Dh6HQ4x5o3KXo51j6mKuKOPAliOld8u/BGJMnS/TGTWmcN9LdWNT3Ceg94eyUxEv6u0fidrdIxfp/woY5EN3dLVLRdqBvNzd9+7ors9trPHS1JemMKUtWvTLUbZoPix50qwcNnQEtriy4fWoSrJ/jkv6xXRBWCepdAvXbnLuSUa1mZ78sflgO7w51d6UOn2PTHo0pBVam2Jwv4wx8NNFVcmx2uSvwVbOR76/PyoIfPnXTJA/Hu7tQk/ae3R9eDaIucUk/frEr43v3x7aAhTGlxMoU
m3Od+J+7KLpvrRtK6fvk2TVCfVWhguuht7ru7LbUJDfE470o9fefuDLAI+ZakjfGT3xK9CLSD5gKhAFvqOqUXPsjgRlASyAVuEtVN3v27QGSgUwgI79vHFNGDm6CmQMhK8PNfGl7U8kdO6IWNO3uHt5Urc6MMX5UaKIXkTDgZeA6IAFYLSKLVHWrV7OJwHpVHSwibTzt+3rt76OqFzg3z1yw4z+69VHDq8CoD6Buy7I5ryV5Y/zKl3n03YGdqrpLVdOAucCgXG3aAZ8CqOo2IEZEGpRopObCnDoC7w6BjFS4Y0HZJXljjN/5kuibAF5X2UjwbPO2ARgCICLdgeZA9qKWCnwsImtF5J78TiIi94jIGhFZk5iY6Gv8xhdpp2DOLZCUALfNcxdIjTEhw5dEn9ff3bmn6kwBIkVkPTAeWAdkePb1VtWuQH/gfhHJc/6eqk5X1VhVjY2KivItelO4zHSYNwr2r3PTJ5v19HdExpgy5svF2ASgqdfzaGC/dwNVPQmMARARAXZ7Hqjqfs/PwyKyEDcUFHfBkZvCqbo58js/gZumusU3jDEhx5ce/WqglYi0EJFKwHBgkXcDEant2QcwFohT1ZMiUk1EanjaVAOuBzaXXPimQJ/+0d3FevVE6Dba39EYY/yk0B69qmaIyAPAR7jplTNUdYuIjPPsnwa0BWaJSCawFbjb8/IGwELXyaciMEdVl5b82zDn+XoafPE8dBsDVz3i72iMMX5kd8aWF2eS3fTIui0LX05v8wKYf7cbqrlllpUcMCYE2J2x5d2Oj2HReEg56Oq417kIotqcrStTv537AggLh10rYeE4V9bgF29YkjfGWKIPaGeS4aPfw3czXTLv+wSc+PFsiYHtS0CzXNsK4VCvlStvUKcljJhTeM/fGBMSLNEHqj1fwL9/5ea+934I+kyEipXPbZOeCkd2uIJih7e64mI1G8NNL0CVSP/EbYwJOJboS9vBzRBRE2o38619+k/w6Z/h61cgMgbGLIVmPfJuGx4BjTq5hzHG5MMSfWnJynLTG7/8h3seGeNqvcdcCS2ugBoNz3/NvrVufP3IDrhsLFz3J6hUrUzDNsYEH0v0pSHtNCy8F+IXQddRbnx9z+ew9T/w3SzXpl5rT+K/wt2tuvpN+PxZ9wUwciG0vMa/78EYEzQs0edl7Uy3TmqfiW6KYlGqLyYfgrkjYN93cMNfoOd97vU9x0FWJhzcCLs/dwt2rP+nW/gjW+cR0G8KVKld8u/JGBOybB59bsmH4MVukHkGMtPg4muh/9O+VXs8tBXm3Aqnj7ipjYWVHMhMdzVofvwS6reH1teXzHswxoQcm0dfFJ/+yZXy/dWXsHMZrPgrvNLTrcR0xf/lP2a+cxn8awyEV4UxS6Bxl8LPFRae90IdxhhTgnypdRM6EtbC+nfh8vvdeqeX3w/j10D7wW78/KXubpw9919Bq9+E2bdA7ebwy+W+JXljjCkjluizZWXBh7+F6g3hyt+c3V6jIQyZDmM+dEvlzbsT3hkMR753Y+5LJ8J/H3ZDPHd96BbBNsaYAGJDN9k2/NNNbxw8Pe9FrJv3gnvj3MXTFU/BK5dDg/ZwYD30GOcuvFq5AWNMALIePUBqEiybBNHdodMt+bcLq+hmz4xfCx2HwaEt7oPXx5YAAAz/SURBVEJt/79ZkjfGBCzr0QOsfBpOJcJt7/k2lbJ6fRj8Kgx8wV1QNcaYAGY9+sQd8M006DoSmnQt2mstyRtjyoHQTvSqsPRRCK8G1zzh72iMMaZUhHai3/4h/LAc+vwOqtuC5MaY4BS6iT49FT76nVvA47Kx/o7GGGNKTehejF31EhzfAyP/bWPtxpigFpo9+qR97k7XtjdByz7+jsYYY0pVaCb6T55wS/Bd/5S/IzHGmFIXeon+x69g83zo9SBENvd3NMYYU+pCL9F/8Q+o0Qh+NsHfkRhjTJkIvUR/fA9Ex0Klqv6OxBhjykToJfrkg65Hb4wxISK0En3a
aTiTlPfC3MYYE6RCK9GnHHQ/rUdvjAkhoZXok7MTvfXojTGhI8QS/QH303r0xpgQ4lOiF5F+IrJdRHaKyGN57I8UkYUislFEvhWRDrn2h4nIOhFZXFKBF4v16I0xIajQRC8iYcDLQH+gHTBCRNrlajYRWK+qnYA7gam59v8aiL/wcC9Q8gEIqwwRtf0diTHGlBlfevTdgZ2quktV04C5wKBcbdoBnwKo6jYgRkQaAIhINPBz4I0Si7q4kg+63rwvq0gZY0yQ8CXRNwH2ej1P8GzztgEYAiAi3YHmQLRn3z+AR4Csgk4iIveIyBoRWZOYmOhDWMVgc+iNMSHIl0SfV/dXcz2fAkSKyHpgPLAOyBCRAcBhVV1b2ElUdbqqxqpqbFRUKS0Ckt2jN8aYEOJLPfoEoKnX82hgv3cDVT0JjAEQEQF2ex7DgYEiciMQAdQUkXdV9Y4SiL3okg/Cxdf65dTGGOMvvvToVwOtRKSFiFTCJe9F3g1EpLZnH8BYIE5VT6rq71Q1WlVjPK9b7rckfyYZ0pKtR2+MCTmF9uhVNUNEHgA+AsKAGaq6RUTGefZPA9oCs0QkE9gK3F2KMRdP8iH30xK9MSbE+LSUoKouAZbk2jbN6/dVQKtCjvEZ8FmRIywpOTdLWaI3xoSW0LkzNiW7R2+zbowxoSV0Er316I0xISqEEv1BCK8KlWv6OxJjjClTIZToD9hdscaYkBRCid7uijXGhKYQSvQHoHoDf0dhjDFlLjQSvaqbR289emNMCAqNRH8mGdJP2YwbY0xICo1En2xrxRpjQleIJHqbQ2+MCV0hkuitR2+MCV0hkuize/Q268YYE3pCJNEfhErVoXINf0dijDFlLjQSfYqtLGWMCV2hkejtrlhjTAgLkUR/wHr0xpiQFfyJXtUWBTfGhLTgT/SpJyAjFapbojfGhKbgT/Q5c+gt0RtjQlMIJXq7GGuMCU0hlOitR2+MCU0hkOitzo0xJrSFQKI/CJVrQaVq/o7EGGP8IgQSvc2hN8aEthBI9AetmJkxJqQFf6JPsfIHxpjQFtyJ3u6KNcaYIE/0Px2HzDTr0RtjQlpwJ3qbWmmMMaGS6K1Hb4wJXT4lehHpJyLbRWSniDyWx/5IEVkoIhtF5FsR6eDZHuF5vkFEtojIH0v6DRQo+67Y6jbrxhgTugpN9CISBrwM9AfaASNEpF2uZhOB9araCbgTmOrZfga4RlU7A5cC/USkZ0kFXygbujHGGJ969N2Bnaq6S1XTgLnAoFxt2gGfAqjqNiBGRBqok+JpE+55aMmE7oPkQxBRG8KrlNkpjTEm0PiS6JsAe72eJ3i2edsADAEQke5AcyDa8zxMRNYDh4FPVPWbvE4iIveIyBoRWZOYmFi0d5Gf5AM2Pm+MCXm+JHrJY1vuXvkUINKT0McD64AMAFXNVNVLcYm/e/b4/XkHVJ2uqrGqGhsVFeXzGyiQzaE3xhgq+tAmAWjq9Twa2O/dQFVPAmMARESA3Z6Hd5sTIvIZ0A/YXPyQiyD5INRrXSanMsaYQOVLj3410EpEWohIJWA4sMi7gYjU9uwDGAvEqepJEYkSkdqeNlWAa4FtJRd+AbKyPOUPrEdvjAlthfboVTVDRB4APgLCgBmqukVExnn2TwPaArNEJBPYCtzteXkjYKZn5k4FYJ6qLi6F93G+00chK8MSvTEm5PkydIOqLgGW5No2zev3VUCrPF63EehygTEWT4qtLGWMMRDMd8baWrHGGAMEdaK3m6WMMQaCOtFb+QNjjIGgTvQHoGpdqFjZ35EYY4xfBXGiPwjVbdjGGGOCO9Hb+LwxxgR7orcZN8YYE5yJPisTUg5Zj94YYwjWRH/qCGimJXpjjCFYE70tIWiMMTmCNNFb+QNjjMkWnIne6twYY0yO4Ez0dlesMcbkCNJEfwCqRUFYuL8jMcYYvwvSRG83SxljTLYgTfS2KLgxxmQL0kRvPXpjjMkWfIk+MwNSDltBM2OM8Qi+RH8q
EVDr0RtjjEfwJXq7K9YYY84RhInebpYyxhhvQZjorUdvjDHegjDRHwSp4G6YMsYYE4yJPvuu2Ir+jsQYYwJC8CV6W3DEGGPOEXyJ3u6KNcaYcwRhore7Yo0xxltwJfrMdHfDlPXojTEmR3Al+pRD7qf16I0xJodPiV5E+onIdhHZKSKP5bE/UkQWishGEflWRDp4tjcVkRUiEi8iW0Tk1yX9Bs6Rc7OU9eiNMSZboYleRMKAl4H+QDtghIi0y9VsIrBeVTsBdwJTPdszgP9T1bZAT+D+PF5bcmxlKWOMOY8vPfruwE5V3aWqacBcYFCuNu2ATwFUdRsQIyINVPWAqn7n2Z4MxANNSiz63OyuWGOMOY8vib4JsNfreQLnJ+sNwBAAEekONAeivRuISAzQBfgmr5OIyD0iskZE1iQmJvoS+/mSD4KEQbV6xXu9McYEIV8SveSxTXM9nwJEish6YDywDjds4w4gUh1YADykqifzOomqTlfVWFWNjYoqZvmC5INu2KZCWPFeb4wxQciXOgEJQFOv59HAfu8GnuQ9BkBEBNjteSAi4bgkP1tV3y+BmPOXfMBm3BhjTC6+9OhXA61EpIWIVAKGA4u8G4hIbc8+gLFAnKqe9CT9N4F4VX2uJAPPU/JBG583xphcCk30qpoBPAB8hLuYOk9Vt4jIOBEZ52nWFtgiIttws3Oyp1H2BkYC14jIes/jxhJ/F9mSD0ANm3FjjDHefCrxqKpLgCW5tk3z+n0V0CqP131B3mP8JU8VWl0PTXuWyemMMaa8CJ5aviIw5DV/R2GMMQEnuEogGGOMOY8lemOMCXKW6I0xJshZojfGmCBnid4YY4KcJXpjjAlyluiNMSbIWaI3xpggJ6q5C1H6n4gkAj8W8+X1gCMlGE5ZsbjLlsVdtizu0tdcVfMs/RuQif5CiMgaVY31dxxFZXGXLYu7bFnc/mVDN8YYE+Qs0RtjTJALxkQ/3d8BFJPFXbYs7rJlcftR0I3RG2OMOVcw9uiNMcZ4sURvjDFBLmgSvYj0E5HtIrJTRB7zdzxFISJ7RGSTZ6nFNf6OJz8iMkNEDovIZq9tdUTkExH53vMz0p8x5iWfuCeJyL4yWeKymESkqYisEJF4EdkiIr/2bA/oz7yAuAP6MxeRCBH5VkQ2eOL+o2d7QH/evgiKMXoRCQN2ANcBCbgFzUeo6la/BuYjEdkDxKpqQN+YISJXAinALFXt4Nn2NHBMVad4vmAjVfVRf8aZWz5xTwJSVPUZf8ZWEBFpBDRS1e9EpAawFrgZGE0Af+YFxH0LAfyZi4gA1VQ1RUTCgS9w618PIYA/b18ES4++O7BTVXepahowFxjk55iCjqrGAcdybR4EzPT8PhP3Dzqg5BN3wFPVA6r6nef3ZCAeaEKAf+YFxB3Q1EnxPA33PJQA/7x9ESyJvgmw1+t5AuXgfywvCnwsImtF5B5/B1NEDVT1ALh/4EB9P8dTFA+IyEbP0E5A/zkuIjFAF+AbytFnnituCPDPXETCRGQ9cBj4RFXL1eedn2BJ9JLHtvI0JtVbVbsC/YH7PUMNpnS9CrQELgUOAM/6N5z8iUh1YAHwkKqe9Hc8vsoj7oD/zFU1U1UvBaKB7iLSwd8xlYRgSfQJQFOv59HAfj/FUmSqut/z8zCwEDcUVV4c8ozJZo/NHvZzPD5R1UOef9RZwOsE6GfuGSteAMxW1fc9mwP+M88r7vLymQOo6gngM6Af5eDzLkywJPrVQCsRaSEilYDhwCI/x+QTEanmuWCFiFQDrgc2F/yqgLIIGOX5fRTwHz/G4rPsf7gegwnAz9xzcfBNIF5Vn/PaFdCfeX5xB/pnLiJRIlLb83sV4FpgGwH+efsiKGbdAHimav0DCANmqOpTfg7JJyJyEa4XD1ARmBOosYvIP4GrcaVbDwFPAv8G5gHNgP8Bw1Q1oC585hP31bghBAX2APdmj8MGChH5GfA5sAnI8myeiBvvDtjPvIC4RxDAn7mI
dMJdbA3DdYLnqeqfRKQuAfx5+yJoEr0xxpi8BcvQjTHGmHxYojfGmCBnid4YY4KcJXpjjAlyluiNMSbIWaI3xpggZ4neGGOC3P8Dztp+D8IqvVsAAAAASUVORK5CYII=\n", 652 | "text/plain": [ 653 | "
" 654 | ] 655 | }, 656 | "metadata": { 657 | "needs_background": "light" 658 | }, 659 | "output_type": "display_data" 660 | } 661 | ], 662 | "source": [ 663 | "dfhistory[['val_accuracy', 'accuracy']].plot()" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": 36, 669 | "metadata": {}, 670 | "outputs": [], 671 | "source": [ 672 | "# dfhistory[['val_score', 'score']].plot()" 673 | ] 674 | }, 675 | { 676 | "cell_type": "markdown", 677 | "metadata": {}, 678 | "source": [ 679 | "## 测试集" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": 14, 685 | "metadata": {}, 686 | "outputs": [ 687 | { 688 | "name": "stdout", 689 | "output_type": "stream", 690 | "text": [ 691 | "188/188 [==============================] - 4s 23ms/step - loss: 0.0920 - accuracy: 0.9680\n" 692 | ] 693 | } 694 | ], 695 | "source": [ 696 | "predict = model.evaluate(img_data, labels)" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 38, 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "# model.save('./gray/gray_0.97.h5', save_format=\"tf\")" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "metadata": {}, 711 | "source": [ 712 | "## 测试数据" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": 12, 718 | "metadata": {}, 719 | "outputs": [], 720 | "source": [ 721 | "model = tf.keras.models.load_model('./gray/gray_0.97.h5')" 722 | ] 723 | }, 724 | { 725 | "cell_type": "code", 726 | "execution_count": 40, 727 | "metadata": {}, 728 | "outputs": [], 729 | "source": [ 730 | "test_data_feature = np.array(test_data_feature)\n", 731 | "predict_test = model.predict(test_data_feature)" 732 | ] 733 | }, 734 | { 735 | "cell_type": "code", 736 | "execution_count": 41, 737 | "metadata": {}, 738 | "outputs": [], 739 | "source": [ 740 | "test_data_labels = [0 if v < 0.5 else 1 for v in predict_test]" 741 | ] 742 | }, 743 | { 744 | "cell_type": "code", 745 | "execution_count": 42, 746 | 
"metadata": {}, 747 | "outputs": [ 748 | { 749 | "data": { 750 | "text/plain": [ 751 | "(1813, 6000)" 752 | ] 753 | }, 754 | "execution_count": 42, 755 | "metadata": {}, 756 | "output_type": "execute_result" 757 | } 758 | ], 759 | "source": [ 760 | "sum(test_data_labels), len(test_data_labels)" 761 | ] 762 | }, 763 | { 764 | "cell_type": "markdown", 765 | "metadata": {}, 766 | "source": [ 767 | "## 输出模型结果" 768 | ] 769 | }, 770 | { 771 | "cell_type": "code", 772 | "execution_count": 5, 773 | "metadata": {}, 774 | "outputs": [ 775 | { 776 | "name": "stderr", 777 | "output_type": "stream", 778 | "text": [ 779 | "gray: 100%|██████████████████████████████| 21017/21017 [01:04<00:00, 327.49it/s]\n" 780 | ] 781 | } 782 | ], 783 | "source": [ 784 | "raw_data, raw_labels = [], []\n", 785 | "\n", 786 | "test_data_feature, real_data_feature = [], []\n", 787 | "\n", 788 | "with tqdm(total=6000*2 + len(real_path), ncols=80, desc=\"gray\") as pbar:\n", 789 | " for fp in train_black_path:\n", 790 | " image = Image.open(fp+'.png')\n", 791 | " image_data = np.array(image)\n", 792 | " raw_data.append(image_data)\n", 793 | " raw_labels.append(1)\n", 794 | " pbar.update(1)\n", 795 | " for fp in train_white_path:\n", 796 | " image = Image.open(fp+'.png')\n", 797 | " image_data = np.array(image)\n", 798 | " raw_data.append(image_data)\n", 799 | " raw_labels.append(0)\n", 800 | " pbar.update(1)\n", 801 | " for fp in test_path:\n", 802 | " image = Image.open(fp+'.png')\n", 803 | " image_data = np.array(image)\n", 804 | " test_data_feature.append(image_data)\n", 805 | " pbar.update(1)\n", 806 | " for fp in real_path:\n", 807 | " image = Image.open(fp+'.png')\n", 808 | " image_data = np.array(image)\n", 809 | " real_data_feature.append(image_data)\n", 810 | " pbar.update(1)\n", 811 | "raw_data, raw_labels = np.array(raw_data, dtype=np.float32), np.array(raw_labels, dtype=np.int32)\n", 812 | "test_data_feature, real_data_feature = np.array(test_data_feature, dtype=np.float32), 
np.array(real_data_feature, dtype=np.float32)" 813 | ] 814 | }, 815 | { 816 | "cell_type": "code", 817 | "execution_count": 6, 818 | "metadata": {}, 819 | "outputs": [], 820 | "source": [ 821 | "model = tf.keras.models.load_model('./gray/gray_0.97.h5')" 822 | ] 823 | }, 824 | { 825 | "cell_type": "code", 826 | "execution_count": 7, 827 | "metadata": {}, 828 | "outputs": [], 829 | "source": [ 830 | "gray_train = model.predict(raw_data)\n", 831 | "gray_test = model.predict(test_data_feature)\n", 832 | "gray_real = model.predict(real_data_feature)" 833 | ] 834 | }, 835 | { 836 | "cell_type": "code", 837 | "execution_count": 8, 838 | "metadata": {}, 839 | "outputs": [ 840 | { 841 | "data": { 842 | "text/plain": [ 843 | "((6000, 1), (6000, 1), (9017, 1))" 844 | ] 845 | }, 846 | "execution_count": 8, 847 | "metadata": {}, 848 | "output_type": "execute_result" 849 | } 850 | ], 851 | "source": [ 852 | "gray_train.shape, gray_test.shape, gray_real.shape" 853 | ] 854 | }, 855 | { 856 | "cell_type": "code", 857 | "execution_count": 9, 858 | "metadata": {}, 859 | "outputs": [], 860 | "source": [ 861 | "with open(\"gray/gray_train_old.csv\", \"wb\") as fp:\n", 862 | " pickle.dump(gray_train, fp)\n", 863 | "with open(\"gray/gray_test_old.csv\", \"wb\") as fp:\n", 864 | " pickle.dump(gray_test, fp)\n", 865 | "with open(\"gray/gray_real_old.csv\", \"wb\") as fp:\n", 866 | " pickle.dump(gray_real, fp)" 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": 10, 872 | "metadata": {}, 873 | "outputs": [ 874 | { 875 | "data": { 876 | "text/plain": [ 877 | "1813" 878 | ] 879 | }, 880 | "execution_count": 10, 881 | "metadata": {}, 882 | "output_type": "execute_result" 883 | } 884 | ], 885 | "source": [ 886 | "sum([0 if v < 0.5 else 1 for v in gray_test])" 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": 11, 892 | "metadata": {}, 893 | "outputs": [ 894 | { 895 | "data": { 896 | "text/plain": [ 897 | "2381" 898 | ] 899 | }, 900 | 
"execution_count": 11, 901 | "metadata": {}, 902 | "output_type": "execute_result" 903 | } 904 | ], 905 | "source": [ 906 | "sum([0 if v < 0.5 else 1 for v in gray_real])" 907 | ] 908 | }, 909 | { 910 | "cell_type": "code", 911 | "execution_count": null, 912 | "metadata": {}, 913 | "outputs": [], 914 | "source": [] 915 | } 916 | ], 917 | "metadata": { 918 | "kernelspec": { 919 | "display_name": "datacon", 920 | "language": "python", 921 | "name": "datacon" 922 | }, 923 | "language_info": { 924 | "codemirror_mode": { 925 | "name": "ipython", 926 | "version": 3 927 | }, 928 | "file_extension": ".py", 929 | "mimetype": "text/x-python", 930 | "name": "python", 931 | "nbconvert_exporter": "python", 932 | "pygments_lexer": "ipython3", 933 | "version": "3.7.7" 934 | } 935 | }, 936 | "nbformat": 4, 937 | "nbformat_minor": 4 938 | } --------------------------------------------------------------------------------