├── models ├── dl │ ├── graphDapp │ │ ├── __init__.py │ │ ├── readme.txt │ │ ├── graphDapp_config.py │ │ ├── logger_wrappers.py │ │ ├── model_seriealization.py │ │ ├── graphDapp_main_model.py │ │ ├── test.py │ │ ├── train.py │ │ └── DApp_Classifier.py │ ├── __pycache__ │ │ └── .gitignore │ ├── fsnet │ │ ├── requirement.txt │ │ ├── service_online.py │ │ ├── README.md │ │ ├── dataset.py │ │ ├── eval.py │ │ ├── preprocess.py │ │ └── main.py │ ├── mimetic │ │ ├── readme │ │ ├── logger_wrappers.py │ │ ├── model_seriealization.py │ │ └── build_model.py │ ├── df │ │ ├── Deep Fingerprinting.pdf │ │ ├── df_model_config.py │ │ ├── df_services.py │ │ ├── df_services_test.py │ │ ├── generate_dataset.py │ │ ├── df_main_model.py │ │ └── df_model.py │ ├── df_only_D │ │ ├── Deep Fingerprinting.pdf │ │ ├── df_model_config.py │ │ ├── df_services.py │ │ ├── df_services_test.py │ │ ├── generate_dataset.py │ │ ├── df_model.py │ │ └── df_main_model.py │ ├── appnet │ │ ├── readme │ │ ├── logger_wrappers.py │ │ ├── model_seriealization.py │ │ └── build_model.py │ ├── cnn │ │ ├── Automated Website Fingerprinting through deep learning.pdf │ │ ├── cnn_model_config.py │ │ └── cnn_model.py │ ├── lstm │ │ ├── Automated Website Fingerprinting through deep learning.pdf │ │ ├── lstm_model_config.py │ │ └── lstm_model.py │ ├── sdae │ │ ├── Automated Website Fingerprinting through deep learning.pdf │ │ ├── sdae_model_config.py │ │ └── sdae_model.py │ ├── select_gpu.py │ ├── accuracy_per_class.py │ ├── beauty │ │ ├── cnn_model_config.py │ │ ├── cnn_model.py │ │ └── beauty_main_model.py │ ├── df_eval.py │ ├── sdae_eval.py │ ├── cnn_eval.py │ ├── examples.py │ ├── cnn_example.py │ ├── df_example.py │ ├── sdae_example.py │ ├── README.md │ ├── lstm_eval.py │ ├── lstm_example.py │ └── awf_dataset_util │ │ └── data.py ├── ml │ ├── __init__.py │ ├── cumul │ │ ├── model.py │ │ ├── saved_model │ │ │ └── readme.txt │ │ ├── __init__.py │ │ ├── hyper_params.py │ │ ├── model_predict.py │ │ ├── model_train.py │ │ ├── attack_cumul.py │ │ ├── feature_extractor.py │ │ └── util.py │ ├── bind │ │ ├── __init__.py │ │ ├── hyper_params.py │ │ ├── run.py │ │ ├── eval.py │ │ ├── train.py │ │ └── README.md │ ├── appscanner │ │ ├── __init__.py │ │ ├── 【1】AppScanner.pdf │ │ ├── README │ │ ├── hyper_params.py │ │ ├── min_max.py │ │ ├── eval.py │ │ ├── feature_extractor.py │ │ ├── train.py │ │ └── model.py │ └── rdp │ │ ├── rdp_config.py │ │ ├── readme.cn │ │ ├── feature_extractor.py │ │ ├── convert_to_csv.py │ │ ├── util.py │ │ └── statistic_tractor.py ├── __init__.py └── model_base.py ├── .idea └── .name ├── .gitignore ├── run.sh ├── config.py └── get_dataset_statistics.py /models/dl/graphDapp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | traffic_classification_utils -------------------------------------------------------------------------------- /models/ml/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | -------------------------------------------------------------------------------- /models/ml/cumul/model.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | -------------------------------------------------------------------------------- /models/__init__.py: 
-------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | 3 | 4 | -------------------------------------------------------------------------------- /models/dl/__pycache__/.gitignore: -------------------------------------------------------------------------------- 1 | *.cpython-36.pyc 2 | -------------------------------------------------------------------------------- /models/ml/bind/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | -------------------------------------------------------------------------------- /models/ml/appscanner/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | 3 | -------------------------------------------------------------------------------- /models/dl/fsnet/requirement.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | numpy == 1.14.5 3 | tensorflow == 1.8.0 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !README 3 | !*/ 4 | !*.txt 5 | !*.name 6 | !*.py 7 | !*.md 8 | !*.pdf 9 | -------------------------------------------------------------------------------- /models/dl/mimetic/readme: -------------------------------------------------------------------------------- 1 | 1. Payload information: 2 | processed with a CNN 3 | 2. Sequence information: 4 | packet length, packet direction, packet inter-arrival time, and tcp window size sequences, processed with a GRU -------------------------------------------------------------------------------- /models/dl/graphDapp/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmhIcoding/traffic_classification_utils/HEAD/models/dl/graphDapp/readme.txt -------------------------------------------------------------------------------- /models/dl/df/Deep Fingerprinting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmhIcoding/traffic_classification_utils/HEAD/models/dl/df/Deep Fingerprinting.pdf -------------------------------------------------------------------------------- /models/ml/appscanner/【1】AppScanner.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmhIcoding/traffic_classification_utils/HEAD/models/ml/appscanner/【1】AppScanner.pdf -------------------------------------------------------------------------------- /models/ml/cumul/saved_model/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmhIcoding/traffic_classification_utils/HEAD/models/ml/cumul/saved_model/readme.txt -------------------------------------------------------------------------------- /models/dl/df_only_D/Deep Fingerprinting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmhIcoding/traffic_classification_utils/HEAD/models/dl/df_only_D/Deep Fingerprinting.pdf -------------------------------------------------------------------------------- /models/ml/appscanner/README: -------------------------------------------------------------------------------- 1 | This is the AppScanner model, mainly used to identify mobile app traffic. 
2 | Paper: Appscanner: Automatic fingerprinting of smartphone apps from encrypted network traffic (EuroS&P 2016) -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source ~/venv/bin/activate && 4 | export PYTHONPATH=/home3/jmh/traffic_classification_utils/:$PYTHONPATH && 5 | python3 $1 #foolbox_example.py 6 | -------------------------------------------------------------------------------- /models/dl/appnet/readme: -------------------------------------------------------------------------------- 1 | Packet length sequence: embedding, with a Bi-LSTM for feature extraction 2 | The packet length sequence is truncated to at most 20, with an embedding size of 128 3 | 4 | Byte payload of the first TLS handshake packet: embedding, with a CNN for feature extraction 5 | The first handshake packet is truncated to 1014 bytes, with an embedding size of 256 6 | 7 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | project_base = os.path.dirname(os.path.realpath(__file__)) 4 | raw_dataset_base = project_base + '/dataset/' 5 | min_flow_len = 10 6 | whitelist = [] 7 | -------------------------------------------------------------------------------- /models/ml/cumul/__init__.py: -------------------------------------------------------------------------------- 1 | ################## 2 | ################## 3 | ##Implements the CUMUL Tor classification method based on cells + SVM 4 | ##`https://nymity.ch/tor-dns/pdf/Panchenko2016a.pdf` 5 | ################## 6 | ################## 7 | -------------------------------------------------------------------------------- /models/dl/cnn/Automated Website Fingerprinting through deep learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmhIcoding/traffic_classification_utils/HEAD/models/dl/cnn/Automated Website Fingerprinting through deep learning.pdf -------------------------------------------------------------------------------- /models/dl/lstm/Automated Website Fingerprinting through deep learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmhIcoding/traffic_classification_utils/HEAD/models/dl/lstm/Automated Website Fingerprinting through deep learning.pdf -------------------------------------------------------------------------------- /models/dl/sdae/Automated Website Fingerprinting through deep learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmhIcoding/traffic_classification_utils/HEAD/models/dl/sdae/Automated Website Fingerprinting through deep learning.pdf -------------------------------------------------------------------------------- /models/dl/graphDapp/graphDapp_config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | config = { 3 | 'device_id':'cuda:2', 4 | 'max_epoch':200, 5 | 'gin_layer_num':3, 6 | 'gin_hidden_units':64, 7 | 'iteration_nums':3, 8 | 'learning_rate':5e-4, 9 | 'batch_size':128, 10 | 11 | } 12 | -------------------------------------------------------------------------------- /models/ml/cumul/hyper_params.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | hyper_params = {'boosting_type': 'rf', 3 | 'objective': 'multiclass', 4 | 'num_leaves': 512, 5 | 'learning_rate': 0.05, 6 | 'feature_fraction': 0.9, 7 | 'bagging_fraction': 0.8, 8 | 
'bagging_freq': 5, 9 | 'verbose': -1, 10 | 'num_class':100, 11 | 'lambda_l1':0.05, 12 | 'lambda_l2':0.15 13 | } 14 | -------------------------------------------------------------------------------- /models/ml/bind/hyper_params.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | hyper_params = { 3 | 'boosting_type': 'gbdt', 4 | 'objective': 'multiclass', 5 | 'num_leaves': 512, 6 | 'learning_rate': 0.05, 7 | 'feature_fraction': 0.9, 8 | 'bagging_fraction': 0.8, 9 | 'bagging_freq': 5, 10 | 'verbose': -1, 11 | 'lambda_l1':0.05, 12 | 'lambda_l2':0.15 13 | } 14 | -------------------------------------------------------------------------------- /models/ml/appscanner/hyper_params.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | hyper_params = { 3 | 'boosting_type': 'gbdt', 4 | 'objective': 'multiclass', 5 | 'num_leaves': 32, 6 | 'learning_rate': 0.05, 7 | 'feature_fraction': 0.9, 8 | 'bagging_fraction': 0.8, 9 | 'bagging_freq': 5, 10 | 'verbose': -1, 11 | 'lambda_l1':0.05, 12 | 'lambda_l2':0.15 13 | } 14 | -------------------------------------------------------------------------------- /models/ml/rdp/rdp_config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | #超参数 3 | hyper_params={ 4 | 'boosting_type': 'rf', 5 | 'objective': 'multiclass', 6 | 'num_leaves': 512, 7 | 'learning_rate': 0.05, 8 | 'feature_fraction': 0.9, 9 | 'bagging_fraction': 0.8, 10 | 'bagging_freq': 5, 11 | 'verbose': 0, 12 | 'num_class':6, 13 | 'lambda_l1':0.05, 14 | 'lambda_l2':0.15, 15 | 'time_threshold':0.3 16 | } 17 | -------------------------------------------------------------------------------- /models/dl/df/df_model_config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | nb_classes_template = 55 #标签的个数 number of classes【此处需要修改】 3 | learning_params_template ={ 4 | "epoch":200, 5 | "batch_size":128, 6 | "in_dim":200, #输入向量的长度【此处需要修改】 7 | "input_length":200, #输入向量的长度【此处需要修改】 8 | "lr":0.002, #学习速率 9 | "beta_1":0.9, 10 | "beta_2":0.999, 11 | "epsilon":1e-08, 12 | "decay":0.0 13 | } 14 | 15 | assert learning_params_template['in_dim']==learning_params_template['input_length'] 16 | -------------------------------------------------------------------------------- /models/dl/df_only_D/df_model_config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | nb_classes_template = 55 #标签的个数 number of classes【此处需要修改】 3 | learning_params_template ={ 4 | "epoch":200, 5 | "batch_size":128, 6 | "in_dim":200, #输入向量的长度【此处需要修改】 7 | "input_length":200, #输入向量的长度【此处需要修改】 8 | "lr":0.002, #学习速率 9 | "beta_1":0.9, 10 | "beta_2":0.999, 11 | "epsilon":1e-08, 12 | "decay":0.0 13 | } 14 | 15 | assert learning_params_template['in_dim']==learning_params_template['input_length'] 16 | -------------------------------------------------------------------------------- /models/ml/rdp/readme.cn: -------------------------------------------------------------------------------- 1 | 方法来源:IPCCC 2019 2 | @inproceedings{jiang2019know, 3 | title={I Know What You Are Doing With Remote Desktop}, 4 | author={Jiang, Minghao and Gou, Gaopeng and Shi, Junzheng and Xiong, Gang}, 5 | booktitle={2019 IEEE 38th International Performance Computing and Communications Conference (IPCCC)}, 6 | pages={1--7}, 7 | year={2019}, 8 | organization={IEEE} 9 | } 10 | 11 | 方法逻辑: 12 | 1. 
按照时间idle切分burst 13 | 2. 每个burst提取66维统计特征,前51维是包长的统计特征,后15维是包间隔的统计特征 14 | 3. 训练的时候,以burst为单元进行分类 15 | 4. 测试的时候,给定一条流,对里面的所有burst,按照投票选出它的标签 16 | 17 | -------------------------------------------------------------------------------- /models/dl/fsnet/service_online.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from flask import request, Flask, jsonify 3 | app = Flask(__name__) 4 | from fsnet_main_model import model 5 | 6 | fsnet_model = model('fgnet53', randseed= 128, splitrate=0.1) 7 | @app.route('/fsnet/logit',methods=['POST']) 8 | def get_logit(): 9 | if not request.json or not 'flow' in request.json: 10 | return jsonify({'error':'not flow in request'}) 11 | flow = request.json['flow'] 12 | logit =fsnet_model.logit_online(flow).tolist() 13 | return jsonify({'logit':logit}) 14 | 15 | if __name__ == '__main__': 16 | app.run(host='192.168.255.82',port=10086,debug=True) -------------------------------------------------------------------------------- /models/dl/graphDapp/logger_wrappers.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import logging 3 | import datetime 4 | logger=logging.Logger('graph neural network') 5 | logger.setLevel(logging.NOTSET) 6 | _WARNING = 10 7 | _INFO = 100 8 | _ERROR= 0 9 | level = _INFO 10 | def warning(msg): 11 | if level < _WARNING : 12 | return 13 | logger.warning(msg="Time:{0}, [WARN]: {1}".format(datetime.datetime.now(),msg)) 14 | 15 | def info(msg): 16 | if level < _INFO : 17 | return 18 | logger.warning(msg="Time:{0}, [INFO]: {1}".format(datetime.datetime.now(),msg)) 19 | 20 | def error(msg): 21 | logger.warning(msg="Time:{0}, [ERROR]: {1}".format(datetime.datetime.now(),msg)) -------------------------------------------------------------------------------- /models/ml/cumul/model_predict.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | import lightgbm as lgb 3 | from src.cumul.util import CUMUL_datagenerator 4 | from sklearn.metrics import accuracy_score 5 | import numpy as np 6 | saved_model = "./saved_model/cumul.model" 7 | model = lgb.Booster(model_file=saved_model) 8 | 9 | dator = CUMUL_datagenerator(is_train=True) 10 | def prediction(X): 11 | #X = dator.feature_extract(X) 12 | logit = model.predict(data=X) 13 | y = list(map(lambda x : np.argmax(x),logit)) 14 | #assert len(y.shape) == X.shape[0] 15 | 16 | return y 17 | 18 | if __name__ == '__main__': 19 | dator = CUMUL_datagenerator(is_train=True) 20 | test_X,test_y = dator.testSet() 21 | predict_y = prediction(test_X) 22 | accuracy = accuracy_score(test_y,predict_y) 23 | print('test accuracy:{0}'.format(accuracy)) 24 | 25 | -------------------------------------------------------------------------------- /models/dl/select_gpu.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | import os 3 | import sys 4 | import numpy as np 5 | cmd = "nvidia-smi --query-gpu=index,memory.free --format=csv" 6 | def get_free_gpu_id(): 7 | pipe =os.popen(cmd) 8 | freeMbs =[] 9 | for eachline in pipe: 10 | if 'index' in eachline: 11 | continue 12 | id,freeMb=eachline.split(',') 13 | freeMb = int(freeMb.replace("MiB","")) 14 | freeMbs.append(freeMb) 15 | 16 | return str(np.argmax(freeMbs)) 17 | def set_visible_gpu(): 18 | ''' 19 | 选择当前GPU列表里面,空余内存最大的显卡。 20 | windows平台不做任何选择 21 | ''' 22 | if sys.platform=='linux': 23 | os.environ["CUDA_VISIBLE_DEVICES"] = 
get_free_gpu_id() 24 | else: 25 | os.environ['CUDA_VISBALE_DEIVCES'] ='0' 26 | if __name__ == '__main__': 27 | 28 | print(get_free_gpu_id()) -------------------------------------------------------------------------------- /models/ml/rdp/feature_extractor.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from models.ml.rdp.rdp_config import hyper_params 3 | from models.ml.rdp.statistic_tractor import peak_feature 4 | 5 | def feature_extract(pkt_size, timestamps, time_threshold= hyper_params['time_threshold'] ): 6 | assert len(pkt_size) == len(timestamps) 7 | timestamps = [0.0] + [timestamps[i]- timestamps[i-1] for i in range(1, len(timestamps)) ] 8 | total_peak = [(each[0], each[1]) for each in zip(timestamps, pkt_size)] 9 | peaks = [[]] 10 | for i in range(len(total_peak)): 11 | if total_peak[i][0] <= time_threshold : 12 | peaks[-1].append(total_peak[i]) 13 | else: 14 | peaks.append([total_peak[i]]) 15 | 16 | features = [] 17 | for peak in peaks: 18 | feature = peak_feature(peak) 19 | features.append(feature) 20 | 21 | return features[1:] -------------------------------------------------------------------------------- /models/dl/accuracy_per_class.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | #计算每个类别的准确率 3 | import json 4 | from sklearn import metrics 5 | from sklearn.metrics import classification_report 6 | def accuracy_per_class(y_real,y_pred): 7 | right={} 8 | error={} 9 | for i in range(len(y_real)): 10 | if y_real[i] not in right: 11 | right.setdefault(y_real[i],0) 12 | if y_real[i] not in error: 13 | error.setdefault(y_real[i],0) 14 | if y_real[i]==y_pred[i]: 15 | right[y_real[i]] += 1 16 | else: 17 | error[y_real[i]] += 1 18 | acc={} 19 | for each in right: 20 | acc.setdefault(each,right[each]/(right[each]+error[each])) 21 | print('Accuracy of each class:') 22 | print(acc) 23 | #for i in range(len(right)): 24 | # print("%0.2d\t%0.4f"%(i,acc[i]*100 if i in acc else 100)) 25 | 26 | #计算各种率 27 | print(classification_report(y_true=y_real,y_pred=y_pred,digits=5)) -------------------------------------------------------------------------------- /models/dl/df/df_services.py: -------------------------------------------------------------------------------- 1 | import flask 2 | from df_main_model import model 3 | from flask import Flask, request, jsonify 4 | import requests 5 | 6 | app = Flask(__name__) 7 | df_model = model('datacon',128,0.1) 8 | 9 | _labels = ['0.json','1.json','2.json','3.json','4.json','5.json','6.json','7.json','8.json','9.json','10.json'] 10 | _labels.sort() 11 | 12 | @app.route(rule= '/datacon', methods=['POST']) 13 | def tunnel(): 14 | try: 15 | request_data = request.json 16 | label = df_model.predict(request_data['packet_length']) 17 | label = [_labels[_id].replace('.json','') for _id in label] 18 | return jsonify({'status':'success', 'label': label}) 19 | 20 | except BaseException as exp: 21 | #raise exp 22 | return jsonify({'status':'error', 'data': str(exp)}) 23 | 24 | if __name__ == '__main__': 25 | app.run(host="0.0.0.0", 26 | port=8899, 27 | threaded=True) 28 | -------------------------------------------------------------------------------- /models/dl/df_only_D/df_services.py: -------------------------------------------------------------------------------- 1 | import flask 2 | from df_main_model import model 3 | from flask import Flask, request, jsonify 4 | import requests 5 | 6 | app = Flask(__name__) 7 | df_model = 
model('datacon',128,0.1) 8 | 9 | _labels = ['0.json','1.json','2.json','3.json','4.json','5.json','6.json','7.json','8.json','9.json','10.json'] 10 | _labels.sort() 11 | 12 | @app.route(rule= '/datacon', methods=['POST']) 13 | def tunnel(): 14 | try: 15 | request_data = request.json 16 | label = df_model.predict(request_data['packet_length']) 17 | label = [_labels[_id].replace('.json','') for _id in label] 18 | return jsonify({'status':'success', 'label': label}) 19 | 20 | except BaseException as exp: 21 | #raise exp 22 | return jsonify({'status':'error', 'data': str(exp)}) 23 | 24 | if __name__ == '__main__': 25 | app.run(host="0.0.0.0", 26 | port=8899, 27 | threaded=True) 28 | -------------------------------------------------------------------------------- /models/dl/df/df_services_test.py: -------------------------------------------------------------------------------- 1 | import flask 2 | from df_main_model import model 3 | from flask import Flask, request, jsonify 4 | import requests 5 | 6 | app = Flask(__name__) 7 | df_model = model('datacon_training',128,0.1) 8 | 9 | _labels = ['0.json','1.json','2.json','3.json','4.json','5.json','6.json','7.json','8.json','9.json','10.json'] 10 | _labels.sort() 11 | 12 | @app.route(rule= '/datacon', methods=['POST']) 13 | def tunnel(): 14 | try: 15 | request_data = request.json 16 | label = df_model.predict(request_data['packet_length']) 17 | label = [_labels[_id].replace('.json','') for _id in label] 18 | return jsonify({'status':'success', 'label': label}) 19 | 20 | except BaseException as exp: 21 | #raise exp 22 | return jsonify({'status':'error', 'data': str(exp)}) 23 | 24 | if __name__ == '__main__': 25 | app.run(host="0.0.0.0", 26 | port=8898, 27 | threaded=True) 28 | -------------------------------------------------------------------------------- /models/dl/df_only_D/df_services_test.py: -------------------------------------------------------------------------------- 1 | import flask 2 | from df_main_model import model 3 | from flask import Flask, request, jsonify 4 | import requests 5 | 6 | app = Flask(__name__) 7 | df_model = model('datacon_training',128,0.1) 8 | 9 | _labels = ['0.json','1.json','2.json','3.json','4.json','5.json','6.json','7.json','8.json','9.json','10.json'] 10 | _labels.sort() 11 | 12 | @app.route(rule= '/datacon', methods=['POST']) 13 | def tunnel(): 14 | try: 15 | request_data = request.json 16 | label = df_model.predict(request_data['packet_length']) 17 | label = [_labels[_id].replace('.json','') for _id in label] 18 | return jsonify({'status':'success', 'label': label}) 19 | 20 | except BaseException as exp: 21 | #raise exp 22 | return jsonify({'status':'error', 'data': str(exp)}) 23 | 24 | if __name__ == '__main__': 25 | app.run(host="0.0.0.0", 26 | port=8898, 27 | threaded=True) 28 | -------------------------------------------------------------------------------- /models/dl/lstm/lstm_model_config.py: -------------------------------------------------------------------------------- 1 | 2 | __author__ = 'dk' 3 | ###这个模型来自于:Automated website fingerprinting through deep learning (Vera Rimmer et.al ) 4 | #学习的参数 5 | nb_classes_template = 100 #分类的目标类别数目,指网站的数目【此处需要修改】 6 | learn_params_template={ 7 | "nb_epochs": 50, 8 | "maxlen": 40, #向量最大长度,最大包长序列长度【此处需要修改】 9 | "nb_features": 1, #这个是每个向量的每个分量的维度,类似于embed后的长度,默认就是+1,-1的序列,所以长度为1。 10 | "batch_size": 256, 11 | "val_split": 0.15, 12 | "test_split": 0.15, 13 | "optimizer": "rmsprop", 14 | "nb_layers": 2, 15 | "layers": [ 16 | { 17 | "units": 128, 18 | "dropout": 
0.22244615886559121, 19 | "activation": "tanh", 20 | "rec_activation": "hard_sigmoid" 21 | }, 22 | { 23 | "units": 128, 24 | "dropout": 0.20857652372682717, 25 | "activation": "tanh", 26 | "rec_activation": "hard_sigmoid" 27 | } 28 | ], 29 | "lr": 0.0010053829131721616, 30 | "decay": 0, 31 | "momentum": 0.9, 32 | "nesterov": True 33 | } 34 | -------------------------------------------------------------------------------- /models/dl/mimetic/logger_wrappers.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import logging 3 | import datetime 4 | logger_name = 'appnet' 5 | scirpt_name = '' 6 | logger=logging.Logger(logger_name) 7 | logger.setLevel(logging.NOTSET) 8 | _WARNING = 10 9 | _INFO = 100 10 | _ERROR= 0 11 | level = _INFO 12 | def warning(msg): 13 | if level < _WARNING : 14 | return 15 | msg = "Time:{0}, [{2}-WARN]: {1}".format(datetime.datetime.now(),msg, scirpt_name) 16 | logger.warning(msg) 17 | with open(logger_name+'.log','a') as fp: 18 | fp.writelines(msg+'\n') 19 | 20 | def info(msg): 21 | if level < _INFO : 22 | return 23 | msg="Time:{0}, [{2}-INFO]: {1}".format(datetime.datetime.now(),msg, scirpt_name) 24 | logger.warning(msg) 25 | with open(logger_name+'.log','a') as fp: 26 | fp.writelines(msg+'\n') 27 | def error(msg): 28 | msg ="Time:{0}, [{2}-ERROR]: {1}".format(datetime.datetime.now(),msg, scirpt_name) 29 | logger.warning(msg) 30 | with open(logger_name+'.log','a') as fp: 31 | fp.writelines(msg+'\n') 32 | -------------------------------------------------------------------------------- /models/dl/appnet/logger_wrappers.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import logging 3 | import datetime 4 | logger_name = 'unsupervised_adaption_for_traffic_classification' 5 | scirpt_name = '' 6 | logger=logging.Logger(logger_name) 7 | logger.setLevel(logging.NOTSET) 8 | _WARNING = 10 9 | _INFO = 100 10 | _ERROR= 0 11 | level = _INFO 12 | def warning(msg): 13 | if level < _WARNING : 14 | return 15 | msg = "Time:{0}, [{2}-WARN]: {1}".format(datetime.datetime.now(),msg, scirpt_name) 16 | logger.warning(msg) 17 | with open(logger_name+'.log','a') as fp: 18 | fp.writelines(msg+'\n') 19 | 20 | def info(msg): 21 | if level < _INFO : 22 | return 23 | msg="Time:{0}, [{2}-INFO]: {1}".format(datetime.datetime.now(),msg, scirpt_name) 24 | logger.warning(msg) 25 | with open(logger_name+'.log','a') as fp: 26 | fp.writelines(msg+'\n') 27 | def error(msg): 28 | msg ="Time:{0}, [{2}-ERROR]: {1}".format(datetime.datetime.now(),msg, scirpt_name) 29 | logger.warning(msg) 30 | with open(logger_name+'.log','a') as fp: 31 | fp.writelines(msg+'\n') 32 | -------------------------------------------------------------------------------- /models/dl/appnet/model_seriealization.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | #模型的保存和加载 3 | import torch 4 | import json 5 | import logger_wrappers 6 | import os 7 | 8 | def save(model,model_path): 9 | checkpoint_path = model_path 10 | path = model_path 11 | if os.path.exists(os.path.dirname(checkpoint_path)) == False: 12 | logger_wrappers.warning('create checkpoint path: {0}'.format(os.path.dirname(checkpoint_path))) 13 | os.makedirs(os.path.dirname(checkpoint_path), exist_ok= True) 14 | torch.save(model.state_dict(),path) 15 | info = "Dump model to {0} well.".format(checkpoint_path) 16 | logger_wrappers.warning(info) 17 | 18 | def load(model,model_path, use_gpu=True, 
device=None): 19 | #print(device) 20 | path = model_path 21 | if os.path.exists(path): 22 | if use_gpu == False: 23 | map_location= torch.device('cpu') 24 | else: 25 | map_location = lambda storage, loc: storage.cuda(int(device.split(":")[-1])) 26 | model_CKPT = torch.load(path, map_location=map_location) 27 | model.load_state_dict(model_CKPT) 28 | info ="Load model from {0} well.".format(path) 29 | logger_wrappers.warning(info) 30 | else: 31 | logger_wrappers.warning('Load empty model from {0}.'.format(path)) 32 | return model#,optimizer 33 | -------------------------------------------------------------------------------- /models/dl/mimetic/model_seriealization.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | #模型的保存和加载 3 | import torch 4 | import json 5 | import logger_wrappers 6 | import os 7 | 8 | def save(model,model_path): 9 | checkpoint_path = model_path 10 | path = model_path 11 | if os.path.exists(os.path.dirname(checkpoint_path)) == False: 12 | logger_wrappers.warning('create checkpoint path: {0}'.format(os.path.dirname(checkpoint_path))) 13 | os.makedirs(os.path.dirname(checkpoint_path), exist_ok= True) 14 | torch.save(model.state_dict(),path) 15 | info = "Dump model to {0} well.".format(checkpoint_path) 16 | logger_wrappers.warning(info) 17 | 18 | def load(model,model_path, use_gpu=True, device=None): 19 | #print(device) 20 | path = model_path 21 | if os.path.exists(path): 22 | if use_gpu == False: 23 | map_location= torch.device('cpu') 24 | else: 25 | map_location = lambda storage, loc: storage.cuda(int(device.split(":")[-1])) 26 | model_CKPT = torch.load(path, map_location=map_location) 27 | model.load_state_dict(model_CKPT) 28 | info ="Load model from {0} well.".format(path) 29 | logger_wrappers.warning(info) 30 | else: 31 | logger_wrappers.warning('Load empty model from {0}.'.format(path)) 32 | return model#,optimizer 33 | -------------------------------------------------------------------------------- /models/ml/bind/run.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from BIND.build_vector_dataset import builder 3 | import lightgbm as lgb 4 | from sklearn.metrics import accuracy_score 5 | import numpy as np 6 | ##原始的包长序列 7 | bd = builder(raw_feature_dictory='./raw_feature/',global_feature_dict_filename="./raw_feature/global_feature_dict.vocb") 8 | X_train,y_train,X_test,y_test,X_valid,y_valid=bd.vectorize() 9 | saved_model = "bind.model" 10 | ##开始训练 11 | 12 | lgb_train = lgb.Dataset(data=X_train,label=y_train) 13 | lgb_eval = lgb.Dataset(data=X_valid,label=y_valid) 14 | hyper_params = { 15 | 'boosting_type': 'rf', 16 | 'objective': 'multiclass', 17 | 'num_leaves': 512, 18 | 'learning_rate': 0.05, 19 | 'feature_fraction': 0.9, 20 | 'bagging_fraction': 0.8, 21 | 'bagging_freq': 5, 22 | 'verbose': 0, 23 | 'num_class':55, 24 | 'lambda_l1':0.05, 25 | 'lambda_l2':0.15 26 | } 27 | 28 | gbm = lgb.train(params=hyper_params, 29 | train_set=lgb_train, 30 | valid_sets=lgb_eval, 31 | num_boost_round=100, 32 | early_stopping_rounds=5) 33 | #save model 34 | try: 35 | gbm.save_model(saved_model) 36 | except BaseException as exp: 37 | pass 38 | logit = gbm.predict(data=X_test) 39 | label_predict = list(map(lambda x : np.argmax(x),logit)) 40 | 41 | accuracy = accuracy_score(y_test,label_predict) 42 | 43 | print(accuracy) -------------------------------------------------------------------------------- /models/ml/bind/eval.py: 
-------------------------------------------------------------------------------- 1 | from BIND.build_vector_dataset import builder 2 | import lightgbm as lgb 3 | from sklearn.metrics import accuracy_score 4 | import numpy as np 5 | from accuracy_per_class import accuracy_per_class 6 | def main(raw_feature_dictory,modelpath,global_feature_dict_filename="./global_feature_dict.vocb"): 7 | ##原始的包长序列 8 | bd = builder(raw_feature_dictory=raw_feature_dictory,global_feature_dict_filename=global_feature_dict_filename) 9 | X_train,y_train,X_test,y_test,X_valid,y_valid=bd.vectorize(test_split_ratio=0.5) 10 | saved_model = modelpath 11 | ##开始训练 12 | 13 | lgb_train = lgb.Dataset(data=X_train,label=y_train) 14 | lgb_eval = lgb.Dataset(data=X_valid,label=y_valid) 15 | hyper_params = { 16 | 'boosting_type': 'rf', 17 | 'objective': 'multiclass', 18 | 'num_leaves': 512, 19 | 'learning_rate': 0.05, 20 | 'feature_fraction': 0.9, 21 | 'bagging_fraction': 0.8, 22 | 'bagging_freq': 5, 23 | 'verbose': 0, 24 | 'num_class':53, 25 | 'lambda_l1':0.05, 26 | 'lambda_l2':0.15 27 | } 28 | gbm = lgb.Booster(model_file=saved_model) 29 | logit = gbm.predict(data=X_test) 30 | label_predict = list(map(lambda x : np.argmax(x),logit)) 31 | 32 | accuracy = accuracy_score(y_test,label_predict) 33 | accuracy_per_class(y_real=y_test,y_pred=label_predict) 34 | 35 | print(accuracy) -------------------------------------------------------------------------------- /models/ml/cumul/model_train.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | import sklearn 3 | from sklearn.externals import joblib 4 | from sklearn.metrics import accuracy_score 5 | import lightgbm as lgb 6 | import numpy as np 7 | from src.cumul.util import CUMUL_datagenerator 8 | dator = CUMUL_datagenerator(is_train=True) 9 | 10 | saved_model = "saved_model/cumul.model" 11 | train_X,train_y = dator.trainSet() 12 | valid_X,valid_y = dator.validSet() 13 | test_X,test_y = dator.testSet() 14 | 15 | lgb_train = lgb.Dataset(data=train_X,label=train_y) 16 | lgb_eval = lgb.Dataset(data=valid_X,label=valid_y) 17 | 18 | hyper_params = { 19 | 'boosting_type': 'rf', 20 | 'objective': 'multiclass', 21 | 'num_leaves': 512, 22 | 'learning_rate': 0.05, 23 | 'feature_fraction': 0.9, 24 | 'bagging_fraction': 0.8, 25 | 'bagging_freq': 5, 26 | 'verbose': 0, 27 | 'num_class':100, 28 | 'lambda_l1':0.05, 29 | 'lambda_l2':0.15 30 | } 31 | 32 | gbm = lgb.train(params=hyper_params, 33 | train_set=lgb_train, 34 | valid_sets=lgb_eval, 35 | num_boost_round=3000, 36 | early_stopping_rounds=10) 37 | 38 | logit = gbm.predict(data=test_X) 39 | label_predict = list(map(lambda x : np.argmax(x),logit)) 40 | 41 | accuracy = accuracy_score(test_y,label_predict) 42 | print(accuracy) 43 | 44 | #save model 45 | gbm.save_model(saved_model) 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /get_dataset_statistics.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | import numpy as np 3 | import json 4 | import os 5 | 6 | def statistic(lengths): 7 | mean = np.mean(lengths) 8 | min = np.min(lengths) 9 | max = np.max(lengths) 10 | std = np.std(lengths) 11 | median = np.median(lengths) 12 | print('\tmean:{0}, std:{1}, min:{2}, max:{3}, median:{4}'.format(mean, std, min, max, median)) 13 | percent = [10,20,30,40,50,60,70,80,90,95,99] 14 | for each_percent in percent: 15 | print("\t\tP( v<={1}) = {0}".format(each_percent, 
np.percentile(lengths,each_percent))) 16 | def parser_dataset(dataset_dir): 17 | total_length = [] 18 | print(dataset_dir) 19 | for _root, _dirs, _files in os.walk(dataset_dir): 20 | if len(_files) == 0 : 21 | raise ValueError('{0} empty!'.format(dataset_dir)) 22 | 23 | for file in _files: 24 | length = [] 25 | path = _root + '/' + file 26 | with open(path) as fp: 27 | data = json.load(fp) 28 | 29 | for flow in data: 30 | length.append(len(flow['packet_length'])) 31 | total_length.append(length[-1]) 32 | 33 | #print(file) 34 | #print(statistic(length)) 35 | 36 | print('total:') 37 | statistic(total_length) 38 | 39 | if __name__ == '__main__': 40 | dataset_dir = 'dataset/tifs2015' 41 | parser_dataset(dataset_dir) -------------------------------------------------------------------------------- /models/dl/graphDapp/model_seriealization.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | #模型的保存和加载 3 | import torch 4 | import json 5 | import logger_wrappers 6 | import os 7 | model_name = "/gnn_model.pkl" 8 | def save(model,optimizer,checkpoint_path): 9 | path = checkpoint_path + model_name 10 | if os.path.exists(checkpoint_path) == False: 11 | os.makedirs(checkpoint_path) 12 | torch.save(model.state_dict(),path) 13 | #torch.save( 14 | # {'state_dict':model.state_dict(), 15 | # 'optimizer':optimizer.state_dict()}, 16 | # (checkpoint_path+model_name).replace("//","/") 17 | #) 18 | info = "Dump model to {0} well.".format(checkpoint_path) 19 | logger_wrappers.warning(info) 20 | 21 | def load(model,optimizer,checkpoint_path, use_gpu=True): 22 | path = (checkpoint_path+model_name).replace("//","/") 23 | if os.path.exists(path): 24 | if use_gpu == False: 25 | map_location= torch.device('cpu') 26 | else: 27 | map_location = None 28 | model_CKPT = torch.load(path, map_location=map_location) 29 | model.load_state_dict(model_CKPT) 30 | #model.load_state_dict(model_CKPT['state_dict']) 31 | #optimizer.load_state_dict(model_CKPT['optimizer']) 32 | info ="Load model from {0} well.".format(path) 33 | logger_wrappers.warning(info) 34 | else: 35 | logger_wrappers.warning('Load empty model from {0}.'.format(path)) 36 | return model#,optimizer 37 | -------------------------------------------------------------------------------- /models/ml/cumul/attack_cumul.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | import lightgbm as lgb 3 | from src.cumul.util import CUMUL_datagenerator 4 | from sklearn.metrics import accuracy_score 5 | from src.df.src import utility 6 | import numpy as np 7 | saved_model = "./saved_model/cumul.model" 8 | model = lgb.Booster(model_file=saved_model) 9 | dator = CUMUL_datagenerator(is_train=False) 10 | def prediction(X): 11 | print(X.shape) 12 | X = dator.feature_extract(X) 13 | logit = model.predict(data=X) 14 | y = list(map(lambda x : np.argmax(x),logit)) 15 | #assert len(y.shape) == X.shape[0] 16 | 17 | return y 18 | def flatten(X_compressed): 19 | X =[] 20 | for i in range(X_compressed.shape[0]): 21 | x =[] 22 | for j in range(X_compressed.shape[1]): 23 | if (X_compressed[i,j])<0: 24 | x += [-1] * abs(int(X_compressed[i,j])) 25 | elif X_compressed[i,j] >0 : 26 | x +=[1] * abs(int(X_compressed[i,j])) 27 | x+=[0] * 5000 28 | X.append(x[:5000]) 29 | return np.array(X) 30 | if __name__ == '__main__': 31 | preprocess = CUMUL_datagenerator(is_train=False) 32 | X_train, y_train, X_valid, y_valid, X_test, y_test = 
utility.LoadDataRetrain(is_cluster=False,dataset_dir=None) 33 | predict_y = prediction(flatten(X_train)) 34 | accuracy = accuracy_score(y_train,predict_y) 35 | print('test accuracy:{0}'.format(accuracy)) 36 | -------------------------------------------------------------------------------- /models/ml/bind/train.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from BIND.build_vector_dataset import builder 3 | import lightgbm as lgb 4 | from sklearn.metrics import accuracy_score 5 | import numpy as np 6 | def main(raw_feature_dictory,modelpath,global_feature_dict_filename="./global_feature_dict.vocb"): 7 | ##原始的包长序列 8 | bd = builder(raw_feature_dictory=raw_feature_dictory,global_feature_dict_filename=global_feature_dict_filename) 9 | X_train,y_train,X_test,y_test,X_valid,y_valid=bd.vectorize() 10 | saved_model = modelpath 11 | ##开始训练 12 | 13 | lgb_train = lgb.Dataset(data=X_train,label=y_train) 14 | lgb_eval = lgb.Dataset(data=X_valid,label=y_valid) 15 | hyper_params = { 16 | 'boosting_type': 'rf', 17 | 'objective': 'multiclass', 18 | 'num_leaves': 512, 19 | 'learning_rate': 0.05, 20 | 'feature_fraction': 0.9, 21 | 'bagging_fraction': 0.8, 22 | 'bagging_freq': 5, 23 | 'verbose': 0, 24 | 'num_class':53, 25 | 'lambda_l1':0.05, 26 | 'lambda_l2':0.15 27 | } 28 | 29 | gbm = lgb.train(params=hyper_params, 30 | train_set=lgb_train, 31 | valid_sets=lgb_eval, 32 | num_boost_round=100, 33 | early_stopping_rounds=5) 34 | #save model 35 | try: 36 | gbm.save_model(saved_model) 37 | except BaseException as exp: 38 | pass 39 | logit = gbm.predict(data=X_test) 40 | label_predict = list(map(lambda x : np.argmax(x),logit)) 41 | 42 | accuracy = accuracy_score(y_test,label_predict) 43 | print(accuracy) 44 | if __name__ == '__main__': 45 | main("./raw_feature/",global_feature_dict_filename="./global_feature_dict.vocb") 46 | -------------------------------------------------------------------------------- /models/ml/appscanner/min_max.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | ##_min和_max必须得是train.py输出的那样 3 | _min=[-1.44800000e+03,-1.44800000e+03,-1.44800000e+03, 0.00000000e+00, 4 | 0.00000000e+00, 0.00000000e+00,-9.74732319e+00,-3.00000000e+00, 5 | -1.44800000e+03,-1.44800000e+03,-1.44800000e+03,-1.44800000e+03, 6 | -1.44800000e+03,-1.44800000e+03,-1.44800000e+03,-1.44800000e+03, 7 | -1.44800000e+03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 8 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 9 | -2.23682312e+01,-3.00000000e+00, 0.00000000e+00, 0.00000000e+00, 10 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 11 | 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 12 | -1.44800000e+03,-1.44800000e+03,-1.44800000e+03, 0.00000000e+00, 13 | 0.00000000e+00, 0.00000000e+00,-2.23682312e+01,-3.00000000e+00, 14 | -1.44800000e+03,-1.44800000e+03,-1.44800000e+03,-1.44800000e+03, 15 | -1.44800000e+03,-1.44800000e+03,-1.44800000e+03,-1.44800000e+03, 16 | -1.44800000e+03, 1.00000000e+00] 17 | 18 | _max=[0.00000000e+00,0.00000000e+00,0.00000000e+00,7.23500000e+02, 19 | 7.23500000e+02,5.23452250e+05,3.15449531e+01,9.93387334e+02, 20 | 0.00000000e+00,0.00000000e+00,0.00000000e+00,0.00000000e+00, 21 | 0.00000000e+00,0.00000000e+00,0.00000000e+00,0.00000000e+00, 22 | 0.00000000e+00,1.00000000e+03,1.44800000e+03,1.44800000e+03, 23 | 1.44800000e+03,7.23500000e+02,7.23500000e+02,5.23452250e+05, 24 | 
9.40585157e+00,4.99444233e+02,1.44800000e+03,1.44800000e+03, 25 | 1.44800000e+03,1.44800000e+03,1.44800000e+03,1.44800000e+03, 26 | 1.44800000e+03,1.44800000e+03,1.44800000e+03,1.00000000e+03, 27 | 1.44800000e+03,1.44800000e+03,1.44800000e+03,1.44493827e+03, 28 | 1.32699466e+03,1.76091482e+06,2.61611211e+01,7.35659009e+02, 29 | 1.44800000e+03,1.44800000e+03,1.44800000e+03,1.44800000e+03, 30 | 1.44800000e+03,1.44800000e+03,1.44800000e+03,1.44800000e+03, 31 | 1.44800000e+03,1.00000000e+03] -------------------------------------------------------------------------------- /models/dl/cnn/cnn_model_config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | ###这个模型来自于:Automated website fingerprinting through deep learning (Vera Rimmer et.al ) 3 | nb_classes_template = 100 #分类的目标类别数目,指网站的数目【此处需要修改】 4 | learning_params_template={ 5 | "nb_epochs": 15, 6 | "input_length":40, 7 | "maxlen": 40, #向量长度【此处需要修改】 8 | "nb_features": 1, 9 | "batch_size": 256, 10 | "val_split": 0.05, 11 | "test_split": 0.05, 12 | "optimizer": "rmsprop", 13 | "lr": 0.0008, 14 | "decay": 0, 15 | 'momentum':0.9, 16 | "nb_layers": 7, 17 | "layers": [ 18 | { 19 | "name": "conv", 20 | "rate": 0.25, 21 | "filters": 32, 22 | "kernel_size": 5, 23 | "activation": "relu", 24 | "stride": 1 25 | }, 26 | { 27 | "name": "conv", 28 | "pool_size": 4, 29 | "filters": 32, 30 | "kernel_size": 5, 31 | "activation": "relu", 32 | "stride": 1 33 | }, 34 | { 35 | "name": "maxpooling", 36 | "pool_size": 4 37 | }, 38 | { 39 | "name": "lstm", 40 | "units": 128 41 | }, 42 | { 43 | "last": True, 44 | "units": nb_classes_template, #这个就是最后一层的输出神经元个数,必须等于nb_classes 45 | "name": "dense", 46 | "activation": "softmax", 47 | "regularization": 0 48 | } 49 | ] 50 | } 51 | try: 52 | assert nb_classes_template == learning_params_template['layers'][-1]['units'] 53 | except AssertionError as exp: 54 | print("cnn model: The last layer units should be equals to the number of classes.") 55 | print("{0}:{1}".format(__file__,str(exp))) 56 | raise AssertionError(exp) 57 | try: 58 | assert learning_params_template['maxlen']==learning_params_template['input_length'] 59 | except AssertionError as exp: 60 | print("cnn model: The max_len should be equal to input_length, because they are alias name for each other.") 61 | print("{0}:{1}".format(__file__,str(exp))) 62 | raise AssertionError(exp) 63 | -------------------------------------------------------------------------------- /models/dl/sdae/sdae_model_config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | ###这个模型来自于:Automated website fingerprinting through deep learning (Vera Rimmer et.al ) 3 | #学习的参数 4 | nb_classes_template = 100 #分类的目标类别数目,指网站的数目【此处需要修改】 5 | learning_params_template={ 6 | "nb_epochs" : 30, 7 | "maxlen" : 40, #原始向量的长度【此处需要修改】 8 | "features" : 2, 9 | "batch_size" : 32, 10 | "val_split" : 0.05, 11 | "test_split" : 0.05, 12 | "optimizer" : "sgd", 13 | "nb_layers" : 3, 14 | "lr" : 0.001, 15 | "momentum" : 0.9, 16 | "decay" : 0.0, 17 | "nesterov" : True, 18 | "layers":#各个自编码器层的参数设置 19 | [ 20 | { #第一层的超参数 21 | "in_dim" : 40, #encoder输入向量长度【此处需要修改】 22 | "out_dim" : 700,#decoder输出向量长度 23 | "epochs": 20, 24 | "batch_size": 128, 25 | "dropout" :0.2, #dropout的概率 26 | "optimizer" : "sgd", #本层的优化器,可选性:sgd(随机梯度下降),adam,rmsprop 27 | "enc_activation" : "tanh",#编码器的激活函数 28 | "dec_activation" : "linear",#解码器的激活函数 29 | "lr":0.001, #sgd的优化器参数 30 | "momentum" : 0.9, 31 | "decay" : 0.0 32 | }, 33 | { #第二层超参数 34 | 
"in_dim": 700, 35 | "out_dim": 500, 36 | "epochs": 10, 37 | "batch_size": 128, 38 | "dropout":0.2, 39 | "optimizer":"sgd", 40 | "enc_activation": "tanh", 41 | "dec_activation":"linear", 42 | "lr": 0.001, 43 | "momentum":0.9, 44 | "decay": 0.0 45 | }, 46 | { #第三层超参数 47 | "in_dim" : 500, 48 | "out_dim": 300, 49 | "epochs": 10, 50 | "batch_size": 128, 51 | "dropout": 0.2, 52 | "optimizer": "sgd", 53 | "enc_activation": "tanh", 54 | "dec_activation": "linear", 55 | "lr" : 0.001, 56 | "momentum": 0.9, 57 | "decay" : 0.0 58 | } 59 | ] 60 | } 61 | -------------------------------------------------------------------------------- /models/ml/appscanner/eval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from wf_attacks.data_utils import LoadDataApp_crossversion 3 | from wf_attacks.appscanner.feature_extractor import feature_extract 4 | import lightgbm as lgb 5 | from sklearn.metrics import accuracy_score 6 | import numpy as np 7 | from wf_attacks.appscanner.min_max import _min ,_max 8 | from accuracy_per_class import accuracy_per_class 9 | def main(test_set,modelpath): 10 | ##原始的包长序列 11 | global _min,_max 12 | X_train_r, y_train_r, X_valid_r, y_valid_r, X_test_r, y_test_r = LoadDataApp_crossversion(test_set) 13 | saved_model = modelpath #"appscanner.model" 14 | print('before extract feature') 15 | ##提取统计特征 16 | X_train =[] 17 | X_valid =[] 18 | X_test =[] 19 | #print(X_train_r[0]) 20 | #print(X_valid_r[1]) 21 | #print(X_test_r[2]) 22 | for i in range(X_train_r.shape[0]): 23 | X_train.append(feature_extract(X_train_r[i])) 24 | for i in range(X_test_r.shape[0]): 25 | X_test.append(feature_extract(X_test_r[i])) 26 | for i in range(X_valid_r.shape[0]): 27 | X_valid.append(feature_extract(X_valid_r[i])) 28 | print('feature extract well!') 29 | ##归一化操作 30 | 31 | 32 | _min = np.array(_min) 33 | _max =np.array(_max) 34 | 35 | X_train = np.array(X_train) 36 | X_valid = np.array(X_valid) 37 | X_test = np.array(X_test) 38 | _min = np.array(_min) 39 | _max =np.array(_max) 40 | X_train = (X_train-_min)/(_max-_min) 41 | X_valid = (X_valid-_min)/(_max-_min) 42 | X_test = (X_test-_min)/(_max-_min) 43 | print('normalize well!') 44 | print(X_train[0]) 45 | print(X_valid[1]) 46 | print(X_test[2]) 47 | ## 48 | y_test = np.argmax(y_test_r,1) 49 | y_train =np.argmax(y_train_r,1) 50 | y_valid =np.argmax(y_valid_r,1) 51 | print(y_test[0:10]) 52 | #load model 53 | try: 54 | gbm = lgb.Booster(model_file=saved_model) 55 | except BaseException as exp: 56 | raise exp 57 | logit = gbm.predict(data=X_test) 58 | label_predict = list(map(lambda x : np.argmax(x),logit)) 59 | 60 | accuracy = accuracy_score(y_test,label_predict) 61 | accuracy_per_class(y_real=y_test,y_pred=label_predict) 62 | 63 | print("[Appscanner] Test on {0}, accuracy is {1}. 
".format(test_set,accuracy)) 64 | 65 | -------------------------------------------------------------------------------- /models/dl/graphDapp/graphDapp_main_model.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | 3 | from models.model_base import abs_model 4 | from config import raw_dataset_base 5 | from models.dl.graphDapp.data_builder import Dataset 6 | from models.dl.graphDapp.train import main as train_main 7 | from models.dl.graphDapp.test import main as test_main 8 | 9 | import os 10 | class model(abs_model): 11 | def __init__(self, dataset, randseed, splitrate ,max_len=200): 12 | super(model,self).__init__('graphDapp',randseed= randseed) 13 | if os.path.exists(self.database) == False: 14 | os.makedirs(self.database,exist_ok=True) 15 | 16 | self.dataset = dataset 17 | self.model = self.database + '/'+ self.name + '_' + dataset + '_model' 18 | self.data = self.database + '/'+ self.name + '_' + dataset + '/' 19 | self.splitrate = splitrate 20 | #原始数据集目录 21 | full_rdata = raw_dataset_base + self.dataset 22 | self.full_rdata = full_rdata 23 | self.max_len = max_len 24 | if self.data_exists() == False: 25 | self.parser_raw_data() 26 | def parser_raw_data(self): 27 | def pad_sequence(x, max_len= self.max_len, pad_value=0): 28 | r = x + [pad_value] * (max_len - len(x)) 29 | return r[:max_len] 30 | full_rdata = self.full_rdata 31 | if os.path.exists(full_rdata) == False: 32 | raise OSError('Dataset {0} (full path: {1}) does not exist!'.format(self.dataset,full_rdata)) 33 | os.makedirs(self.data, exist_ok=True) 34 | ##从原始数据集构建graphDApp所需数据 35 | dator = Dataset(raw_dir=full_rdata, 36 | dumpfile=self.data + '{0}.gzip'.format(self.dataset), 37 | split_rate=self.splitrate, 38 | renew= True) 39 | dator.save_dumpfile() 40 | 41 | def train(self): 42 | train_main(dataset_name=self.data + '{0}.gzip'.format(self.dataset), modelpath= self.model) 43 | 44 | def test(self): 45 | test_main(dataset_name=self.data + '{0}.gzip'.format(self.dataset), modelpath= self.model) 46 | 47 | if __name__ == '__main__': 48 | graphdapp_model = model('awf200_burst', randseed= 128, splitrate=0.1) 49 | graphdapp_model.parser_raw_data() 50 | graphdapp_model.train() 51 | graphdapp_model.test() 52 | -------------------------------------------------------------------------------- /models/dl/beauty/cnn_model_config.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | ''' 3 | ###这个模型来自于:@inproceedings{schuster2017beauty, 4 | title={Beauty and the burst: Remote identification of encrypted video streams}, 5 | author={Schuster, Roei and Shmatikov, Vitaly and Tromer, Eran}, 6 | booktitle={26th $\{$USENIX$\}$ Security Symposium ($\{$USENIX$\}$ Security 17)}, 7 | pages={1357--1374}, 8 | year={2017} 9 | } 10 | ''' 11 | learning_params_template={ 12 | "epoch": 200, 13 | "input_length":200, 14 | "maxlen": 200, #向量长度【此处需要修改】 15 | "nb_features": 1, 16 | "batch_size": 256, 17 | "val_split": 0.05, 18 | "test_split": 0.05, 19 | "optimizer": "adam", 20 | "lr": 0.0008, 21 | "decay": 0, 22 | 'momentum':0.9, 23 | "nb_layers": 7, 24 | "layers": [ 25 | { 26 | "name": "conv", 27 | "filters": 32, 28 | "kernel_size": 5, 29 | "activation": "relu", 30 | "stride": 1 31 | }, 32 | { 33 | "name": "conv", 34 | "filters": 32, 35 | "kernel_size": 5, 36 | "activation": "relu", 37 | "stride": 1 38 | }, 39 | { 40 | "name": "conv", 41 | "filters": 32, 42 | "kernel_size": 5, 43 | "activation": "relu", 44 | "stride": 1, 45 | }, 46 | { 47 | 
'name':'dropout', 48 | 'rate': 0.5 49 | }, 50 | { 51 | "name": "maxpooling", 52 | "pool_size": 4 53 | }, 54 | { 55 | 'name':'dropout', 56 | 'rate': 0.3 57 | }, 58 | { 59 | "units": 64, #这个就是最后一层的输出神经元个数,必须等于nb_classes 60 | "name": "dense", 61 | "activation": "relu", 62 | "regularization": 0.0 63 | }, 64 | { 65 | 'name':'dropout', 66 | 'rate': 0.5 67 | }, 68 | { 69 | 'name':'flatten' 70 | }, 71 | { 72 | "last": True, 73 | "units": None, #这个就是最后一层的输出神经元个数,必须等于nb_classes 74 | "name": "dense", 75 | "activation": "softmax", 76 | "regularization": 0.0 77 | } 78 | ] 79 | } 80 | try: 81 | assert learning_params_template['maxlen']==learning_params_template['input_length'] 82 | except AssertionError as exp: 83 | print("cnn model: The max_len should be equal to input_length, because they are alias name for each other.") 84 | print("{0}:{1}".format(__file__,str(exp))) 85 | raise AssertionError(exp) 86 | -------------------------------------------------------------------------------- /models/ml/appscanner/feature_extractor.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | #appscanner使用的特征提取方法,提取得到54维统计特征 3 | import numpy as np 4 | from scipy.stats import skew,kurtosis 5 | _min=[1e9+1] * 54 6 | _max=[0.0] * 54 7 | def feature_trace(trace): 8 | feature = [0.0] * 18 9 | if len(trace)==0: 10 | return feature 11 | feature[0] = np.min(trace) 12 | feature[1] = np.max(trace) 13 | feature[2] = np.mean(trace) 14 | feature[3] = np.median(np.absolute(trace-np.mean(trace))) 15 | feature[4] = np.std(trace) 16 | feature[5] = np.var(trace) 17 | feature[6] = skew(trace) 18 | feature[7] = kurtosis(trace) 19 | ##百分位数 20 | p=[10,20,30,40,50,60,70,80,90] 21 | percentile =np.percentile(trace,p) 22 | for i in range(9): 23 | feature[8+i] = percentile[i] 24 | feature[17]= len(trace) 25 | return feature 26 | 27 | def feature_extract(pkt_length_sequence): 28 | ingoing_trace =[] 29 | outgoing_trace =[] 30 | trace =[] 31 | pkt_length_sequence = np.array(pkt_length_sequence) 32 | pkt_length_sequence = pkt_length_sequence.reshape((-1)) 33 | for i in range(pkt_length_sequence.shape[0]): 34 | if pkt_length_sequence[i] < 0 : 35 | ingoing_trace.append(pkt_length_sequence[i]) 36 | if pkt_length_sequence[i] > 0 : 37 | outgoing_trace.append(pkt_length_sequence[i]) 38 | if pkt_length_sequence[i]!=0: 39 | trace.append(pkt_length_sequence[i]) 40 | if pkt_length_sequence[i]==0: 41 | break 42 | 43 | in_feature = feature_trace(ingoing_trace) 44 | out_feature = feature_trace(outgoing_trace) 45 | bi_feature = feature_trace(trace) 46 | 47 | feature = in_feature+out_feature+bi_feature 48 | for i in range(54): 49 | if feature[i] > _max[i] : 50 | _max[i] = feature[i] 51 | if feature[i] < _min[i]: 52 | _min[i] = feature[i] 53 | return feature 54 | def normalize(feature,min=None,max=None): 55 | if type(min) == type(None): 56 | min = _min 57 | if type(max) == type(None): 58 | max = _max 59 | return (feature-min)/(max-min) 60 | 61 | if __name__ == '__main__': 62 | pkt_length_seq =[383, -290, 90, -165, 1448, 463, 929, 389, 1448, 976, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 1448, 717, 105, 1448, 1448, 1448, 1448, 1051, 144, 219, 196, 603, 113] 63 | x=feature_extract(pkt_length_seq) 64 | print(x) 65 | print(len(x)) 66 | -------------------------------------------------------------------------------- /models/dl/df_eval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | #设置Tensorflow的日志等级 4 | 
#os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 7 | 8 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 9 | from df import df_model_config 10 | 11 | from cnn import cnn_model_config 12 | from sdae import sdae_model_config 13 | from lstm import lstm_model_config 14 | 15 | from data_utils import LoadDataNoDefCW100,LoadDataNoDefCW,LoadDataWalkieTalkieCW,LoadDataApp_crossversion 16 | 17 | #使用步骤 18 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 19 | #2. 读取数据,构造好训练集,验证集,测试集 20 | #3. build_model() 21 | #4. 调用fit() 22 | #5. 测试一下 23 | #6. 保存模型 24 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 25 | model = CNN_model() 26 | model.build_model() 27 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 28 | batch_size=cnn_model_config.learning_params_template['batch_size'], 29 | epochs=cnn_model_config.learning_params_template['nb_epochs']) 30 | model.save_model() 31 | score = model.evaluate(X_test=X_test,y_test=y_test) 32 | print('simple CNN accuracy :{0}'.format(score)) 33 | 34 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 35 | 36 | model =DF_model() 37 | model.load_model() 38 | score = model.evaluate(X_test=X_test,y_test=y_test) 39 | print('Deep Fingerprinting Test on test dataset accuracy :{0}'.format(score)) 40 | 41 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 42 | model = LSTM_model() 43 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 44 | model = SDAE_model() 45 | model.build_model() 46 | model.pre_train(x_train=X_train,x_test=X_test) 47 | 48 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 49 | batch_size=sdae_model_config.learning_params_template['batch_size'], 50 | epochs=sdae_model_config.learning_params_template['nb_epochs']) 51 | model.save_model() 52 | score = model.evaluate(X_test=X_test,y_test=y_test) 53 | print('sdae accuracy :{0}'.format(score)) 54 | if __name__ == '__main__': 55 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataApp_crossversion("/home3/jmh/app_dataset_noise/") 56 | test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 57 | -------------------------------------------------------------------------------- /models/dl/sdae_eval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | #设置Tensorflow的日志等级 4 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 7 | 8 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 9 | from df import df_model_config 10 | 11 | from cnn import cnn_model_config 12 | from sdae import sdae_model_config 13 | from lstm import lstm_model_config 14 | 15 | from data_utils import LoadDataNoDefCW100,LoadDataNoDefCW,LoadDataWalkieTalkieCW,LoadDataApp,LoadDataApp_crossversion 16 | 17 | #使用步骤 18 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 19 | #2. 读取数据,构造好训练集,验证集,测试集 20 | #3. build_model() 21 | #4. 调用fit() 22 | #5. 测试一下 23 | #6. 
保存模型 24 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 25 | model = CNN_model() 26 | model.build_model() 27 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 28 | batch_size=cnn_model_config.learning_params_template['batch_size'], 29 | epochs=cnn_model_config.learning_params_template['nb_epochs']) 30 | model.save_model() 31 | score = model.evaluate(X_test=X_test,y_test=y_test) 32 | print('simple CNN accuracy :{0}'.format(score)) 33 | 34 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 35 | 36 | model =DF_model() 37 | model.build_model() 38 | 39 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 40 | batch_size=df_model_config.learning_params_template['batch_size'], 41 | epochs=df_model_config.learning_params_template['epoch']) 42 | model.save_model() 43 | score = model.evaluate(X_test=X_test,y_test=y_test) 44 | print('Deep Fingerprinting accuracy :{0}'.format(score)) 45 | 46 | 47 | 48 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 49 | model = LSTM_model() 50 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 51 | model = SDAE_model() 52 | model.load_model() 53 | score = model.evaluate(X_test=X_test,y_test=y_test) 54 | print('sdae Test on test dataset accuracy :{0}'.format(score)) 55 | if __name__ == '__main__': 56 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataApp_crossversion()#LoadDataWalkieTalkieCW() #LoadDataNoDefCW() 57 | #test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 58 | #test_cnn(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 59 | test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test) -------------------------------------------------------------------------------- /models/dl/cnn_eval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | #设置Tensorflow的日志等级 4 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 7 | 8 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 9 | from df import df_model_config 10 | 11 | from cnn import cnn_model_config 12 | from sdae import sdae_model_config 13 | from lstm import lstm_model_config 14 | 15 | from data_utils import LoadDataNoDefCW100,LoadDataNoDefCW,LoadDataWalkieTalkieCW,LoadDataApp,LoadDataApp,LoadDataApp_crossversion 16 | 17 | #使用步骤 18 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 19 | #2. 读取数据,构造好训练集,验证集,测试集 20 | #3. build_model() 21 | #4. 调用fit() 22 | #5. 测试一下 23 | #6. 
保存模型 24 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 25 | model = CNN_model() 26 | model.load_model() 27 | score = model.evaluate(X_test=X_test,y_test=y_test) 28 | print('simple CNN Test on test dataset accuracy :{0}'.format(score)) 29 | 30 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 31 | 32 | model =DF_model() 33 | model.build_model() 34 | 35 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 36 | batch_size=df_model_config.learning_params_template['batch_size'], 37 | epochs=df_model_config.learning_params_template['epoch']) 38 | model.save_model() 39 | score = model.evaluate(X_test=X_test,y_test=y_test) 40 | print('Deep Fingerprinting accuracy :{0}'.format(score)) 41 | 42 | 43 | 44 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 45 | model = LSTM_model() 46 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 47 | model = SDAE_model() 48 | model.build_model() 49 | model.pre_train(x_train=X_train,x_test=X_test) 50 | 51 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 52 | batch_size=sdae_model_config.learning_params_template['batch_size'], 53 | epochs=sdae_model_config.learning_params_template['nb_epochs']) 54 | model.save_model() 55 | score = model.evaluate(X_test=X_test,y_test=y_test) 56 | print('sdae accuracy :{0}'.format(score)) 57 | if __name__ == '__main__': 58 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataApp_crossversion() 59 | #test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 60 | test_cnn(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 61 | #test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test) -------------------------------------------------------------------------------- /models/dl/graphDapp/test.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import numpy as np 3 | from models.dl.graphDapp import logger_wrappers 4 | import torch as th 5 | from torch import nn 6 | from torch import optim 7 | from torch.nn import functional as F 8 | 9 | from models.dl.graphDapp.model_seriealization import save,load 10 | from models.dl.graphDapp.data_builder import Dataset_fgnet 11 | from models.dl.graphDapp.DApp_Classifier import DApp_classifier 12 | from models.dl.graphDapp.graphDapp_config import config 13 | from sklearn.metrics import classification_report 14 | use_gpu = th.cuda.is_available() 15 | if use_gpu : 16 | device_id = config['device_id'] 17 | device= device_id 18 | else: 19 | device= "cpu" 20 | 21 | def main(dataset_name, modelpath,max_epoch=0): 22 | data_loader = Dataset_fgnet(raw_dir=r'',dumpfile=dataset_name,renew=False) 23 | print(data_loader) 24 | model = DApp_classifier(nb_classes=len(data_loader.labelname), 25 | gin_layer_num=config['gin_layer_num'], 26 | gin_hidden_units=config['gin_hidden_units'], 27 | iteration_nums=config['iteration_nums'], 28 | device= device,use_gpu= use_gpu) 29 | loss_func = nn.CrossEntropyLoss() 30 | optimizer = optim.Adam(params=model.parameters(),lr=5e-5) 31 | 32 | model = load(model,optimizer=optimizer,checkpoint_path=modelpath) 33 | if use_gpu: 34 | model = model.cuda(device) 35 | batch_size = config['batch_size'] 36 | 37 | model.eval() 38 | acc_list =[] 39 | ground_truth = [] 40 | predict_truth = [] 41 | 42 | for subset in range(len(data_loader.test_set)//batch_size): 43 | graphs,labels = data_loader.next_test_batch(batch_size=batch_size) 44 | if 
use_gpu : 45 | graphs = graphs.to(th.device(device)) 46 | labels = labels.to(th.device(device)) 47 | predict_labels = model(graphs) 48 | predict_labels = F.softmax(predict_labels,1) 49 | argmax_labels = th.argmax(predict_labels,1) 50 | ground_truth = ground_truth + labels.tolist() 51 | predict_truth = predict_truth + argmax_labels.tolist() 52 | acc = (labels == argmax_labels).float().sum().item() / len(labels) * 100 53 | acc_list.append(acc) 54 | info='Accuracy of argmax predictions on the test subset{1}: {0:4f}%'.format(acc,subset) 55 | info = 'Average Accuracy on entire test set:{:0.4f}%'.format(np.mean(acc_list)) 56 | logger_wrappers.info(info) 57 | print(classification_report(y_true=ground_truth,y_pred=predict_truth,digits=5)) 58 | -------------------------------------------------------------------------------- /models/dl/fsnet/README.md: -------------------------------------------------------------------------------- 1 | 2 | # FS-Net 3 | 4 | Implementation of "FS-Net: A Flow Sequence Network For Encrypted Traffic Classification". 5 | 6 | If you find this method helpful for your research, please cite this paper: 7 | 8 | ```latex 9 | @inproceedings{LiuHXCL19, 10 | author = {Chang Liu and 11 | Longtao He and 12 | Gang Xiong and 13 | Zigang Cao and 14 | Zhen Li}, 15 | title = {FS-Net: {A} Flow Sequence Network For Encrypted Traffic Classification}, 16 | booktitle = {{IEEE} Conference on Computer Communications (INFOCOM), 2019}, 17 | pages = {1171--1179}, 18 | year = {2019} 19 | } 20 | ``` 21 | 22 | ------ 23 | 24 | ### Requirement 25 | 26 | - python >= 3.4 27 | - numpy == 1.14.5 28 | - tqdm 29 | - tensorflow == 1.8.0 30 | 31 | ------ 32 | 33 | ### Dataset Format 34 | 35 | The dataset consists of multiple files, and each file contains all the flow records of a specific application. And the files are ended with `.num`. For example 36 | 37 | ``` 38 | origin_data 39 | |---- alicdn.num 40 | |---- baidu.num 41 | ``` 42 | 43 | For a specific application, each flow record is consists with two parts, for example 44 | 45 | ``` 46 | 50 3 7 5 5 5 ;2920 167 51 78 968 38 47 | ``` 48 | 49 | There are two sequences in a record: the first one is encoded status sequence and the second on is the packet length sequence. The two sequences are separated with `;`, and the elements in the sequences are separated with `\t`. 50 | 51 | ### How to use 52 | 53 | #### Step 1. Pre-Process The Dataset 54 | 55 | The dataset is first formalized into `.json` files, and the train set and development set are split as follows: 56 | 57 | ```bash 58 | python main.py --mode=prepro 59 | ``` 60 | 61 | The dataset will saved in the `record` folder, and the files are start with `train` and `test`. The setting can be changed with `--train_json`, `--test_json`, `--train_meta` and `--test_meta`. 62 | 63 | #### Step 2: Train The Model 64 | 65 | We can train our model by: 66 | 67 | ```bash 68 | python main.py --mode=train 69 | ``` 70 | 71 | **Note**: hyper-parameters (such as batch size, hidden size, layer number) of the model and the training process can be explored in the `main.py`. 72 | 73 | #### Step 3: Evaluation. 74 | 75 | Given the evaluation dataset, we can conduct the evaluation with: 76 | 77 | ```bash 78 | python main.py --mode=test --test_json=xxxxxx --test_model_dir=yyyyy 79 | ``` 80 | 81 | The model will loaded from the `${test_model_dir}`, and the `${test_json}` is the test data. The test data have the same format with the results of the Step 1. 
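For reference, a single record line can be parsed into its two sequences with a few lines of Python. This is only a sketch of the format described above — the helper name `parse_record` is illustrative and not part of this repository; `preprocess.py` performs the equivalent split on `;` and `\t` when it builds the JSON files:

```python
def parse_record(line):
    """Split one .num record into (status sequence, packet length sequence)."""
    status_part, length_part = line.strip().split(';')
    status_seq = [int(x) for x in status_part.split()]   # encoded status sequence
    length_seq = [int(x) for x in length_part.split()]   # packet length sequence
    return status_seq, length_seq

# Example record from above:
# parse_record('50\t3\t7\t5\t5\t5;2920\t167\t51\t78\t968\t38')
# -> ([50, 3, 7, 5, 5, 5], [2920, 167, 51, 78, 968, 38])
```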
82 | -------------------------------------------------------------------------------- /models/dl/fsnet/dataset.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tqdm import tqdm 3 | import numpy as np 4 | import json 5 | 6 | 7 | PAD_KEY = 0 8 | START_KEY = 1 9 | END_KEY = 2 10 | 11 | 12 | def read_file_generator(filename, max_len, keep_ratio=1): 13 | 14 | def gen(): 15 | with open(filename) as fp: 16 | data = json.load(fp) 17 | data_all = [] 18 | for exp in data: 19 | flow_length = len(exp['flow']) 20 | if flow_length <= max_len: 21 | flow = [START_KEY] + exp['flow'] + [END_KEY] + [PAD_KEY] * (max_len - flow_length) 22 | data_all.append((str.encode(exp['id']), exp['label'], flow)) 23 | numx = 0 24 | total_num = min(int(keep_ratio * len(data_all)), len(data_all)-1) 25 | data_all = data_all[:total_num] 26 | #print('total_num',total_num) 27 | while True: 28 | if numx == 0: 29 | np.random.shuffle(data_all) 30 | #print('numx',numx) 31 | yield data_all[numx] 32 | numx = (numx + 1) % total_num 33 | return gen 34 | 35 | 36 | def get_dataset_from_generator(file, config, max_len, keep_ratio=1): 37 | data_gen = read_file_generator(file, max_len, keep_ratio) 38 | dataset = tf.data.Dataset.from_generator( 39 | data_gen, 40 | (tf.string, tf.int32, tf.int32), 41 | (tf.TensorShape([]), tf.TensorShape([]), tf.TensorShape([max_len + 2])) 42 | ).shuffle(config.capacity).batch(config.batch_size).prefetch(4) 43 | return dataset 44 | 45 | 46 | def _get_summary(metric): 47 | summ = [] 48 | for met in metric: 49 | sx = tf.Summary(value=[tf.Summary.Value(tag=met, simple_value=metric[met])]) 50 | summ.append(sx) 51 | return summ 52 | 53 | 54 | def accuracy(model, val_num_batches, sess, handle, str_handle, name): 55 | pred_all, pred_right, losses, r_losses, c_losses = 0, 0, [], [], [] 56 | metric = {} 57 | for _ in tqdm(range(val_num_batches), desc='eval', ascii=True): 58 | loss,\ 59 | pred, label = sess.run( 60 | [model.loss, 61 | model.pred, model.label], 62 | feed_dict={handle: str_handle}) 63 | losses.append(loss) 64 | #r_losses.append(r_loss) 65 | #c_losses.append(c_loss) 66 | pred_all += len(pred) 67 | pred_right += np.sum(pred == label) 68 | loss = np.mean(losses) 69 | metric[name + '/loss/all'] = loss 70 | #metric[name + '/loss/clf'] = np.mean(c_losses) 71 | #metric[name + '/loss/rec'] = np.mean(r_losses) 72 | metric[name + '/accuracy'] = pred_right / pred_all 73 | summ = _get_summary(metric) 74 | 75 | return loss, summ, metric -------------------------------------------------------------------------------- /models/ml/appscanner/train.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from wf_attacks.data_utils import LoadDataApp_crossversion 3 | from wf_attacks.appscanner.feature_extractor import feature_extract 4 | import lightgbm as lgb 5 | from sklearn.metrics import accuracy_score 6 | import numpy as np 7 | from wf_attacks.appscanner.min_max import _max,_min 8 | ##原始的包长序列 9 | def main(train_set,modelpath): 10 | global _min,_max 11 | X_train_r, y_train_r, X_valid_r, y_valid_r, X_test_r, y_test_r = LoadDataApp_crossversion(train_set) 12 | saved_model = modelpath# "appscanner.model" 13 | print('before extract feature') 14 | ##提取统计特征 15 | X_train =[] 16 | X_valid =[] 17 | X_test =[] 18 | 19 | for i in range(X_train_r.shape[0]): 20 | X_train.append(feature_extract(X_train_r[i])) 21 | for i in range(X_test_r.shape[0]): 22 | X_test.append(feature_extract(X_test_r[i])) 23 | for i 
in range(X_valid_r.shape[0]): 24 | X_valid.append(feature_extract(X_valid_r[i])) 25 | print('feature extract well!') 26 | ##归一化操作 27 | X_train = np.array(X_train) 28 | X_valid = np.array(X_valid) 29 | X_test = np.array(X_test) 30 | _min = np.array(_min) 31 | _max =np.array(_max) 32 | print('_min:') 33 | print(_min) 34 | print('_max:') 35 | print(_max) 36 | 37 | X_train = (X_train-_min)/(_max-_min) 38 | X_valid = (X_valid-_min)/(_max-_min) 39 | X_test = (X_test-_min)/(_max-_min) 40 | print('normalize well!') 41 | print(X_train[0]) 42 | print(X_valid[1]) 43 | print(X_test[2]) 44 | ## 45 | y_test = np.argmax(y_test_r,1) 46 | y_train =np.argmax(y_train_r,1) 47 | y_valid =np.argmax(y_valid_r,1) 48 | print(y_test[0:10]) 49 | ##开始训练 50 | 51 | lgb_train = lgb.Dataset(data=X_train,label=y_train) 52 | lgb_eval = lgb.Dataset(data=X_valid,label=y_valid) 53 | 54 | hyper_params = { 55 | 'boosting_type': 'rf', 56 | 'objective': 'multiclass', 57 | 'num_leaves': 512, 58 | 'learning_rate': 0.05, 59 | 'feature_fraction': 0.9, 60 | 'bagging_fraction': 0.8, 61 | 'bagging_freq': 5, 62 | 'verbose': 0, 63 | 'num_class':55, 64 | 'lambda_l1':0.05, 65 | 'lambda_l2':0.15 66 | } 67 | 68 | gbm = lgb.train(params=hyper_params, 69 | train_set=lgb_train, 70 | valid_sets=lgb_eval, 71 | num_boost_round=50, 72 | early_stopping_rounds=5) 73 | #save model 74 | try: 75 | gbm.save_model(saved_model) 76 | except BaseException as exp: 77 | pass 78 | logit = gbm.predict(data=X_test) 79 | label_predict = list(map(lambda x : np.argmax(x),logit)) 80 | 81 | accuracy = accuracy_score(y_test,label_predict) 82 | print('[Appscanner test on {0} acc:{1}]'.format(train_set,accuracy)) 83 | -------------------------------------------------------------------------------- /models/dl/fsnet/eval.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import numpy as np 3 | import json 4 | from sklearn.metrics import classification_report 5 | ALL_ = -1 6 | TPR_KEY = 'TPR' 7 | FPR_KEY = 'FPR' 8 | FTF_KEY = 'FTF' 9 | 10 | 11 | def _fpr_trp_app(real, pred, app_ind): 12 | real_app = real == app_ind 13 | pred_app = pred == app_ind 14 | TP = 0 15 | TN = 0 16 | FP = 0 17 | FN = 0 18 | for r, p in zip(real_app, pred_app): 19 | if r and p: 20 | TP += 1 21 | elif r and not p: 22 | FN += 1 23 | elif not r and p: 24 | FP += 1 25 | else: 26 | TN += 1 27 | return TP, TN, FP, FN 28 | 29 | 30 | def _evaluate_fpr_and_tpr(real, pred): 31 | app_num = len(pred) 32 | real = np.concatenate(real) 33 | pred = np.concatenate(pred) 34 | TP = 0 35 | TN = 0 36 | FP = 0 37 | FN = 0 38 | TPR = {} 39 | FPR = {} 40 | for app_ind in tqdm(range(app_num), ascii=True, desc='Eval'): 41 | TP_app, TN_app, FP_app, FN_app = _fpr_trp_app(real, pred, app_ind) 42 | TP += TP_app 43 | TN += TN_app 44 | FP += FP_app 45 | FN += FN_app 46 | TPR[app_ind] = TP_app / (TP_app + FN_app) 47 | FPR[app_ind] = FP_app / (FP_app + TN_app) 48 | TPR[ALL_] = TP / (TP + FN) 49 | FPR[ALL_] = FP / (FP + TN) 50 | #print("Total Accuracy:",(TP+TN)/(TP+TN+FP+FN)) 51 | return TPR, FPR 52 | 53 | 54 | def _evaluate_ftf(TPR, FPR, class_num): 55 | res = 0 56 | sam_num = np.array(class_num, dtype=np.float) 57 | sam_num /= sam_num.sum() 58 | 59 | for key in TPR: 60 | if key == ALL_: 61 | continue 62 | res += sam_num[key] * TPR[key] / (1 + FPR[key]) 63 | return res 64 | 65 | 66 | def save_res(res, filename): 67 | with open(filename, 'w') as fp: 68 | json.dump(res, fp, indent=1, sort_keys=True) 69 | 70 | 71 | def evaluate(real, pred): 72 | 
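    # evaluate() expects `real` and `pred` as nested lists with one sub-list per class.
    # It flattens them, prints the overall accuracy and a sklearn classification_report,
    # and returns a dict with the per-class TPR and FPR (from _evaluate_fpr_and_tpr above,
    # with an overall entry under the key -1) plus the FTF score, i.e. the sample-weighted
    # average of TPR / (1 + FPR) across classes computed by _evaluate_ftf.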
print('real.shape:{0},len.shape{1}'.format(np.array(real).shape,np.array(pred).shape)) 73 | r=0 74 | t=0 75 | y_real =[] 76 | y_pred =[] 77 | for i in range(len(real)): 78 | for j in range(len(real[i])): 79 | y_real.append(real[i][j]) 80 | y_pred.append(pred[i][j]) 81 | if real[i][j]==pred[i][j]: 82 | r+=1 83 | t+=1 84 | 85 | example_len = [len(ix) for ix in real] 86 | TPR, FPR = _evaluate_fpr_and_tpr(real, pred) 87 | FTF = _evaluate_ftf(TPR, FPR, example_len) 88 | res = { 89 | TPR_KEY: TPR, 90 | FPR_KEY: FPR, 91 | FTF_KEY: FTF 92 | } 93 | print('Accuracy:',r*1.0/t) 94 | print(classification_report(y_true=y_real,y_pred=y_pred, digits=5)) 95 | 96 | return res 97 | -------------------------------------------------------------------------------- /models/ml/rdp/convert_to_csv.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | #把生成的文件转换为csv格式的. 3 | #逐行生成,并不合并,等实际模型需要的时候再进行合并 4 | #格式 5 | #时间戳,类别id,特征1,特征2,··· 6 | ''' 7 | 类别id : 动作类别 8 | 0 : editing_doc 9 | 1 : reading_doc 10 | 2 : surfing_web 11 | 3 : installing_software 12 | 4 : transfering_file 13 | 5 : watching_video 14 | ''' 15 | class_to_id={ 16 | 'editing_doc':0, 17 | 'reading_doc':1, 18 | 'surfing_web':2, 19 | 'installing_software':3, 20 | 'transfering_file':4, 21 | 'watching_video':5 22 | } 23 | __author__ = 'jmh081701' 24 | import os 25 | import re 26 | import json 27 | import sys 28 | def get_files(appname,gap,directory=r"E:\TempWorkStation\i-know-what-are-you-doing\dataset\vector"): 29 | files=[] 30 | for _root,_subs,_files in os.walk(directory): 31 | for file in _files: 32 | if file.count(appname) and file.count("gap=%s"%gap): 33 | files.append(directory+"\\"+file) 34 | return files 35 | if __name__ == '__main__': 36 | appnames=['micrords','anydesk','realvnc','teamviewer'] 37 | gaps = [0.5,0.2,0.8] 38 | for appname in appnames: 39 | for gap in gaps: 40 | #if appname!='teamviewer' or gap!=0.2: 41 | # continue 42 | DIRECOTRY=r"E:\TempWorkStation\i-know-what-are-you-doing\dataset\vector_flowid" 43 | files = get_files(appname=appname,gap=gap,directory=DIRECOTRY) 44 | label_rule = "_(.*?)\." 
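            # label_rule captures the text between the first '_' and the following '.'
            # in the file name; the captured action name is looked up in class_to_id
            # above to obtain the numeric label written at the start of each CSV line below.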
45 | label_pattern = re.compile(label_rule) 46 | TARGET=DIRECOTRY+"\\"+"csv"+"\\"+appname+"_"+str(gap) +".txt" 47 | fp = open(TARGET,'w') 48 | for file in files: 49 | print(file) 50 | label=class_to_id[label_pattern.findall(file.split("\\")[-1] )[0] ] 51 | with open(file) as jfp: 52 | peaks_features=json.load(jfp) 53 | for i in range(peaks_features['counter']): 54 | timestamp = peaks_features['timestamps'][i] 55 | feature = peaks_features['feature'][i] 56 | flowid = peaks_features['flowids'][i] 57 | fp.writelines("%s,%d,%d,"%(timestamp,label,flowid)) 58 | #print(len(feature)) 59 | if len(feature)!=96: 60 | print(feature,i) 61 | exit() 62 | for j in range(len(feature)): 63 | fp.writelines(str(feature[j])) 64 | if j < (len(feature)-1): 65 | fp.writelines(",") 66 | fp.writelines("\n") 67 | if i %1000 ==0 : 68 | print('finished %d/%d'%(i,peaks_features['counter'])) 69 | fp.flush() 70 | fp.close() -------------------------------------------------------------------------------- /models/ml/cumul/feature_extractor.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import numpy as np 3 | import os 4 | import sys 5 | import copy 6 | class cumul_feature_extractor: 7 | def __init__(self, 8 | feature_length = 100, 9 | min = 0, max = 1 10 | ): 11 | self.feature_length = feature_length #cumul模型的输入向量的长度,默认是100 12 | self.equidistance = None #采样的间距 13 | #标准化的参数 14 | self.min = min 15 | self.max = max 16 | ##############训练模型使用的数据 17 | 18 | def feature_extract(self,trace_sequence, cell_size=None): 19 | """feature_extract() : 从[-1,1,1...]的cell的方向序列中,生成CUMUL模型所需的特征向量 20 | 21 | :param trace_sequence: `numpy.narray` ,形状:batch_size * trace_length 22 | 输入的[-1,1,1...]向量,-1表示ingoing的cell,+1表示outgoing的流 23 | :param cell_size: 每个cell的大小,默认None,因为最后还得归一化 24 | :return: 25 | """ 26 | 27 | if cell_size == None: 28 | cell_size = 1 29 | if not isinstance(type(trace_sequence),np.ndarray): 30 | trace_sequence = np.array(trace_sequence) 31 | shape = trace_sequence.shape 32 | culmulative_sum_a = np.zeros(shape=shape,dtype = np.float) 33 | culmulative_sum_c = np.zeros(shape=shape,dtype = np.float) 34 | xp = np.linspace(0,shape[1]-1,shape[1]) 35 | features = np.zeros(shape=(shape[0],2*self.feature_length),dtype = np.float) 36 | #计算累计和 37 | for i in range(0,shape[0]): 38 | for j in range(1,shape[1]): 39 | culmulative_sum_a[i,j] += culmulative_sum_a[i,j-1] + abs(trace_sequence[i,j]) 40 | culmulative_sum_c[i,j] += culmulative_sum_c[i,j-1] + trace_sequence[i,j] 41 | #加上cell_size 42 | culmulative_sum_a = cell_size * culmulative_sum_a 43 | culmulative_sum_c = cell_size * culmulative_sum_c 44 | 45 | #线性采样n个特征 46 | if self.equidistance != None: 47 | equidistance = self.equidistance 48 | else: 49 | equidistance = (shape[1]-1)/self.feature_length 50 | xval = np.arange(0,equidistance * self.feature_length,equidistance) 51 | for i in range(shape[0]): 52 | #print(i,culmulative_sum_a[i]) 53 | #print(i,culmulative_sum_c[i]) 54 | a_interp = (np.interp(xval,xp,culmulative_sum_a[i])-self.min)/(self.max-self.min) 55 | c_interp = (np.interp(xval,xp,culmulative_sum_c[i])-self.min)/(self.max-self.min) 56 | 57 | features[i,0:2*self.feature_length:2]=copy.deepcopy(a_interp)[:self.feature_length] 58 | features[i,1:2*self.feature_length:2]=copy.deepcopy(c_interp)[:self.feature_length] 59 | #print(i,features[i]) 60 | #print('#'*30) 61 | return features 62 | -------------------------------------------------------------------------------- /models/dl/examples.py: 
-------------------------------------------------------------------------------- 1 | 2 | __author__ = 'dk' 3 | import os 4 | #设置Tensorflow的日志等级 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 6 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 7 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 8 | 9 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 10 | from df import df_model_config 11 | 12 | from cnn import cnn_model_config 13 | from sdae import sdae_model_config 14 | from lstm import lstm_model_config 15 | 16 | from data_utils import LoadDataNoDefCW100,LoadDataNoDefCW 17 | 18 | #使用步骤 19 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 20 | #2. 读取数据,构造好训练集,验证集,测试集 21 | #3. build_model() 22 | #4. 调用fit() 23 | #5. 测试一下 24 | #6. 保存模型 25 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 26 | model = CNN_model() 27 | model.build_model() 28 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 29 | batch_size=cnn_model_config.learning_params_template['batch_size'], 30 | epochs=cnn_model_config.learning_params_template['nb_epochs']) 31 | model.save_model() 32 | score = model.evaluate(X_test=X_test,y_test=y_test) 33 | print('simple CNN accuracy :{0}'.format(score)) 34 | 35 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 36 | 37 | model =DF_model() 38 | model.build_model() 39 | 40 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 41 | batch_size=df_model_config.learning_params_template['batch_size'], 42 | epochs=df_model_config.learning_params_template['epoch']) 43 | model.save_model() 44 | score = model.evaluate(X_test=X_test,y_test=y_test) 45 | print('Deep Fingerprinting accuracy :{0}'.format(score)) 46 | 47 | 48 | 49 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 50 | model = LSTM_model() 51 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 52 | model = SDAE_model() 53 | model.build_model() 54 | model.pre_train(x_train=X_train,x_test=X_test) 55 | 56 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 57 | batch_size=sdae_model_config.learning_params_template['batch_size'], 58 | epochs=sdae_model_config.learning_params_template['nb_epochs']) 59 | model.save_model() 60 | score = model.evaluate(X_test=X_test,y_test=y_test) 61 | print('sdae accuracy :{0}'.format(score)) 62 | if __name__ == '__main__': 63 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataNoDefCW100() 64 | #test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 65 | #test_cnn(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 66 | 67 | test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test) -------------------------------------------------------------------------------- /models/dl/cnn_example.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | #设置Tensorflow的日志等级 4 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 7 | 8 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 9 | from df import df_model_config 10 | 11 | from cnn import cnn_model_config 12 | from sdae import sdae_model_config 13 | from lstm import lstm_model_config 14 | 15 | from data_utils import LoadDataNoDefCW100,LoadDataNoDefCW,LoadDataWalkieTalkieCW,LoadDataApp 16 | 17 | 
#使用步骤 18 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 19 | #2. 读取数据,构造好训练集,验证集,测试集 20 | #3. build_model() 21 | #4. 调用fit() 22 | #5. 测试一下 23 | #6. 保存模型 24 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 25 | model = CNN_model() 26 | model.build_model() 27 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 28 | batch_size=cnn_model_config.learning_params_template['batch_size'], 29 | epochs=cnn_model_config.learning_params_template['nb_epochs']) 30 | model.save_model() 31 | score = model.evaluate(X_test=X_test,y_test=y_test) 32 | print('simple CNN accuracy :{0}'.format(score)) 33 | 34 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 35 | 36 | model =DF_model() 37 | model.build_model() 38 | 39 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 40 | batch_size=df_model_config.learning_params_template['batch_size'], 41 | epochs=df_model_config.learning_params_template['epoch']) 42 | model.save_model() 43 | score = model.evaluate(X_test=X_test,y_test=y_test) 44 | print('Deep Fingerprinting accuracy :{0}'.format(score)) 45 | 46 | 47 | 48 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 49 | model = LSTM_model() 50 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 51 | model = SDAE_model() 52 | model.build_model() 53 | model.pre_train(x_train=X_train,x_test=X_test) 54 | 55 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 56 | batch_size=sdae_model_config.learning_params_template['batch_size'], 57 | epochs=sdae_model_config.learning_params_template['nb_epochs']) 58 | model.save_model() 59 | score = model.evaluate(X_test=X_test,y_test=y_test) 60 | print('sdae accuracy :{0}'.format(score)) 61 | if __name__ == '__main__': 62 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataApp()#LoadDataWalkieTalkieCW()#LoadDataNoDefCW() 63 | #test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 64 | test_cnn(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 65 | 66 | #test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test) -------------------------------------------------------------------------------- /models/dl/df_example.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | #设置Tensorflow的日志等级 4 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 7 | 8 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 9 | from df import df_model_config 10 | 11 | from cnn import cnn_model_config 12 | from sdae import sdae_model_config 13 | from lstm import lstm_model_config 14 | 15 | from data_utils import LoadDataNoDefCW100,LoadDataNoDefCW,LoadDataWalkieTalkieCW,LoadDataApp 16 | 17 | #使用步骤 18 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 19 | #2. 读取数据,构造好训练集,验证集,测试集 20 | #3. build_model() 21 | #4. 调用fit() 22 | #5. 测试一下 23 | #6. 
保存模型 24 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 25 | model = CNN_model() 26 | model.build_model() 27 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 28 | batch_size=cnn_model_config.learning_params_template['batch_size'], 29 | epochs=cnn_model_config.learning_params_template['nb_epochs']) 30 | model.save_model() 31 | score = model.evaluate(X_test=X_test,y_test=y_test) 32 | print('simple CNN accuracy :{0}'.format(score)) 33 | 34 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 35 | 36 | model =DF_model() 37 | model.build_model() 38 | 39 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 40 | batch_size=df_model_config.learning_params_template['batch_size'], 41 | epochs=df_model_config.learning_params_template['epoch']) 42 | model.save_model() 43 | score = model.evaluate(X_test=X_test,y_test=y_test) 44 | print('Deep Fingerprinting accuracy :{0}'.format(score)) 45 | 46 | 47 | 48 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 49 | model = LSTM_model() 50 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 51 | model = SDAE_model() 52 | model.build_model() 53 | model.pre_train(x_train=X_train,x_test=X_test) 54 | 55 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 56 | batch_size=sdae_model_config.learning_params_template['batch_size'], 57 | epochs=sdae_model_config.learning_params_template['nb_epochs']) 58 | model.save_model() 59 | score = model.evaluate(X_test=X_test,y_test=y_test) 60 | print('sdae accuracy :{0}'.format(score)) 61 | if __name__ == '__main__': 62 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataApp()#LoadDataWalkieTalkieCW() #LoadDataNoDefCW() 63 | test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 64 | #test_cnn(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 65 | 66 | #test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test) -------------------------------------------------------------------------------- /models/dl/sdae_example.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | #设置Tensorflow的日志等级 4 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 7 | 8 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 9 | from df import df_model_config 10 | 11 | from cnn import cnn_model_config 12 | from sdae import sdae_model_config 13 | from lstm import lstm_model_config 14 | 15 | from data_utils import LoadDataNoDefCW100,LoadDataNoDefCW,LoadDataWalkieTalkieCW,LoadDataApp 16 | 17 | #使用步骤 18 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 19 | #2. 读取数据,构造好训练集,验证集,测试集 20 | #3. build_model() 21 | #4. 调用fit() 22 | #5. 测试一下 23 | #6. 
保存模型 24 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 25 | model = CNN_model() 26 | model.build_model() 27 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 28 | batch_size=cnn_model_config.learning_params_template['batch_size'], 29 | epochs=cnn_model_config.learning_params_template['nb_epochs']) 30 | model.save_model() 31 | score = model.evaluate(X_test=X_test,y_test=y_test) 32 | print('simple CNN accuracy :{0}'.format(score)) 33 | 34 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 35 | 36 | model =DF_model() 37 | model.build_model() 38 | 39 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 40 | batch_size=df_model_config.learning_params_template['batch_size'], 41 | epochs=df_model_config.learning_params_template['epoch']) 42 | model.save_model() 43 | score = model.evaluate(X_test=X_test,y_test=y_test) 44 | print('Deep Fingerprinting accuracy :{0}'.format(score)) 45 | 46 | 47 | 48 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 49 | model = LSTM_model() 50 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 51 | model = SDAE_model() 52 | model.build_model() 53 | model.pre_train(x_train=X_train,x_test=X_test) 54 | 55 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 56 | batch_size=sdae_model_config.learning_params_template['batch_size'], 57 | epochs=sdae_model_config.learning_params_template['nb_epochs']) 58 | model.save_model() 59 | score = model.evaluate(X_test=X_test,y_test=y_test) 60 | print('sdae accuracy :{0}'.format(score)) 61 | if __name__ == '__main__': 62 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataApp()#LoadDataWalkieTalkieCW() #LoadDataNoDefCW() 63 | #test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 64 | #test_cnn(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 65 | 66 | test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test) -------------------------------------------------------------------------------- /models/dl/mimetic/build_model.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import torch.nn as nn 3 | import torch as th 4 | 5 | class CNN_block(nn.Module): 6 | def __init__(self, kernel_size=25, filter_num=256): 7 | super(CNN_block, self).__init__() 8 | self.kernel_size = kernel_size 9 | self.filter_num = filter_num 10 | 11 | self._1conv1d = nn.Conv1d(stride=1, 12 | kernel_size=kernel_size, 13 | in_channels=1, 14 | out_channels=16, 15 | padding=kernel_size//2) 16 | self._2maxpooling = nn.MaxPool1d(kernel_size=3, stride= 1) 17 | 18 | self._3conv1d = nn.Conv1d(stride=1, 19 | kernel_size=kernel_size, 20 | in_channels=16, 21 | out_channels=32, 22 | padding=kernel_size//2) 23 | self._4maxpooling = nn.MaxPool1d(kernel_size=kernel_size, stride= 4) 24 | self._6flattern = nn.Flatten() 25 | self._5fc = nn.Linear(in_features=144, out_features=128) 26 | 27 | def forward(self, x): 28 | x = x.unsqueeze(1) 29 | 30 | x = self._1conv1d(x) 31 | x = self._2maxpooling(x) 32 | x = self._3conv1d(x) 33 | x = self._4maxpooling(x) 34 | x = self._5fc(x) 35 | x = self._6flattern(x) 36 | 37 | #print('cnn x shape',x.shape) 38 | return x 39 | 40 | class MIMETICModel(nn.Module): 41 | def __init__(self, payload_sz, packet_nb, class_nb, gru_layer_nb=2): 42 | super(MIMETICModel, self).__init__() 43 | self.payload_sz = payload_sz 44 | self.packet_nb = packet_nb 45 | 46 | self.gru_encoder = 
nn.GRU( input_size = 3, ##包长序列、到达时间序列、方向序列、 window-size 47 | hidden_size= 64, 48 | num_layers=gru_layer_nb, 49 | bidirectional=True, batch_first=True) 50 | 51 | self.cnn_encoder = CNN_block() 52 | 53 | self.fc = nn.Linear(in_features=12288, out_features= class_nb) 54 | self.dropout = nn.Dropout(p=0.1) 55 | 56 | def forward(self, field, payload): 57 | batch_size= field.shape[0] 58 | 59 | #print('packet embed shape', packet_embed.shape) 60 | packet_vector, hidden = self.gru_encoder(field) 61 | #print('packet vector shape',packet_vector.shape) 62 | packet_vector = packet_vector.reshape(batch_size, -1) 63 | 64 | payload_vector = self.cnn_encoder(payload) 65 | 66 | representation = th.cat((packet_vector, payload_vector), dim=1) 67 | representation = self.dropout(self.fc(representation)) 68 | return representation -------------------------------------------------------------------------------- /models/dl/fsnet/preprocess.py: -------------------------------------------------------------------------------- 1 | import tqdm 2 | import numpy as np 3 | import os 4 | import sys 5 | import json 6 | 7 | 8 | def eprint(*args, **kwargs): 9 | print(*args, file=sys.stderr, **kwargs) 10 | 11 | 12 | def load_origin_data(data_dir, app_num): 13 | datas = [[] for _ in range(app_num)] 14 | filenames = [filename for filename in os.listdir(data_dir) \ 15 | if os.path.isfile(os.path.join(data_dir, filename)) and filename.split(".")[-1] == "num"] 16 | filenames.sort() 17 | print(filenames,app_num) 18 | for app in tqdm.tqdm(range(app_num), ascii=True, desc='[Load Data]'): 19 | with open(os.path.join(data_dir, filenames[app])) as fp: 20 | for line in fp: 21 | 22 | _length = line.strip().split(';')[1].strip().split('\t') 23 | length =[] 24 | for ix in _length: 25 | if int(ix) !=0 : 26 | length.append(abs(int(ix))) 27 | else: 28 | break 29 | datas[app].append({'label': app, 'flow': length, 'lo': length.copy()}) 30 | return datas 31 | 32 | 33 | def _transform(datas, block, limit, max_packet): 34 | data_trans = [[] for _ in range(len(datas))] 35 | for app in tqdm.tqdm(range(len(datas)), ascii=True, desc='[Transform]'): 36 | app_data = datas[app] 37 | for idx, example in enumerate(app_data): 38 | flow = example['flow'] 39 | if len(flow) < limit: 40 | #print(flow) 41 | continue 42 | flow = [ix if ix <= max_packet else max_packet for ix in flow] 43 | flow = [ix // block + 3 for ix in flow] 44 | data_trans[app].append( 45 | {'label': example['label'], 'flow': flow, 'lo': example['lo'], 'id': str(app) + '-' + str(idx)} 46 | ) 47 | return data_trans 48 | 49 | 50 | def split_train_and_dev(datas, ratio=0.8, keep_ratio=1): 51 | train, dev = [], [] 52 | for app_data in tqdm.tqdm(datas, ascii=True, desc='[Split]'): 53 | is_keep = np.random.rand(len(app_data)) <= keep_ratio 54 | is_train = np.random.rand(len(app_data)) <= ratio 55 | for example, kp, tr in zip(app_data, is_keep, is_train): 56 | if kp and tr: 57 | train.append(example) 58 | elif kp and not tr: 59 | dev.append(example) 60 | np.random.shuffle(train) 61 | np.random.shuffle(dev) 62 | return train, dev 63 | 64 | 65 | def preprocess(config): 66 | eprint('Generate train and test.') 67 | origin = load_origin_data(config.data_dir, config.class_num) 68 | length = _transform(origin, config.length_block, config.min_length, config.max_packet_length) 69 | train, test = split_train_and_dev(length, config.split_ratio, config.keep_ratio) 70 | with open(config.train_json, 'w') as fp: 71 | json.dump(train, fp, indent=1) 72 | with open(config.test_json, 'w') as fp: 73 | json.dump(test, fp, 
indent=1) 74 | with open(config.train_meta, 'w') as fp: 75 | fp.write(str(len(train))) 76 | with open(config.test_meta, 'w') as fp: 77 | fp.write(str(len(test))) 78 | -------------------------------------------------------------------------------- /models/model_base.py: -------------------------------------------------------------------------------- 1 | 2 | import random 3 | import os 4 | import pickle 5 | import numpy as np 6 | import gzip 7 | from sklearn.metrics import multilabel_confusion_matrix, roc_auc_score, auc 8 | class abs_model: 9 | def __init__(self, name, randseed): 10 | self.database = './data/' 11 | self.name = name 12 | self.rand = random.Random(x = randseed) 13 | self.data = None 14 | self.model = None 15 | self.full_rdata = [] 16 | 17 | def data_exists(self): 18 | return os.path.exists(self.data) 19 | def model_exist(self): 20 | return os.path.exists(self.model) 21 | 22 | def train(self): 23 | pass 24 | 25 | def test(self): 26 | pass 27 | 28 | def parser_raw_data(self): 29 | ##从原始通用数据集获取自己所需格式数据集能力 30 | pass 31 | 32 | def save_model(self): 33 | pass 34 | 35 | def load_model(self): 36 | pass 37 | def save_data(self,X_train, y_train, X_valid, y_valid, X_test, y_test): 38 | fp = gzip.GzipFile(self.data + 'data.gzip','wb') 39 | pickle.dump({ 40 | 'X_train':X_train, 41 | 'y_train':y_train, 42 | 'X_valid':X_valid, 43 | 'y_valid':y_valid, 44 | 'X_test':X_test, 45 | 'y_test':y_test 46 | },file=fp) 47 | fp.close() 48 | def load_data(self): 49 | fp = gzip.GzipFile(self.data + 'data.gzip','rb') 50 | data = pickle.load(fp) 51 | fp.close() 52 | X_train = data['X_train'] 53 | y_train = data['y_train'] 54 | X_valid = data['X_valid'] 55 | y_valid = data['y_valid'] 56 | X_test = data['X_test'] 57 | y_test = data['y_test'] 58 | import random 59 | indexs = [x for x in range(len(y_test))] 60 | random.shuffle(indexs) 61 | return np.array(X_train), np.array(y_train), np.array(X_valid), np.array(y_valid), np.array(X_test)[indexs], np.array(y_test)[indexs] 62 | def num_classes(self): 63 | for _root, _dir, _files in os.walk(self.full_rdata): 64 | classes = _files 65 | return len(classes) 66 | def fpr_tpr_auc(self, y_pred, y_real,y_pred_logit=None): 67 | labels =set() 68 | for each in y_real: 69 | labels.add(each) 70 | labels =list(labels) 71 | mcm = multilabel_confusion_matrix(y_true=y_real,y_pred=y_pred,labels=labels) 72 | #print(mcm) 73 | fp ={} 74 | tp ={} 75 | fn ={} 76 | tn ={} 77 | for i in range(len(labels)): 78 | fp.setdefault(labels[i],mcm[i,0,1]) 79 | tp.setdefault(labels[i],mcm[i,1,1]) 80 | fn.setdefault(labels[i],mcm[i,1,0]) 81 | tn.setdefault(labels[i],mcm[i,0,0]) 82 | acc={} 83 | fpr={} 84 | tpr={} 85 | for each in fp: 86 | acc.setdefault(each,(tp[each]+tn[each])/(fp[each]+tn[each]+fn[each]+tp[each])) 87 | fpr.setdefault(each,fp[each]/(fp[each]+tn[each])) 88 | tpr.setdefault(each,tp[each]/(tp[each]+fn[each])) 89 | 90 | print('tpr:',tpr) 91 | 92 | print('fpr:',fpr) 93 | #auc = roc_auc_score(y_true=y_real, y_score=y_pred_logit[:,1]) 94 | #print('auc (prob):', auc) 95 | 96 | auc = roc_auc_score(y_true=y_real, y_score=y_pred) 97 | print('auc (label):', auc) -------------------------------------------------------------------------------- /models/dl/cnn/cnn_model.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from keras.models import Sequential 3 | from keras.layers.core import Dense, Dropout 4 | from keras.layers import Conv1D, MaxPooling1D, Flatten 5 | from keras.callbacks import EarlyStopping, ModelCheckpoint 6 | from 
keras.utils import np_utils 7 | from keras.optimizers import RMSprop, Adam, SGD 8 | from keras import regularizers 9 | from keras.layers.recurrent import LSTM 10 | from .cnn_model_config import learning_params_template,nb_classes_template 11 | from keras import optimizers 12 | 13 | def build_model(learn_params=learning_params_template, nb_classes=nb_classes_template): 14 | input_length = learn_params["maxlen"] 15 | input_dim = learn_params["nb_features"] 16 | layers = learn_params["layers"] 17 | 18 | model = Sequential() 19 | 20 | maxlen = input_length 21 | max_features = input_dim 22 | 23 | if len(layers) == 0: 24 | raise("No layers") 25 | 26 | first_l = layers[0] 27 | rest_l = layers[1:] 28 | 29 | # First layer 30 | if first_l["name"] == 'dropout': 31 | model.add(Dropout(input_shape=(maxlen, max_features), rate=first_l['rate'])) 32 | elif first_l["name"] == 'conv': 33 | model.add(Conv1D(filters=first_l['filters'], 34 | kernel_size=first_l['kernel_size'], 35 | padding='valid', 36 | activation=first_l['activation'], 37 | strides=first_l['stride'])) 38 | 39 | # Middle layers (conv, dropout, pooling, dense, lstm.....) 40 | for l in rest_l: 41 | if l["name"] == 'maxpooling': 42 | model.add(MaxPooling1D(pool_size=l['pool_size'], padding='valid')) 43 | elif l["name"] == 'conv': 44 | model.add(Conv1D(filters=l['filters'], 45 | kernel_size=l['kernel_size'], 46 | padding='valid', 47 | activation=l['activation'], 48 | strides=l['stride'])) 49 | elif l["name"] == 'dropout': 50 | model.add(Dropout(rate=l['rate'])) 51 | elif l["name"] == 'lstm': 52 | model.add(LSTM(l['units'])) 53 | elif l["name"] == 'flatten': 54 | model.add(Flatten()) 55 | elif l["name"] == 'dense': 56 | if l['regularization'] > 0.0: 57 | model.add(Dense(units=l['units'], activation=l['activation'], 58 | kernel_regularizer=regularizers.l2(last_l['regularization']), 59 | activity_regularizer=regularizers.l1(last_l['regularization']))) 60 | else: 61 | model.add(Dense(units=l['units'], activation=l['activation'])) 62 | 63 | 64 | learn_params = learning_params_template 65 | if learn_params['optimizer'] == "sgd": 66 | optimizer = SGD(lr=learn_params['lr'], 67 | decay=learn_params['decay'], 68 | momentum=learn_params['momentum'], 69 | nesterov=True) 70 | elif learn_params['optimizer'] == "adam": 71 | optimizer = Adam(lr=learn_params['lr'], 72 | decay=learn_params['decay']) 73 | else: # elif learn_params['optimizer'] == "rmsprop": 74 | optimizer = RMSprop(lr=learn_params['lr'], 75 | decay=learn_params['decay']) 76 | metrics=['accuracy'] 77 | model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=metrics) 78 | return model 79 | 80 | -------------------------------------------------------------------------------- /models/dl/lstm/lstm_model.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from keras.layers.core import Dense, Dropout 3 | from keras.layers.recurrent import LSTM 4 | from keras.layers import Input 5 | from keras.models import Sequential 6 | try: 7 | import hyperas 8 | except ImportError as exp: 9 | print("Error:{0},\n\t please execute: {1}".format(exp,"pip install hyperas -i https://mirrors.aliyun.com/pypi/simple/")) 10 | raise exp 11 | from keras.optimizers import SGD, Adam, RMSprop 12 | from .lstm_model_config import learn_params_template,nb_classes_template 13 | 14 | def build_model(learn_params=learn_params_template, nb_classes=nb_classes_template): 15 | input_length = learn_params["maxlen"] 16 | input_dim = learn_params["nb_features"] 17 | 
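    # The LSTM layers below are built with input_shape=(input_length, input_dim),
    # i.e. each sample is a maxlen-step sequence of nb_features-dimensional vectors;
    # the *_example.py scripts accordingly truncate the packet-length matrix to maxlen
    # and reshape it to (samples, maxlen, 1) before calling fit().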
layers = learn_params["layers"] 18 | 19 | model = Sequential() 20 | # input_shape = (input_length, input_dim) 21 | # input_length = maxlen 22 | # input_dim = nb_features 23 | 24 | if len(layers) == 0: 25 | raise ("No layers") 26 | 27 | if len(layers) == 1: 28 | layer = layers[0] 29 | model.add(LSTM(input_shape=(input_length, input_dim), 30 | #batch_input_shape=(batch_size, input_length, input_dim), 31 | units=layer['units'], 32 | activation=layer['activation'], 33 | recurrent_activation=layer['rec_activation'], 34 | return_sequences=False, 35 | #stateful=True, 36 | dropout=layer['dropout'])) 37 | model.add(Dense(units=nb_classes, activation='softmax')) 38 | return model 39 | 40 | first_l = layers[0] 41 | last_l = layers[-1] 42 | middle_ls = layers[1:-1] 43 | # 44 | model.add(LSTM(input_shape=(input_length, input_dim), 45 | #batch_input_shape=(batch_size, input_length, input_dim), 46 | units=first_l['units'], 47 | activation=first_l['activation'], 48 | recurrent_activation=first_l['rec_activation'], 49 | return_sequences=True, 50 | #stateful=True, 51 | dropout=first_l['dropout'])) 52 | for l in middle_ls: 53 | model.add(LSTM(units=l['units'], 54 | activation=l['activation'], 55 | recurrent_activation=l['rec_activation'], 56 | return_sequences=True, 57 | #stateful=True, 58 | dropout=l['dropout'])) 59 | 60 | model.add(LSTM(units=last_l['units'], 61 | activation=last_l['activation'], 62 | recurrent_activation=last_l['rec_activation'], 63 | return_sequences=False, 64 | #stateful=True, 65 | dropout=last_l['dropout'])) 66 | 67 | model.add(Dense(units=nb_classes, activation='softmax')) 68 | 69 | if learn_params['optimizer'] == "sgd": 70 | optimizer = SGD(lr=learn_params['lr'], 71 | decay=learn_params['decay'], 72 | momentum=0.9, 73 | nesterov=True) 74 | elif learn_params['optimizer'] == "adam": 75 | optimizer = Adam(lr=learn_params['lr'], 76 | decay=learn_params['decay']) 77 | else: # elif learn_params['optimizer'] == "rmsprop": 78 | optimizer = RMSprop(lr=learn_params['lr'], 79 | decay=learn_params['decay']) 80 | metrics=['accuracy'] 81 | model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=metrics) 82 | 83 | return model 84 | 85 | 86 | -------------------------------------------------------------------------------- /models/dl/README.md: -------------------------------------------------------------------------------- 1 | # website_fingerprinting 2 | 3 | 目前本项目支持如下模型: 4 | 5 | 6 | - Deep Fingerprinting 7 | 8 | - SDAE 9 | 10 | - LSTM 11 | 12 | - CNN 13 | 14 | 15 | 剩余两个是统计机器学习模型:【 目前这两个模型没有适配好,但是里面的特征提取是有效的】 16 | 17 | - CUMUL 18 | 19 | - AppScanner 20 | 21 | 22 | # 使用方法 23 | 24 | ## 数据准备 25 | 26 | 首先,需要准备好数据格式: 27 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20201013165821472.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2ptaDE5OTY=,size_16,color_FFFFFF,t_70#pic_center) 28 | 需要将网络流量整理为如上的6个文件,并放在同一个目录,文件名如上。 29 | 30 | ```python 31 | X_train_pkt_length.pkl : 包长序列,训练集。 32 | X_valid_pkt_length.pkl : 包长序列,验证集。 33 | X_test_pkt_length.pkl : 包长序列,测试集。 34 | y_train_pkt_length.pkl : 流量标签,训练集。 35 | y_valid_pkt_length.pkl : 流量标签,验证集。 36 | y_test_pkt_length.pkl : 流量标签,测试集。 37 | ``` 38 | 其中,`X_*_pkt_length.pkl` 是一个使用pickle.save()保存的numpy 矩阵,它的形状为 $m\times l$ 。其中m是样本个数,l是包长序列的长度,**同一数据集所有样本的包长序列需要填充到相同的长度** 。 39 | `y_*_pkt_length.pkl` 也是一个pickle.save()保存的numpy矩阵,它的形状为 $m\times1$,m表示样本个数,第i个元素都是整数,表示对应的训练集、验证集、测试集第i个样本的标签。 40 | 数据集的保存需要使用类似如下的步骤: 41 | 42 | ```python 43 | with gzip.GzipFile(path_dir+"/"+"X_train_"+feature_name+".pkl","wb") 
as fp: 44 | pickle.dump(X_train,fp,-1) 45 | with gzip.GzipFile(path_dir+"/"+"X_valid_"+feature_name+".pkl","wb") as fp: 46 | pickle.dump(X_valid,fp,-1) 47 | with gzip.GzipFile(path_dir+"/"+"X_test_"+feature_name+".pkl","wb") as fp: 48 | pickle.dump(X_test,fp,-1) 49 | 50 | with gzip.GzipFile(path_dir+"/"+"y_train_"+feature_name+".pkl","wb") as fp: 51 | pickle.dump(y_train,fp,-1) 52 | with gzip.GzipFile(path_dir+"/"+"y_valid_"+feature_name+".pkl","wb") as fp: 53 | pickle.dump(y_valid,fp,-1) 54 | with gzip.GzipFile(path_dir+"/"+"y_test_"+feature_name+".pkl","wb") as fp: 55 | pickle.dump(y_test,fp,-1) 56 | ``` 57 | 58 | **训练集的包长序列的样本数目需要等于训练集的流量标签序列的样本数。 59 | 验证集的包长序列的样本数目需要等于验证集的流量标签序列的样本数。 60 | 测试集的包长序列的样本数目需要等于测试集的流量标签序列的样本数。** 61 | 62 | 项目提供了一个示例数据集 app_dataset,它是一个55分类的数据集,每条样本的包长序列长度为1000,不足的填充0,超过1000的就截断。 63 | 64 | 65 | --- 66 | ## 修改数据目录 67 | 在按照上述步骤准备好数据后,需要修改数据目录。 68 | 修改`website_fingerprinting/data_utils.py` 文件里面的`NB_CLASSES` 变量 和 默认数目集目录`dataset_dir` 变量。 69 | 其中`NB_CLASSES` 变量是数据集不同标签的数目。 70 | `dataset_dir` 是默认数据集的目录 71 | 72 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20201013171642726.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2ptaDE5OTY=,size_16,color_FFFFFF,t_70#pic_center) 73 | 74 | --- 75 | ## 配置模型 76 | 在运行模型之前,需要先修改他们的配置文件。 77 | 目前,各个模型的配置文件以模型名命名的目录下: 78 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20201013171123714.png#pic_center) 79 | 例如,对于Deep fingerprinting模型,它的配置文件为df目录下的df_model_config.py。 80 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20201013171223270.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2ptaDE5OTY=,size_16,color_FFFFFF,t_70#pic_center) 81 | 修改模型文件:**修改里面的类别数目和包长序列的长度参数。** 对于里面需要修改的参数,各模式文件都做了标注。 82 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20201013171257550.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2ptaDE5OTY=,size_16,color_FFFFFF,t_70#pic_center) 83 | ## 运行模型 84 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20201013172217604.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2ptaDE5OTY=,size_16,color_FFFFFF,t_70#pic_center) 85 | 运行 `X_example.py ` 进行模型的训练,其中X可以是df,cnn,lstm,sdae。 86 | 运行`X_eval.py` 进行模型的测试,其中X可以是df,cnn,lstm,sdae. 87 | 88 | 例如: 89 | 在自带的app_dataset数据集运行的 `df_example.py` 的结果为: 90 | ![在这里插入图片描述](https://img-blog.csdnimg.cn/20201013173850869.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2ptaDE5OTY=,size_16,color_FFFFFF,t_70#pic_center) 91 | 92 | -------------------------------------------------------------------------------- /models/dl/lstm_eval.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | #设置Tensorflow的日志等级 4 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 7 | 8 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 9 | from df import df_model_config 10 | 11 | from cnn import cnn_model_config 12 | from sdae import sdae_model_config 13 | from lstm import lstm_model_config 14 | 15 | from data_utils import LoadDataNoDefCW100,LoadDataNoDefCW,LoadDataWalkieTalkieCW,LoadDataApp,LoadDataApp_crossversion 16 | 17 | #使用步骤 18 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 19 | #2. 读取数据,构造好训练集,验证集,测试集 20 | #3. build_model() 21 | #4. 调用fit() 22 | #5. 
测试一下 23 | #6. 保存模型 24 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 25 | model = CNN_model() 26 | model.build_model() 27 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 28 | batch_size=cnn_model_config.learning_params_template['batch_size'], 29 | epochs=cnn_model_config.learning_params_template['nb_epochs']) 30 | model.save_model() 31 | score = model.evaluate(X_test=X_test,y_test=y_test) 32 | print('simple CNN accuracy :{0}'.format(score)) 33 | 34 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 35 | 36 | model =DF_model() 37 | model.build_model() 38 | 39 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 40 | batch_size=df_model_config.learning_params_template['batch_size'], 41 | epochs=df_model_config.learning_params_template['epoch']) 42 | model.save_model() 43 | score = model.evaluate(X_test=X_test,y_test=y_test) 44 | print('Deep Fingerprinting accuracy :{0}'.format(score)) 45 | 46 | 47 | 48 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 49 | 50 | model = LSTM_model() 51 | model.load_model() 52 | score = model.evaluate(X_test=X_test,y_test=y_test) 53 | print('lstm Test on test dataset accuracy :{0}'.format(score)) 54 | 55 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 56 | model = SDAE_model() 57 | model.build_model() 58 | model.pre_train(x_train=X_train,x_test=X_test) 59 | 60 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 61 | batch_size=sdae_model_config.learning_params_template['batch_size'], 62 | epochs=sdae_model_config.learning_params_template['nb_epochs']) 63 | model.save_model() 64 | score = model.evaluate(X_test=X_test,y_test=y_test) 65 | print('sdae accuracy :{0}'.format(score)) 66 | if __name__ == '__main__': 67 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataApp_crossversion()##LoadDataWalkieTalkieCW() # LoadDataNoDefCW() 68 | #test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 69 | #test_cnn(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 70 | #test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test) 71 | if X_train.shape[1] > lstm_model_config.learn_params_template['maxlen']: 72 | X_train = X_train.reshape(X_train.shape[0],X_train.shape[1]) 73 | X_valid = X_valid.reshape(X_valid.shape[0],X_valid.shape[1]) 74 | X_test = X_test.reshape(X_test.shape[0],X_test.shape[1]) 75 | 76 | X_train = X_train[:,:lstm_model_config.learn_params_template['maxlen']] 77 | X_valid = X_valid[:,:lstm_model_config.learn_params_template['maxlen']] 78 | X_test = X_test[:,:lstm_model_config.learn_params_template['maxlen']] 79 | 80 | X_train = X_train.reshape(X_train.shape[0],X_train.shape[1],1) 81 | X_valid = X_valid.reshape(X_valid.shape[0],X_valid.shape[1],1) 82 | X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],1) 83 | test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test) 84 | -------------------------------------------------------------------------------- /models/dl/beauty/cnn_model.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from keras.models import Sequential 3 | from keras.layers.core import Dense, Dropout 4 | from keras.layers import Conv1D, MaxPooling1D, Flatten 5 | from keras.callbacks import EarlyStopping, ModelCheckpoint 6 | from keras.utils import np_utils 7 | from keras.optimizers import RMSprop, Adam, SGD 8 | from keras import regularizers 9 | from 
keras.layers.recurrent import LSTM 10 | from models.dl.beauty.cnn_model_config import learning_params_template 11 | from keras import optimizers 12 | 13 | def build_model(learn_params=learning_params_template, nb_classes= -1): 14 | input_length = learn_params["maxlen"] 15 | input_dim = learn_params["nb_features"] 16 | layers = learn_params["layers"] 17 | 18 | model = Sequential() 19 | 20 | maxlen = input_length 21 | max_features = input_dim 22 | 23 | if len(layers) == 0: 24 | raise("No layers") 25 | 26 | first_l = layers[0] 27 | rest_l = layers[1:] 28 | 29 | # First layer 30 | if first_l["name"] == 'dropout': 31 | model.add(Dropout(input_shape=(maxlen, max_features), rate=first_l['rate'])) 32 | elif first_l["name"] == 'conv': 33 | model.add(Conv1D(filters=first_l['filters'], 34 | kernel_size=first_l['kernel_size'], 35 | padding='valid', 36 | activation=first_l['activation'], 37 | strides=first_l['stride'], 38 | input_shape=(learn_params['input_length'],1))) 39 | 40 | # Middle layers (conv, dropout, pooling, dense, lstm.....) 41 | for l in rest_l: 42 | if l["name"] == 'maxpooling': 43 | model.add(MaxPooling1D(pool_size=l['pool_size'], padding='valid')) 44 | elif l["name"] == 'conv': 45 | model.add(Conv1D(filters=l['filters'], 46 | kernel_size=l['kernel_size'], 47 | padding='valid', 48 | activation=l['activation'], 49 | strides=l['stride'])) 50 | elif l["name"] == 'dropout': 51 | model.add(Dropout(rate=l['rate'])) 52 | elif l["name"] == 'lstm': 53 | model.add(LSTM(l['units'])) 54 | elif l["name"] == 'flatten': 55 | model.add(Flatten()) 56 | elif l["name"] == 'dense': 57 | if 'last' in l and l['last'] == True: 58 | l['units'] = nb_classes 59 | print(l['units']) 60 | if l['regularization'] > 0.0: 61 | module=Dense(units=l['units'], 62 | activation=l['activation'], 63 | kernel_regularizer=regularizers.l2(l['regularization']), 64 | activity_regularizer=regularizers.l1(l['regularization']) 65 | ) 66 | else: 67 | module=Dense(units=l['units'], activation=l['activation']) 68 | 69 | model.add(module) 70 | 71 | 72 | learn_params = learning_params_template 73 | if learn_params['optimizer'] == "sgd": 74 | optimizer = SGD(lr=learn_params['lr'], 75 | decay=learn_params['decay'], 76 | momentum=learn_params['momentum'], 77 | nesterov=True) 78 | elif learn_params['optimizer'] == "adam": 79 | optimizer = Adam(lr=learn_params['lr'], 80 | decay=learn_params['decay']) 81 | else: # elif learn_params['optimizer'] == "rmsprop": 82 | optimizer = RMSprop(lr=learn_params['lr'], 83 | decay=learn_params['decay']) 84 | metrics=['accuracy'] 85 | model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=metrics) 86 | return model 87 | 88 | if __name__ == '__main__': 89 | import numpy as np 90 | model = build_model(nb_classes=200) 91 | print(model.summary()) -------------------------------------------------------------------------------- /models/dl/lstm_example.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import os 3 | #设置Tensorflow的日志等级 4 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='1' # 这是默认的显示等级,显示所有信息 5 | #os.environ["TF_CPP_MIN_LOG_LEVEL"]='2' # 只显示 warning 和 Error 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='3' # 只显示 Error 7 | 8 | from attacks import CNN_model,DF_model,SDAE_model,LSTM_model 9 | from df import df_model_config 10 | 11 | from cnn import cnn_model_config 12 | from sdae import sdae_model_config 13 | from lstm import lstm_model_config 14 | 15 | from data_utils import 
LoadDataNoDefCW100,LoadDataNoDefCW,LoadDataWalkieTalkieCW,LoadDataApp 16 | 17 | #使用步骤 18 | #1. 修改各个模型的超参数,xx_model_config.py,把里面的输入向量和标签数改成自己所需要的 19 | #2. 读取数据,构造好训练集,验证集,测试集 20 | #3. build_model() 21 | #4. 调用fit() 22 | #5. 测试一下 23 | #6. 保存模型 24 | def test_cnn(X_train,y_train,X_valid,y_valid,X_test,y_test): 25 | model = CNN_model() 26 | model.build_model() 27 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 28 | batch_size=cnn_model_config.learning_params_template['batch_size'], 29 | epochs=cnn_model_config.learning_params_template['nb_epochs']) 30 | model.save_model() 31 | score = model.evaluate(X_test=X_test,y_test=y_test) 32 | print('simple CNN accuracy :{0}'.format(score)) 33 | 34 | def test_df(X_train,y_train,X_valid,y_valid,X_test,y_test): 35 | 36 | model =DF_model() 37 | model.build_model() 38 | 39 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 40 | batch_size=df_model_config.learning_params_template['batch_size'], 41 | epochs=df_model_config.learning_params_template['epoch']) 42 | model.save_model() 43 | score = model.evaluate(X_test=X_test,y_test=y_test) 44 | print('Deep Fingerprinting accuracy :{0}'.format(score)) 45 | 46 | 47 | 48 | def test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test): 49 | 50 | model = LSTM_model() 51 | model.build_model() 52 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 53 | batch_size=lstm_model_config.learn_params_template['batch_size'], 54 | epochs=lstm_model_config.learn_params_template['nb_epochs']) 55 | model.save_model() 56 | score = model.evaluate(X_test=X_test,y_test=y_test) 57 | print('lstm accuracy :{0}'.format(score)) 58 | def test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test): 59 | model = SDAE_model() 60 | model.build_model() 61 | model.pre_train(x_train=X_train,x_test=X_test) 62 | 63 | model.fit(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid, 64 | batch_size=sdae_model_config.learning_params_template['batch_size'], 65 | epochs=sdae_model_config.learning_params_template['nb_epochs']) 66 | model.save_model() 67 | score = model.evaluate(X_test=X_test,y_test=y_test) 68 | print('sdae accuracy :{0}'.format(score)) 69 | if __name__ == '__main__': 70 | X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataApp()#LoadDataWalkieTalkieCW() # LoadDataNoDefCW() 71 | #test_df(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 72 | #test_cnn(X_train=X_train,y_train=y_train,X_valid=X_valid,y_valid=y_valid,X_test=X_test,y_test=y_test) 73 | #test_sdae(X_train,y_train,X_valid,y_valid,X_test,y_test) 74 | if X_train.shape[1] > lstm_model_config.learn_params_template['maxlen']: 75 | X_train = X_train.reshape(X_train.shape[0],X_train.shape[1]) 76 | X_valid = X_valid.reshape(X_valid.shape[0],X_valid.shape[1]) 77 | X_test = X_test.reshape(X_test.shape[0],X_test.shape[1]) 78 | 79 | X_train = X_train[:,:lstm_model_config.learn_params_template['maxlen']] 80 | X_valid = X_valid[:,:lstm_model_config.learn_params_template['maxlen']] 81 | X_test = X_test[:,:lstm_model_config.learn_params_template['maxlen']] 82 | 83 | X_train = X_train.reshape(X_train.shape[0],X_train.shape[1],1) 84 | X_valid = X_valid.reshape(X_valid.shape[0],X_valid.shape[1],1) 85 | X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],1) 86 | test_lstm(X_train,y_train,X_valid,y_valid,X_test,y_test) 87 | -------------------------------------------------------------------------------- /models/dl/appnet/build_model.py: 
-------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import torch.nn as nn 3 | import torch as th 4 | 5 | class CNN_block(nn.Module): 6 | def __init__(self, embedding_dim, kernel_size=7, filter_num=256): 7 | super(CNN_block, self).__init__() 8 | self.kernel_size = kernel_size 9 | self.filter_num = filter_num 10 | 11 | self._1conv1d = nn.Conv1d(stride=1, 12 | kernel_size=kernel_size, 13 | in_channels=embedding_dim, 14 | out_channels=filter_num, 15 | padding=kernel_size//2) 16 | self._2maxpooling = nn.MaxPool1d(kernel_size=kernel_size, stride= kernel_size) 17 | 18 | self._3conv1d = nn.Conv1d(stride=1, 19 | kernel_size=kernel_size, 20 | in_channels=256, 21 | out_channels=filter_num, 22 | padding=kernel_size//2) 23 | 24 | self._4maxpooling = nn.MaxPool1d(kernel_size=kernel_size, stride= kernel_size) 25 | 26 | self._5conv1d = nn.Conv1d(stride=1, 27 | kernel_size=kernel_size, 28 | in_channels=256, 29 | out_channels=filter_num, 30 | padding=kernel_size//2) 31 | 32 | self._6maxpooling = nn.MaxPool1d(kernel_size=kernel_size*2, stride= 4) 33 | 34 | self._7flattern = nn.Flatten() 35 | 36 | def forward(self, x): 37 | 38 | batch_size, seq_len, embedding_dim = x.shape 39 | x = x.permute(0,2,1) 40 | 41 | x = self._1conv1d(x) 42 | x = self._2maxpooling(x) 43 | x = self._3conv1d(x) 44 | x = self._4maxpooling(x) 45 | #print(x.shape) 46 | x = self._5conv1d(x) 47 | x = self._6maxpooling(x) 48 | x = self._7flattern(x) 49 | #print('cnn x shape',x.shape) 50 | return x 51 | 52 | class AppNetModel(nn.Module): 53 | def __init__(self, payload_sz, payload_embed_sz, packet_nb, packet_embed_sz, class_nb, lstm_layer_nb=2): 54 | super(AppNetModel, self).__init__() 55 | self.payload_sz = payload_sz 56 | self.payload_embed_sz = payload_embed_sz 57 | self.packet_nb = packet_nb 58 | self.packet_embed_sz = packet_embed_sz 59 | 60 | self.payload_embed_layer = nn.Embedding(num_embeddings=512, embedding_dim=payload_embed_sz) 61 | self.packet_embed_layer = nn.Embedding(num_embeddings=3200, embedding_dim=packet_embed_sz) 62 | 63 | self.lstm_encoder = nn.LSTM(input_size = packet_embed_sz, 64 | hidden_size= packet_embed_sz, 65 | num_layers=lstm_layer_nb, 66 | bidirectional=True, batch_first=True) 67 | 68 | self.cnn_encoder = CNN_block(payload_embed_sz) 69 | 70 | self.fc = nn.Linear(in_features=5632, out_features= class_nb) 71 | self.dropout = nn.Dropout(p=0.1) 72 | 73 | def forward(self, packet_size, payload): 74 | batch_size= packet_size.shape[0] 75 | try: 76 | packet_embed = self.packet_embed_layer(packet_size) 77 | except BaseException as exp: 78 | print(packet_size) 79 | print(exp) 80 | #print('packet embed shape', packet_embed.shape) 81 | packet_vector, hidden = self.lstm_encoder(packet_embed) 82 | #print('packet vector shape',packet_vector.shape) 83 | packet_vector = packet_vector.reshape(batch_size, -1) 84 | 85 | payload_embed = self.payload_embed_layer(payload) 86 | payload_vector = self.cnn_encoder(payload_embed) 87 | 88 | representation = th.cat((packet_vector, payload_vector), dim=1) 89 | representation = self.dropout(self.fc(representation)) 90 | return representation -------------------------------------------------------------------------------- /models/dl/fsnet/main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os 3 | import train 4 | import preprocess 5 | 6 | home = os.getcwd() 7 | record_dir = os.path.join(home, 'record') 8 | save_base = os.path.join(home, 'log') 9 | log_dir = 
os.path.join(save_base) 10 | data_dir = os.path.join(home, 'filter') 11 | pred_dir = os.path.join(home, 'result') 12 | 13 | for dirx in [save_base, record_dir, log_dir, data_dir, pred_dir]: 14 | if not os.path.exists(dirx): 15 | os.mkdir(dirx) 16 | 17 | train_record = os.path.join(record_dir, 'train.json') 18 | test_record = os.path.join(record_dir, 'test.json') 19 | train_meta = os.path.join(record_dir, 'train.meta') 20 | test_meta = os.path.join(record_dir, 'test.meta') 21 | status_label = os.path.join(data_dir, 'status.label') 22 | 23 | flags = tf.flags 24 | 25 | flags.DEFINE_string('train_json', train_record, 'the processed train json file') 26 | flags.DEFINE_string('test_json', test_record, 'the processed test json file') 27 | flags.DEFINE_string('train_meta', train_meta, 'the processed train number') 28 | flags.DEFINE_string('test_meta', test_meta, 'the processed test number') 29 | flags.DEFINE_string('log_dir', log_dir, 'where to save the log') 30 | flags.DEFINE_string('model_dir', log_dir, 'where to save the model') 31 | flags.DEFINE_string('data_dir', data_dir, 'where to read data') 32 | flags.DEFINE_integer('class_num', 53, 'the class number') 33 | flags.DEFINE_integer('length_block', 1, 'the length of a block') 34 | flags.DEFINE_integer('min_length', 2, 'the flow under this parameter will be filtered') 35 | flags.DEFINE_integer('max_packet_length', 1000, 'the largest packet length') 36 | flags.DEFINE_float('split_ratio', 0.8, 'ratio of train set of target app') 37 | flags.DEFINE_float('keep_ratio', 1, 'ratio of keeping the example (for small dataset test)') 38 | flags.DEFINE_integer('max_flow_length_train', 200, 'the max flow length, if larger, drop') 39 | flags.DEFINE_integer('max_flow_length_test', 1000, 'the max flow length, if larger, drop') 40 | flags.DEFINE_string('test_model_dir', log_dir, 'the model dir for test result') 41 | flags.DEFINE_string('pred_dir', pred_dir, 'the dir to save predict result') 42 | 43 | flags.DEFINE_integer('batch_size', 128, 'train batch size') 44 | flags.DEFINE_integer('hidden', 128, 'GRU dimension of hidden state') 45 | flags.DEFINE_integer('layer', 2, 'layer number of length RNN') 46 | flags.DEFINE_integer('length_dim', 16, 'dimension of length embedding') 47 | flags.DEFINE_string('length_num', 'auto', 'length_num') 48 | 49 | flags.DEFINE_float('keep_prob', 0.8, 'the keep probability for dropout') 50 | flags.DEFINE_float('learning_rate', 0.001, 'learning rate') 51 | flags.DEFINE_integer('iter_num', int(0.7e5), 'iteration number') 52 | flags.DEFINE_integer('eval_batch', 77, 'evaluated train batches') 53 | flags.DEFINE_integer('train_eval_batch', 77, 'evaluated train batches') 54 | flags.DEFINE_string('decay_step', 'auto', 'the decay step') 55 | flags.DEFINE_float('decay_rate', 0.5, 'the decay rate') 56 | 57 | flags.DEFINE_string('mode', 'train', 'model mode: train/prepro/test') 58 | flags.DEFINE_integer("capacity", int(1e3), "size of dataset shuffle") 59 | flags.DEFINE_integer("loss_save", 100, "step of saving loss") 60 | flags.DEFINE_integer("checkpoint", 5000, "checkpoint to save and evaluate the model") 61 | flags.DEFINE_float("grad_clip", 5.0, "Global Norm gradient clipping rate") 62 | 63 | flags.DEFINE_boolean('is_cudnn', True, 'whether take the cudnn gru') 64 | flags.DEFINE_float('rec_loss', 0.5, 'the parameter to control the reconstruction of length sequence') 65 | 66 | 67 | def main(_): 68 | config = flags.FLAGS 69 | if config.length_num == 'auto': 70 | config.length_num = config.max_packet_length // config.length_block + 4 71 | else: 
72 | config.length_num = int(config.length_num) 73 | if config.decay_step != 'auto': 74 | config.decay_step = int(config.decay_step) 75 | if config.mode == 'train': 76 | train.train(config) 77 | elif config.mode == 'prepro': 78 | preprocess.preprocess(config) 79 | elif config.mode == 'test': 80 | print(config.test_model_dir) 81 | train.predict(config) 82 | else: 83 | print('unknown mode, only support train now') 84 | raise Exception 85 | 86 | 87 | if __name__ == '__main__': 88 | tf.app.run() 89 | -------------------------------------------------------------------------------- /models/dl/df/generate_dataset.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | from flowcontainer.extractor import extract 3 | import os 4 | import json 5 | import requests 6 | import tqdm 7 | import threading 8 | def payload2packet_length(payload): 9 | rst = [] 10 | i = 0 11 | while i < len(payload): 12 | rst.append(int(payload[i:i+2],base=16)) 13 | i+= 2 14 | return rst 15 | 16 | def request_label(packet): 17 | url = 'http://172.31.251.82:8899/datacon' 18 | post = {"packet_length": packet} 19 | response = requests.post(url=url,json=post) 20 | #print(response.json()) 21 | return response.json()['label'] 22 | def traversal_training(dir): 23 | dataset= {} 24 | for _root, _dirs, _files in os.walk(dir): 25 | if len(_files)==0 : 26 | continue 27 | for file in tqdm.tqdm(_files): 28 | if '.pcap' not in file: 29 | continue 30 | label = file.split('_')[0] 31 | 32 | if label not in dataset: 33 | dataset[label] = [] 34 | path = _root + '/' + file 35 | flows = extract(path,extension=['tcp.payload','udp.payload'], filter='tcp or udp') 36 | for each in tqdm.tqdm(flows, desc=file): 37 | flow = flows[each] 38 | if 'tcp.payload' in flow.extension: 39 | payloads = flow.extension['tcp.payload'] 40 | else: 41 | payloads = flow.extension['udp.payload'] 42 | for payload, index in payloads: 43 | pkt_size= payload2packet_length(payload) 44 | dataset[label].append({ 45 | "packet_length": pkt_size 46 | }) 47 | 48 | for label in dataset: 49 | with open('datacon/'+label + '.json', 'w') as fp: 50 | json.dump(dataset[label],fp) 51 | print('dump ', label) 52 | 53 | def traversal_test(dir): 54 | log_file = 'test.log' 55 | rst_file = 'result.txt' 56 | for _root, _dirs, _files in os.walk(dir): 57 | if len(_files)==0 : 58 | continue 59 | for file in tqdm.tqdm(_files): 60 | if '.pcap' not in file: 61 | continue 62 | 63 | path = _root + '/' + file 64 | flows = extract(path,extension=['tcp.payload','udp.payload'], filter='tcp or udp') 65 | max_counter = 4096 66 | counter = 0 67 | label_counter = {} 68 | for each in tqdm.tqdm(flows, desc=file): 69 | flow = flows[each] 70 | if 'tcp.payload' in flow.extension: 71 | payloads = flow.extension['tcp.payload'] 72 | else: 73 | payloads = flow.extension['udp.payload'] 74 | 75 | packet_length = [] 76 | for payload, index in payloads[:256]: 77 | ##一个batch,一个batch的测试 78 | pkt_size= payload2packet_length(payload) 79 | 80 | if counter < max_counter or len(packet_length) == 0: 81 | packet_length.append(pkt_size) 82 | counter += 1 83 | 84 | _labels = request_label(packet=packet_length) 85 | for label in _labels: 86 | if label not in label_counter: 87 | label_counter[label] = 0 88 | label_counter[label] += 1 89 | 90 | label_counter = list(label_counter.items()) 91 | label_counter= sorted(label_counter, key= lambda x: x[1]) 92 | print('file: {0}, label counter: {1}, vote:{2}\n'.format(file, label_counter, label_counter[-1][0])) 93 | 94 | with 
open(log_file, 'a') as fp: 95 | fp.writelines('file: {0}, label counter: {1}, vote:{2}\n'.format(file, label_counter, label_counter[-1][0])) 96 | 97 | with open(rst_file,'a') as fp: 98 | fp.writelines('{0} {1}\n'.format(file, label_counter[-1][0])) 99 | 100 | 101 | if __name__ == '__main__': 102 | #traversal_training(r'G:\chromeDownload\datacon2021_traffic_eta_part1\part1\sample') 103 | traversal_test(dir=r'G:\chromeDownload\datacon2021_traffic_eta_part1\part1\real_data') 104 | -------------------------------------------------------------------------------- /models/dl/df_only_D/generate_dataset.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | from flowcontainer.extractor import extract 3 | import os 4 | import json 5 | import requests 6 | import tqdm 7 | import threading 8 | def payload2packet_length(payload): 9 | rst = [] 10 | i = 0 11 | while i < len(payload): 12 | rst.append(int(payload[i:i+2],base=16)) 13 | i+= 2 14 | return rst 15 | 16 | def request_label(packet): 17 | url = 'http://172.31.251.82:8899/datacon' 18 | post = {"packet_length": packet} 19 | response = requests.post(url=url,json=post) 20 | #print(response.json()) 21 | return response.json()['label'] 22 | def traversal_training(dir): 23 | dataset= {} 24 | for _root, _dirs, _files in os.walk(dir): 25 | if len(_files)==0 : 26 | continue 27 | for file in tqdm.tqdm(_files): 28 | if '.pcap' not in file: 29 | continue 30 | label = file.split('_')[0] 31 | 32 | if label not in dataset: 33 | dataset[label] = [] 34 | path = _root + '/' + file 35 | flows = extract(path,extension=['tcp.payload','udp.payload'], filter='tcp or udp') 36 | for each in tqdm.tqdm(flows, desc=file): 37 | flow = flows[each] 38 | if 'tcp.payload' in flow.extension: 39 | payloads = flow.extension['tcp.payload'] 40 | else: 41 | payloads = flow.extension['udp.payload'] 42 | for payload, index in payloads: 43 | pkt_size= payload2packet_length(payload) 44 | dataset[label].append({ 45 | "packet_length": pkt_size 46 | }) 47 | 48 | for label in dataset: 49 | with open('datacon/'+label + '.json', 'w') as fp: 50 | json.dump(dataset[label],fp) 51 | print('dump ', label) 52 | 53 | def traversal_test(dir): 54 | log_file = 'test.log' 55 | rst_file = 'result.txt' 56 | for _root, _dirs, _files in os.walk(dir): 57 | if len(_files)==0 : 58 | continue 59 | for file in tqdm.tqdm(_files): 60 | if '.pcap' not in file: 61 | continue 62 | 63 | path = _root + '/' + file 64 | flows = extract(path,extension=['tcp.payload','udp.payload'], filter='tcp or udp') 65 | max_counter = 4096 66 | counter = 0 67 | label_counter = {} 68 | for each in tqdm.tqdm(flows, desc=file): 69 | flow = flows[each] 70 | if 'tcp.payload' in flow.extension: 71 | payloads = flow.extension['tcp.payload'] 72 | else: 73 | payloads = flow.extension['udp.payload'] 74 | 75 | packet_length = [] 76 | for payload, index in payloads[:256]: 77 | ##一个batch,一个batch的测试 78 | pkt_size= payload2packet_length(payload) 79 | 80 | if counter < max_counter or len(packet_length) == 0: 81 | packet_length.append(pkt_size) 82 | counter += 1 83 | 84 | _labels = request_label(packet=packet_length) 85 | for label in _labels: 86 | if label not in label_counter: 87 | label_counter[label] = 0 88 | label_counter[label] += 1 89 | 90 | label_counter = list(label_counter.items()) 91 | label_counter= sorted(label_counter, key= lambda x: x[1]) 92 | print('file: {0}, label counter: {1}, vote:{2}\n'.format(file, label_counter, label_counter[-1][0])) 93 | 94 | with open(log_file, 'a') as 
fp: 95 | fp.writelines('file: {0}, label counter: {1}, vote:{2}\n'.format(file, label_counter, label_counter[-1][0])) 96 | 97 | with open(rst_file,'a') as fp: 98 | fp.writelines('{0} {1}\n'.format(file, label_counter[-1][0])) 99 | 100 | 101 | if __name__ == '__main__': 102 | #traversal_training(r'G:\chromeDownload\datacon2021_traffic_eta_part1\part1\sample') 103 | traversal_test(dir=r'G:\chromeDownload\datacon2021_traffic_eta_part1\part1\real_data') 104 | -------------------------------------------------------------------------------- /models/ml/cumul/util.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | #特征提取 3 | ############ 4 | import numpy as np 5 | import os 6 | import sys 7 | import copy 8 | from src.df.src.utility import LoadDataWakieTalkie_Single_DataSet 9 | class CUMUL_datagenerator: 10 | 11 | def __init__(self, 12 | feature_length=100,min=-2305,max=2305, 13 | equidistance=None,cell_size=512,is_train=False): 14 | self.feature_length = feature_length #cumul模型的输入向量的长度,默认是100 15 | self.cell_size = cell_size #Tor的cell的大小 16 | self.equidistance = None #采样的间距 17 | 18 | #标准化的参数 19 | self.min = min 20 | self.max = max 21 | ##############训练模型使用的数据 22 | 23 | self.is_train = is_train 24 | 25 | self.train_X = None 26 | self.train_y = None 27 | 28 | self.valid_X = None 29 | self.valid_y = None 30 | 31 | self.test_X = None 32 | self.test_y = None 33 | 34 | if is_train: 35 | self.load_tor_cell_sequence() 36 | 37 | def feature_extract(self,trace_sequence,cell_size=None): 38 | """feature_extract() : 从[-1,1,1...]的cell的方向序列中,生成CUMUL模型所需的特征向量 39 | 40 | :param trace_sequence: `numpy.narray` ,形状:batch_size * trace_length 41 | 输入的[-1,1,1...]向量,-1表示ingoing的cell,+1表示outgoing的流 42 | :param cell_size: 每个cell的大小,默认None,因为最后还得归一化 43 | :return: 44 | """ 45 | if cell_size == None: 46 | cell_size = 1 47 | if not isinstance(type(trace_sequence),np.ndarray): 48 | trace_sequence = np.array(trace_sequence) 49 | shape = trace_sequence.shape 50 | culmulative_sum_a = np.zeros(shape=shape,dtype = np.float) 51 | culmulative_sum_c = np.zeros(shape=shape,dtype = np.float) 52 | xp = np.linspace(0,shape[1]-1,shape[1]) 53 | features = np.zeros(shape=(shape[0],2*self.feature_length),dtype = np.float) 54 | #计算累计和 55 | for i in range(0,shape[0]): 56 | for j in range(1,shape[1]): 57 | culmulative_sum_a[i,j] += culmulative_sum_a[i,j-1] + abs(trace_sequence[i,j]) 58 | culmulative_sum_c[i,j] += culmulative_sum_c[i,j-1] + trace_sequence[i,j] 59 | #加上cell_size 60 | culmulative_sum_a = cell_size * culmulative_sum_a 61 | culmulative_sum_c = cell_size * culmulative_sum_c 62 | 63 | #线性采样n个特征 64 | if self.equidistance != None: 65 | equidistance = self.equidistance 66 | else: 67 | equidistance = (shape[1]-1)/self.feature_length 68 | xval = np.arange(0,equidistance * self.feature_length,equidistance) 69 | for i in range(shape[0]): 70 | #print(i,culmulative_sum_a[i]) 71 | #print(i,culmulative_sum_c[i]) 72 | a_interp = (np.interp(xval,xp,culmulative_sum_a[i])-self.min)/(self.max-self.min) 73 | c_interp = (np.interp(xval,xp,culmulative_sum_c[i])-self.min)/(self.max-self.min) 74 | 75 | features[i,0:2*self.feature_length:2]=copy.deepcopy(a_interp) 76 | features[i,1:2*self.feature_length:2]=copy.deepcopy(c_interp) 77 | #print(i,features[i]) 78 | #print('#'*30) 79 | return features 80 | 81 | def load_tor_cell_sequence(self): 82 | if not self.is_train : 83 | return 84 | 85 | _,__,self.train_X,self.train_y = 
LoadDataWakieTalkie_Single_DataSet('train',is_cluster=False,normalized=False) 86 | _,__,self.valid_X,self.valid_y = LoadDataWakieTalkie_Single_DataSet('valid',is_cluster=False,normalized=False) 87 | _,__,self.test_X,self.test_y = LoadDataWakieTalkie_Single_DataSet('test',is_cluster=False,normalized=False) 88 | 89 | self.train_X = self.feature_extract(self.train_X) 90 | print('feature extract....') 91 | self.valid_X =self.feature_extract(self.valid_X) 92 | self.test_X = self.feature_extract(self.test_X) 93 | 94 | print('Load tor cell sequence dataset well.') 95 | print('X train shape:',self.train_X.shape) 96 | print('y train shape:',self.train_y.shape) 97 | print('X valid shape:',self.valid_X.shape) 98 | print('y valid shape:',self.valid_y.shape) 99 | print('X test shape:',self.test_X.shape) 100 | print('y test shape:',self.test_y.shape) 101 | 102 | def trainSet(self): 103 | return self.train_X,self.train_y 104 | def validSet(self): 105 | return self.valid_X,self.valid_y 106 | def testSet(self): 107 | return self.test_X,self.test_y 108 | 109 | 110 | if __name__ == '__main__': 111 | dator = CUMUL_datagenerator(is_train=True) 112 | print(dator.test_X[1],dator.test_y[1]) 113 | -------------------------------------------------------------------------------- /models/dl/graphDapp/train.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import numpy as np 3 | from models.dl.graphDapp import logger_wrappers 4 | import torch as th 5 | from torch import nn 6 | from torch import optim 7 | from torch.nn import functional as F 8 | import tqdm 9 | 10 | from sklearn.metrics import classification_report 11 | from models.dl.graphDapp.model_seriealization import save,load 12 | from models.dl.graphDapp.data_builder import Dataset_fgnet 13 | from models.dl.graphDapp.DApp_Classifier import DApp_classifier 14 | from models.dl.graphDapp.graphDapp_config import config 15 | use_gpu = th.cuda.is_available() 16 | if use_gpu : 17 | device_id = config['device_id'] 18 | device = device_id 19 | else: 20 | device= "cpu" 21 | 22 | def main(dataset_name, modelpath, max_epoch=config['max_epoch']): 23 | data_loader = Dataset_fgnet(raw_dir='', dumpfile= dataset_name,renew=False) 24 | print(data_loader) 25 | model = DApp_classifier(nb_classes=len(data_loader.labelname), 26 | gin_layer_num= config['gin_layer_num'], 27 | gin_hidden_units=config['gin_hidden_units'], 28 | iteration_nums=config['iteration_nums'], 29 | iteration_first=True, 30 | device= device,use_gpu= use_gpu) 31 | loss_func = nn.CrossEntropyLoss() 32 | optimizer = optim.Adam(params=model.parameters(),lr=config['learning_rate']) 33 | #model = load(model,optimizer=optimizer,checkpoint_path=modelpath) 34 | if use_gpu: 35 | model = model.cuda(device) 36 | loss_func = loss_func.cuda(device) 37 | 38 | #训练 39 | model.train() 40 | epoch_losses = [] 41 | epoch_acces = [] 42 | batch_size = config['batch_size'] 43 | 44 | for epoch in tqdm.trange(max_epoch): 45 | epoch_loss = 0 46 | iter = 0 47 | while data_loader.epoch_num == epoch: 48 | graphs,labels= data_loader.next_train_batch(batch_size) 49 | if use_gpu : 50 | graphs = graphs.to(th.device(device)) 51 | labels = labels.to(th.device(device)) 52 | predict_label = model(graphs) 53 | #print(predict_label.size()) 54 | #print(labels.size()) 55 | loss = loss_func(predict_label,labels) 56 | optimizer.zero_grad() 57 | loss.backward() 58 | optimizer.step() 59 | if use_gpu: 60 | lv= loss.detach().item() 61 | else: 62 | lv = loss.detach().cpu().item() 63 | epoch_loss += lv 
64 | iter +=1 65 | #print('Inner loss: {:.4f},Train Watch:{}'.format(lv,data_loader.train_watch)) 66 | #epoch_losses.append(lv) 67 | epoch_loss /= (iter+0.0000001) 68 | info='Epoch {}, loss: {:.4f}'.format(epoch,epoch_loss) 69 | logger_wrappers.warning(info) 70 | epoch_losses.append(epoch_loss) 71 | #测试一下: 72 | graphs,labels = data_loader.next_valid_batch(batch_size=batch_size) 73 | if use_gpu : 74 | graphs = graphs.to(th.device(device)) 75 | labels = labels.to(th.device(device)) 76 | predict_labels = model(graphs) 77 | predict_labels = F.softmax(predict_labels,1) 78 | argmax_labels = th.argmax(predict_labels,1) 79 | #print('pred:', argmax_labels) 80 | #print('real:', labels) 81 | acc = (labels == argmax_labels).float().sum().item() / len(labels) * 100 82 | info='Accuracy of argmax predictions on the valid set: {:4f}%'.format( 83 | acc) 84 | epoch_acces.append(acc) 85 | logger_wrappers.info(info) 86 | ###保存一下模型 87 | save(model,optimizer,checkpoint_path=modelpath) 88 | model.eval() 89 | acc_list =[] 90 | y_pred= [] 91 | y_ture = [] 92 | 93 | for subset in range(len(data_loader.test_set)//batch_size): 94 | graphs,labels = data_loader.next_test_batch(batch_size=batch_size) 95 | if use_gpu : 96 | graphs = graphs.to(th.device(device)) 97 | labels = labels.to(th.device(device)) 98 | predict_labels = model(graphs) 99 | predict_labels = F.softmax(predict_labels,1) 100 | argmax_labels = th.argmax(predict_labels,1) 101 | y_pred += argmax_labels.tolist() 102 | y_ture += labels.tolist() 103 | acc = (labels == argmax_labels).float().sum().item() / len(labels) * 100 104 | acc_list.append(acc) 105 | info='Accuracy of argmax predictions on the test subset{1}: {0:4f}%'.format(acc,subset) 106 | logger_wrappers.info(info) 107 | info = 'Average Accuracy on entire test set:{:0.4f}%'.format(np.mean(acc_list)) 108 | logger_wrappers.info(info) 109 | print(classification_report(y_pred=y_pred,y_true=y_ture, digits=5)) 110 | -------------------------------------------------------------------------------- /models/ml/bind/README.md: -------------------------------------------------------------------------------- 1 | # 说明 2 | 本文件夹是对BIND论文里面提到的方法进行复现. 
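For orientation, the vectorization procedure described at the end of this README (a global per-family key dictionary trimmed to the most frequent keys, per-flow counts over those keys normalized by the flow's own total, then concatenation of the seven feature families) can be sketched as follows. This is a minimal, illustrative sketch rather than the code in this folder: the helper names `build_dictionaries`/`vectorize` and the default `top_n=512` are assumptions taken from the example in the text, and the key names follow the sample flow shown below.

```python
from collections import Counter

# the seven BIND feature families, named as in the sample flow below
FEATURE_KEYS = ['Dn-Up-size', 'Dn-Up-time', 'Up-Dn-size', 'Up-Dn-time',
                'Uni-size', 'Uni-time', 'Pkt-size']

def build_dictionaries(flows, top_n=512):
    """Scan every flow's raw BIND features once and keep, per family,
    the top_n keys that occur most often across the whole dataset."""
    dicts = {}
    for name in FEATURE_KEYS:
        counter = Counter()
        for flow in flows:                      # flow: one raw BIND feature dict
            counter.update(flow.get(name, {}))  # adds this flow's counts per key
        dicts[name] = [k for k, _ in counter.most_common(top_n)]
    return dicts

def vectorize(flow, dicts):
    """Map one flow onto the global dictionaries: for each family, a vector of
    key counts divided by the flow's total count for that family, concatenated."""
    vector = []
    for name in FEATURE_KEYS:
        feats = flow.get(name, {})
        total = float(sum(feats.values())) or 1.0   # avoid division by zero for an empty family
        vector += [feats.get(k, 0) / total for k in dicts[name]]
    return vector
```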
3 | 论文引用bibtex: 4 | ``` 5 | @inproceedings{al2016adaptive, 6 | title={Adaptive encrypted traffic fingerprinting with bi-directional dependence}, 7 | author={Al-Naami, Khaled and Chandra, Swarup and Mustafa, Ahmad and Khan, Latifur and Lin, Zhiqiang and Hamlen, Kevin and Thuraisingham, Bhavani}, 8 | booktitle={Proceedings of the 32nd Annual Conference on Computer Security Applications}, 9 | pages={177--188}, 10 | year={2016} 11 | } 12 | ``` 13 | # 数据集样式 14 | 向量化前,每条流具有的特征: 15 | ``` 16 | BIND_Feature_Raw={ 17 | 'Dn-Up-Burst-Size':{(x,y):counter},#先Down再Up的burst size依次为x,y的次数 18 | 'Dn-Up-Burst-Time':{(x,y):counter},#先Down再Up的burst 持续时间依次为x,y的次数,(只保留2位有效数字) 19 | 'Up-Dn-Burst-Size':{(x,y):counter},#先Up再Down的burst size依次为x,y的次数 20 | 'Up-Dn-Burst-Time':{(x,y):counter},#先Up再Down的burst 持续时间依次为x,y的次数,(只保留2位有效数字) 21 | 'Uni-Burst-Size':{x:counter},#单个burst size的出现次数的统计 22 | 'Uni-Burst-Time':{x:counter},#单个burst size持续时间的统计 23 | 'Pkt-Size':{x:counter},#包长的出现次数的统计 24 | } 25 | ``` 26 | 样例: 27 | ``` 28 | {'Dn-Up-size': {(1425, 176): 1, (2317, 184): 1, (2988, 191): 1, (2014, 197): 1, (1862, 173): 1, (7616, 191): 1, (4685, 192): 1, (2832, 185): 1, (2301, 197): 1, (5750, 180): 1, (2565, 200): 1, (3803, 178): 1, (3816, 181): 1, (2485, 181): 2, (2178, 200): 1, (2578, 200): 1, (2571, 199): 1, (2676, 186): 1, (2335, 185): 1, (4167, 200): 1, (5062, 200): 1, (5062, 167): 1, (2499, 167): 1, (2499, 182): 1, (2729, 200): 1, (2619, 192): 1, (4019, 211): 1, (2612, 211): 1, (2612, 210): 1, (3812, 194): 1, (3010, 194): 1, (3004, 169): 1, (2854, 177): 1, (3442, 177): 1, (3449, 178): 1, (2261, 183): 1, (1343, 183): 1, (1343, 169): 1}, 'Dn-Up-time': {(0.02, 1.03): 1, (0.08, 0.22): 1, (0.02, 0.3): 1, (0.05, 0.81): 1, (0.05, -0.62): 1, (0.01, 0.11): 2, (0.02, 0.0): 1, (0.07, 0.02): 1, (0.03, 0.76): 1, (0.0, 0.07): 1, (0.03, 0.12): 1, (0.08, 0.1): 1, (0.0, 0.04): 1, (0.01, 0.06): 1, (0.02, 0.69): 1, (0.0, -0.51): 1, (0.03, 0.05): 1, (0.04, 0.01): 1, (0.0, 0.11): 1, (0.04, 0.13): 1, (0.0, 0.0): 1, (0.11, 0.0): 2, (0.0, 0.09): 1, (0.01, 0.01): 2, (0.04, 0.09): 1, (0.3, 0.27): 1, (-0.42, 0.0): 1, (0.11, 0.01): 1, (0.1, 0.0): 1, (0.03, 0.06): 1, (0.0, 0.05): 1, (0.09, 0.0): 1, (0.0, 0.1): 1, (0.01, 0.59): 1, (0.36, -0.68): 1, (0.01, 1.32): 1}, 'Up-Dn-size': {(167, 1425): 1, (176, 2317): 1, (184, 2988): 1, (191, 2014): 1, (197, 1862): 1, (173, 7616): 1, (191, 4685): 1, (192, 2832): 1, (185, 2301): 1, (197, 5750): 1, (180, 2565): 1, (200, 3803): 1, (178, 3816): 1, (181, 2485): 2, (181, 2178): 1, (200, 2578): 1, (200, 2571): 1, (199, 2676): 1, (186, 2335): 1, (185, 4167): 1, (200, 5062): 2, (167, 2499): 2, (182, 2729): 1, (200, 2619): 1, (192, 4019): 1, (211, 2612): 2, (210, 3812): 1, (194, 3010): 1, (194, 3004): 1, (169, 2854): 1, (177, 3442): 1, (177, 3449): 1, (178, 2261): 1, (183, 1343): 2, (169, 2570): 1}, 'Up-Dn-time': {(0.0, 0.02): 1, (1.03, 0.08): 1, (0.22, 0.02): 1, (0.3, 0.05): 1, (0.81, 0.05): 1, (-0.62, 0.01): 1, (0.11, 0.01): 1, (0.11, 0.02): 1, (0.0, 0.07): 1, (0.02, 0.03): 1, (0.76, 0.0): 1, (0.07, 0.03): 1, (0.12, 0.08): 1, (0.1, 0.0): 1, (0.04, 0.01): 1, (0.06, 0.02): 1, (0.69, 0.0): 1, (-0.51, 0.03): 1, (0.05, 0.04): 1, (0.01, 0.0): 1, (0.11, 0.04): 1, (0.13, 0.0): 1, (0.0, 0.11): 2, (0.0, 0.0): 2, (0.09, 0.01): 1, (0.01, 0.04): 1, (0.09, 0.3): 1, (0.27, -0.42): 1, (0.0, 0.01): 1, (0.01, 0.11): 1, (0.01, 0.1): 1, (0.0, 0.03): 1, (0.06, 0.0): 1, (0.05, 0.09): 1, (0.1, 0.01): 1, (0.59, 0.36): 1, (-0.68, 0.01): 1, (1.32, 0.02): 1}, 'Uni-size': {167: 3, 1425: 1, 176: 1, 2317: 1, 184: 1, 2988: 1, 191: 2, 2014: 1, 197: 2, 
1862: 1, 173: 1, 7616: 1, 4685: 1, 192: 2, 2832: 1, 185: 2, 2301: 1, 5750: 1, 180: 1, 2565: 1, 200: 6, 3803: 1, 178: 2, 3816: 1, 181: 3, 2485: 2, 2178: 1, 2578: 1, 2571: 1, 199: 1, 2676: 1, 186: 1, 2335: 1, 4167: 1, 5062: 2, 2499: 2, 182: 1, 2729: 1, 2619: 1, 4019: 1, 211: 2, 2612: 2, 210: 1, 3812: 1, 194: 2, 3010: 1, 3004: 1, 169: 2, 2854: 1, 177: 2, 3442: 1, 3449: 1, 2261: 1, 183: 2, 1343: 2, 2570: 1}, 'Uni-time': {0.0: 16, 0.02: 6, 1.03: 1, 0.08: 2, 0.22: 1, 0.3: 2, 0.05: 4, 0.81: 1, -0.62: 1, 0.01: 11, 0.11: 6, 0.07: 2, 0.03: 4, 0.76: 1, 0.12: 1, 0.1: 3, 0.04: 4, 0.06: 2, 0.69: 1, -0.51: 1, 0.13: 1, 0.09: 3, 0.27: 1, -0.42: 1, 0.59: 1, 0.36: 1, -0.68: 1, 1.32: 1}, 'Pkt-size': {167: 3, 1425: 1, 176: 1, 1448: 56, 4: 18, 8: 2, 857: 1, 184: 1, 9: 15, 78: 1, 5: 15, 191: 2, 566: 1, 197: 2, 414: 1, 173: 1, 14: 1, 1265: 1, 545: 1, 333: 1, 192: 2, 1375: 1, 185: 2, 600: 8, 253: 1, 206: 1, 180: 1, 1103: 1, 200: 6, 889: 1, 178: 2, 320: 1, 181: 3, 1023: 1, 1033: 1, 721: 1, 1121: 1, 1114: 1, 199: 1, 1219: 1, 186: 1, 878: 1, 1258: 1, 16: 1, 689: 1, 702: 1, 1042: 1, 1037: 1, 182: 1, 1267: 1, 1157: 1, 519: 1, 211: 2, 1150: 2, 210: 1, 861: 1, 903: 1, 194: 2, 17: 2, 93: 1, 956: 1, 169: 2, 806: 1, 177: 2, 542: 1, 532: 1, 799: 1, 183: 2, 1343: 2, 1113: 1}} 29 | 30 | ``` 31 | 向量化的过程: 32 | - 1. 获取全局的字典: 33 | 线性遍历每条流的BIND-Feature-Raw,获取7类特征的字典,字典反映了各类特征可能出现那些取值。 34 | 35 | 如果取值太大了,可能安装频率的高低只保留频率TOPN的一些取值。 36 | 37 | - 2. 向量化 38 | 根据全局的特征字典,只保留每条流里面存在于全局特征字典key里面的key. 39 | 例如,假设全局化字典里面的'Dn-Up-Burst-Size'特征,一共保留了512个key. 40 | 那么每条流的'Dn-Up-Burst-Size'特征就是一个512维的向量,其中第i个向量的取值表示第i个key在这条流出现的次数。 41 | 为了防止流的长度为特征取值的影响,这512维特征会除以这条流Dn-Up-Burst-Size里面所有value的总和。 42 | 43 | 最后把7类特征拼接起来,得到最终的向量特征。 44 | -------------------------------------------------------------------------------- /models/dl/df/df_main_model.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from models.dl.attacks import DF_model, parser_raw_data 3 | from models.dl.df import df_model_config 4 | from models.model_base import abs_model 5 | import os 6 | from config import raw_dataset_base 7 | from keras.utils import np_utils 8 | import numpy as np 9 | os.environ['CUDA_VISBALE_DEIVCES'] ='cuda:2' 10 | class model(abs_model): 11 | def __init__(self, dataset, randseed, splitrate): 12 | super(model,self).__init__('df',randseed= randseed) 13 | if os.path.exists(self.database) == False: 14 | os.makedirs(self.database,exist_ok=True) 15 | 16 | self.dataset = dataset 17 | self.model = self.database + '/'+ self.name + '_' + dataset + '_model' 18 | self.data = self.database + '/'+ self.name + '_' + dataset + '/' 19 | self.splitrate = splitrate 20 | #原始数据集目录 21 | full_rdata = raw_dataset_base + self.dataset 22 | self.full_rdata = full_rdata 23 | 24 | if self.data_exists() == False: 25 | self.parser_raw_data() 26 | 27 | self.df_model = None 28 | def parser_raw_data(self): 29 | full_rdata = self.full_rdata 30 | if os.path.exists(full_rdata) == False: 31 | raise OSError('Dataset {0} (full path: {1}) does not exist!'.format(self.dataset,full_rdata)) 32 | os.makedirs(self.data, exist_ok=True) 33 | ##从原始数据集构建DF所需的数据集 34 | X_train,y_train, X_valid, y_valid, X_test, y_test = parser_raw_data(self, self.full_rdata, max_len = df_model_config.learning_params_template['in_dim']) 35 | 36 | self.save_data(X_train,y_train, X_valid, y_valid, X_test, y_test) 37 | 38 | 39 | def train(self): 40 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 41 | num_class = self.num_classes() 42 | 
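# the class count configured in df_model_config is overwritten here with the number of labels actually present in the prepared dataset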
df_model_config.nb_classes_template = num_class 43 | 44 | y_train = np_utils.to_categorical(y_train, num_classes=num_class) 45 | y_valid = np_utils.to_categorical(y_valid, num_classes=num_class) 46 | y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 47 | 48 | X_train = X_train[:, :,np.newaxis] 49 | X_valid = X_valid[:, :,np.newaxis] 50 | X_test = X_test[:, :,np.newaxis] 51 | 52 | df_model = DF_model(num_class = num_class) 53 | df_model.build_model() 54 | 55 | df_model.fit(X_train=X_train,y_train=y_train, 56 | X_valid= X_valid, y_valid = y_valid, 57 | batch_size= df_model_config.learning_params_template['batch_size'], 58 | epochs=df_model_config.learning_params_template['epoch']) 59 | 60 | df_model.save_model(path=self.model) 61 | score = df_model.evaluate(X_test=X_test, y_test = y_test) 62 | print('[Deep Fingerprinting Test on {0} accuracy {1}'.format(self.dataset, score)) 63 | def test(self): 64 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 65 | y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 66 | X_test = X_test[:, :,np.newaxis] 67 | 68 | df_model = DF_model(num_class= self.num_classes()) 69 | df_model.load_model(self.model) 70 | score = df_model.evaluate(X_test=X_test,y_test=y_test) 71 | print('Deep Fingerprinting Test on {0} accuracy :{1}'.format(self.dataset,score)) 72 | 73 | def predict(self,pkt_size): 74 | def pad_sequence(x, max_len, pad_value=0): 75 | r = x + [pad_value] * (max_len - len(x)) 76 | return r[:max_len] 77 | 78 | if self.df_model == None: 79 | self.df_model = DF_model(num_class= self.num_classes()) 80 | self.df_model.load_model(self.model) 81 | 82 | x = [pad_sequence(_pkt_size, max_len= df_model_config.learning_params_template['in_dim']) for _pkt_size in pkt_size] 83 | x = np.array(x)[:, :,np.newaxis] 84 | y_logit = self.df_model.predict(x, actual_lable=True) 85 | return y_logit.tolist() 86 | def get_feature(self): 87 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 88 | #y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 89 | X_test = X_test[:5000] 90 | X_test = X_test[:, :,np.newaxis] 91 | 92 | df_model = DF_model(num_class= self.num_classes()) 93 | df_model.load_model(self.model) 94 | logit, feature = df_model.predict(X_test=X_test,actual_lable=False, return_feature=True) 95 | print(feature.shape, logit.shape) 96 | logit = logit.tolist() 97 | feature = feature.tolist() 98 | #feature = logit 99 | y_true = y_test[:5000].tolist() 100 | feature_set = {} 101 | feature_vector = [] 102 | for i in range(len(y_true)): 103 | if y_true[i] not in feature_set: 104 | feature_set[y_true[i]] = [] 105 | feature_set[y_true[i]].append([feature[i]]) 106 | import pickle 107 | with open('feature_set_D1_53_DF.pkl','wb') as fp: 108 | pickle.dump(feature_set, fp) 109 | print(y_true[-1],logit[-1]) 110 | print(feature[-1]) 111 | if __name__ == '__main__': 112 | for test_rate in [0.1]: 113 | print(test_rate) 114 | dataset='app150' 115 | df_model = model(dataset, randseed= 128, splitrate=test_rate) 116 | #df_model.parser_raw_data() 117 | df_model.train() 118 | df_model.test() 119 | print(dataset) 120 | print(test_rate) 121 | #import os 122 | #os.remove(df_model.model) 123 | #df_model.get_feature() 124 | break 125 | -------------------------------------------------------------------------------- /models/dl/df/df_model.py: -------------------------------------------------------------------------------- 1 | # DF model, 2 | # This code is to implement deep fingerprinting model for 
website fingerprinting attacks 3 | # ACM Reference Formant 4 | # Payap Sirinam, Mohsen Imani, Marc Juarez, and Matthew Wright. 2018. 5 | # Deep Fingerprinting: Undermining Website Fingerprinting Defenses with Deep Learning. 6 | # In 2018 ACM SIGSAC Conference on Computer and Communications Security (CCS ’18), 7 | # October 15–19, 2018, Toronto, ON, Canada. ACM, New York, NY, USA, 16 pages. 8 | # https://doi.org/10.1145/3243734.3243768 9 | from keras.models import Sequential 10 | from keras.layers import Conv1D, MaxPooling1D, BatchNormalization 11 | from keras.layers.core import Activation, Flatten, Dense, Dropout 12 | from keras.layers.advanced_activations import ELU 13 | from keras.initializers import glorot_uniform 14 | 15 | from .df_model_config import learning_params_template,nb_classes_template 16 | from keras.optimizers import Adamax 17 | def build_model(input_shape=(learning_params_template['in_dim'],1), classes=nb_classes_template): 18 | model = Sequential() 19 | #Block1 20 | filter_num = ['None',32,64,128,256] 21 | kernel_size = ['None',8,8,8,8] 22 | conv_stride_size = ['None',1,1,1,1] 23 | pool_stride_size = ['None',4,4,4,4] 24 | pool_size = ['None',8,8,8,8] 25 | 26 | model.add(Conv1D(filters=filter_num[1], kernel_size=kernel_size[1], input_shape=input_shape, 27 | strides=conv_stride_size[1], padding='same', 28 | name='block1_conv1')) 29 | model.add(BatchNormalization(axis=-1)) 30 | model.add(ELU(alpha=1.0, name='block1_adv_act1')) 31 | model.add(Conv1D(filters=filter_num[1], kernel_size=kernel_size[1], 32 | strides=conv_stride_size[1], padding='same', 33 | name='block1_conv2')) 34 | model.add(BatchNormalization(axis=-1)) 35 | model.add(ELU(alpha=1.0, name='block1_adv_act2')) 36 | model.add(MaxPooling1D(pool_size=pool_size[1], strides=pool_stride_size[1], 37 | padding='same', name='block1_pool')) 38 | model.add(Dropout(0.1, name='block1_dropout')) 39 | 40 | model.add(Conv1D(filters=filter_num[2], kernel_size=kernel_size[2], 41 | strides=conv_stride_size[2], padding='same', 42 | name='block2_conv1')) 43 | model.add(BatchNormalization()) 44 | model.add(Activation('relu', name='block2_act1')) 45 | 46 | model.add(Conv1D(filters=filter_num[2], kernel_size=kernel_size[2], 47 | strides=conv_stride_size[2], padding='same', 48 | name='block2_conv2')) 49 | model.add(BatchNormalization()) 50 | model.add(Activation('relu', name='block2_act2')) 51 | model.add(MaxPooling1D(pool_size=pool_size[2], strides=pool_stride_size[3], 52 | padding='same', name='block2_pool')) 53 | model.add(Dropout(0.1, name='block2_dropout')) 54 | 55 | model.add(Conv1D(filters=filter_num[3], kernel_size=kernel_size[3], 56 | strides=conv_stride_size[3], padding='same', 57 | name='block3_conv1')) 58 | model.add(BatchNormalization()) 59 | model.add(Activation('relu', name='block3_act1')) 60 | model.add(Conv1D(filters=filter_num[3], kernel_size=kernel_size[3], 61 | strides=conv_stride_size[3], padding='same', 62 | name='block3_conv2')) 63 | model.add(BatchNormalization()) 64 | model.add(Activation('relu', name='block3_act2')) 65 | model.add(MaxPooling1D(pool_size=pool_size[3], strides=pool_stride_size[3], 66 | padding='same', name='block3_pool')) 67 | model.add(Dropout(0.1, name='block3_dropout')) 68 | 69 | model.add(Conv1D(filters=filter_num[4], kernel_size=kernel_size[4], 70 | strides=conv_stride_size[4], padding='same', 71 | name='block4_conv1')) 72 | model.add(BatchNormalization()) 73 | model.add(Activation('relu', name='block4_act1')) 74 | model.add(Conv1D(filters=filter_num[4], kernel_size=kernel_size[4], 75 | 
strides=conv_stride_size[4], padding='same', 76 | name='block4_conv2')) 77 | model.add(BatchNormalization()) 78 | model.add(Activation('relu', name='block4_act2')) 79 | model.add(MaxPooling1D(pool_size=pool_size[4], strides=pool_stride_size[4], 80 | padding='same', name='block4_pool')) 81 | model.add(Dropout(0.1, name='block4_dropout')) 82 | 83 | model.add(Flatten(name='flatten')) 84 | model.add(Dense(512, kernel_initializer=glorot_uniform(seed=0), name='fc1')) 85 | #model.add(BatchNormalization()) 86 | model.add(Activation('relu', name='fc1_act')) 87 | 88 | model.add(Dropout(0.7, name='fc1_dropout')) 89 | 90 | model.add(Dense(512, kernel_initializer=glorot_uniform(seed=0), name='fc2')) 91 | #model.add(BatchNormalization()) 92 | model.add(Activation('relu', name='fc2_act')) 93 | 94 | model.add(Dropout(0.5, name='fc2_dropout')) 95 | 96 | model.add(Dense(classes, kernel_initializer=glorot_uniform(seed=0), name='fc3')) 97 | model.add(Activation('softmax', name="softmax")) 98 | 99 | 100 | OPTIMIZER = Adamax(lr=learning_params_template['lr'], 101 | beta_1=learning_params_template['beta_1'], 102 | beta_2=learning_params_template['beta_2'], 103 | epsilon=learning_params_template['epsilon'], 104 | decay=learning_params_template['decay']) 105 | model.compile(loss="categorical_crossentropy", optimizer=OPTIMIZER,metrics=["accuracy"]) 106 | return model 107 | -------------------------------------------------------------------------------- /models/dl/df_only_D/df_model.py: -------------------------------------------------------------------------------- 1 | # DF model, 2 | # This code is to implement deep fingerprinting model for website fingerprinting attacks 3 | # ACM Reference Formant 4 | # Payap Sirinam, Mohsen Imani, Marc Juarez, and Matthew Wright. 2018. 5 | # Deep Fingerprinting: Undermining Website Fingerprinting Defenses with Deep Learning. 6 | # In 2018 ACM SIGSAC Conference on Computer and Communications Security (CCS ’18), 7 | # October 15–19, 2018, Toronto, ON, Canada. ACM, New York, NY, USA, 16 pages. 
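# Note: this df_only_D copy keeps the same architecture as models/dl/df/df_model.py; the difference is upstream, where df_only_D/df_main_model.py applies np.sign to the inputs so the network only sees +1/-1 direction sequences.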
8 | # https://doi.org/10.1145/3243734.3243768 9 | from keras.models import Sequential 10 | from keras.layers import Conv1D, MaxPooling1D, BatchNormalization 11 | from keras.layers.core import Activation, Flatten, Dense, Dropout 12 | from keras.layers.advanced_activations import ELU 13 | from keras.initializers import glorot_uniform 14 | 15 | from .df_model_config import learning_params_template,nb_classes_template 16 | from keras.optimizers import Adamax 17 | def build_model(input_shape=(learning_params_template['in_dim'],1), classes=nb_classes_template): 18 | model = Sequential() 19 | #Block1 20 | filter_num = ['None',32,64,128,256] 21 | kernel_size = ['None',8,8,8,8] 22 | conv_stride_size = ['None',1,1,1,1] 23 | pool_stride_size = ['None',4,4,4,4] 24 | pool_size = ['None',8,8,8,8] 25 | 26 | model.add(Conv1D(filters=filter_num[1], kernel_size=kernel_size[1], input_shape=input_shape, 27 | strides=conv_stride_size[1], padding='same', 28 | name='block1_conv1')) 29 | model.add(BatchNormalization(axis=-1)) 30 | model.add(ELU(alpha=1.0, name='block1_adv_act1')) 31 | model.add(Conv1D(filters=filter_num[1], kernel_size=kernel_size[1], 32 | strides=conv_stride_size[1], padding='same', 33 | name='block1_conv2')) 34 | model.add(BatchNormalization(axis=-1)) 35 | model.add(ELU(alpha=1.0, name='block1_adv_act2')) 36 | model.add(MaxPooling1D(pool_size=pool_size[1], strides=pool_stride_size[1], 37 | padding='same', name='block1_pool')) 38 | model.add(Dropout(0.1, name='block1_dropout')) 39 | 40 | model.add(Conv1D(filters=filter_num[2], kernel_size=kernel_size[2], 41 | strides=conv_stride_size[2], padding='same', 42 | name='block2_conv1')) 43 | model.add(BatchNormalization()) 44 | model.add(Activation('relu', name='block2_act1')) 45 | 46 | model.add(Conv1D(filters=filter_num[2], kernel_size=kernel_size[2], 47 | strides=conv_stride_size[2], padding='same', 48 | name='block2_conv2')) 49 | model.add(BatchNormalization()) 50 | model.add(Activation('relu', name='block2_act2')) 51 | model.add(MaxPooling1D(pool_size=pool_size[2], strides=pool_stride_size[3], 52 | padding='same', name='block2_pool')) 53 | model.add(Dropout(0.1, name='block2_dropout')) 54 | 55 | model.add(Conv1D(filters=filter_num[3], kernel_size=kernel_size[3], 56 | strides=conv_stride_size[3], padding='same', 57 | name='block3_conv1')) 58 | model.add(BatchNormalization()) 59 | model.add(Activation('relu', name='block3_act1')) 60 | model.add(Conv1D(filters=filter_num[3], kernel_size=kernel_size[3], 61 | strides=conv_stride_size[3], padding='same', 62 | name='block3_conv2')) 63 | model.add(BatchNormalization()) 64 | model.add(Activation('relu', name='block3_act2')) 65 | model.add(MaxPooling1D(pool_size=pool_size[3], strides=pool_stride_size[3], 66 | padding='same', name='block3_pool')) 67 | model.add(Dropout(0.1, name='block3_dropout')) 68 | 69 | model.add(Conv1D(filters=filter_num[4], kernel_size=kernel_size[4], 70 | strides=conv_stride_size[4], padding='same', 71 | name='block4_conv1')) 72 | model.add(BatchNormalization()) 73 | model.add(Activation('relu', name='block4_act1')) 74 | model.add(Conv1D(filters=filter_num[4], kernel_size=kernel_size[4], 75 | strides=conv_stride_size[4], padding='same', 76 | name='block4_conv2')) 77 | model.add(BatchNormalization()) 78 | model.add(Activation('relu', name='block4_act2')) 79 | model.add(MaxPooling1D(pool_size=pool_size[4], strides=pool_stride_size[4], 80 | padding='same', name='block4_pool')) 81 | model.add(Dropout(0.1, name='block4_dropout')) 82 | 83 | model.add(Flatten(name='flatten')) 84 | 
model.add(Dense(512, kernel_initializer=glorot_uniform(seed=0), name='fc1')) 85 | #model.add(BatchNormalization()) 86 | model.add(Activation('relu', name='fc1_act')) 87 | 88 | model.add(Dropout(0.7, name='fc1_dropout')) 89 | 90 | model.add(Dense(512, kernel_initializer=glorot_uniform(seed=0), name='fc2')) 91 | #model.add(BatchNormalization()) 92 | model.add(Activation('relu', name='fc2_act')) 93 | 94 | model.add(Dropout(0.5, name='fc2_dropout')) 95 | 96 | model.add(Dense(classes, kernel_initializer=glorot_uniform(seed=0), name='fc3')) 97 | model.add(Activation('softmax', name="softmax")) 98 | 99 | 100 | OPTIMIZER = Adamax(lr=learning_params_template['lr'], 101 | beta_1=learning_params_template['beta_1'], 102 | beta_2=learning_params_template['beta_2'], 103 | epsilon=learning_params_template['epsilon'], 104 | decay=learning_params_template['decay']) 105 | model.compile(loss="categorical_crossentropy", optimizer=OPTIMIZER,metrics=["accuracy"]) 106 | return model 107 | -------------------------------------------------------------------------------- /models/dl/sdae/sdae_model.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from keras.layers import Dense, Dropout 3 | from keras.layers import Input 4 | from keras.models import Model 5 | import keras.utils.np_utils as npu 6 | import numpy as np 7 | from keras.optimizers import SGD, Adam, RMSprop 8 | 9 | from .sdae_model_config import learning_params_template, nb_classes_template 10 | 11 | global encoded_layers 12 | def make_layer(layer, x_train, x_test, steps=0, gen=False): 13 | in_dim = layer['in_dim'] 14 | out_dim = layer['out_dim'] 15 | epochs = layer['epochs'] 16 | batch_size = layer['batch_size'] 17 | optimizer = layer['optimizer'] 18 | enc_act = layer['enc_activation'] 19 | dec_act = layer['dec_activation'] 20 | 21 | if optimizer == "sgd": 22 | optimizer = SGD(lr=layer['lr'], 23 | decay=layer['decay'], 24 | momentum=layer['momentum']) 25 | elif optimizer == "adam": 26 | optimizer = Adam(lr=layer['lr'], 27 | decay=layer['decay']) 28 | elif optimizer == "rmsprop": 29 | optimizer = RMSprop(lr=layer['lr'], 30 | decay=layer['decay']) 31 | 32 | 33 | # this is our input placeholder 34 | input_data = Input(shape=(in_dim,)) 35 | # "encoded" is the encoded representation of the input_data 36 | encoded = Dense(out_dim, activation=enc_act)(input_data) 37 | # "decoded" is the lossy reconstruction of the input_data 38 | decoded = Dense(in_dim, activation=dec_act)(encoded) 39 | 40 | # this model maps an input_data to its reconstruction 41 | autoencoder = Model(input_data, decoded) 42 | 43 | # this model maps an input_data to its encoded representation 44 | encoder = Model(input_data, encoded) 45 | 46 | autoencoder.compile(optimizer=optimizer, loss='mean_squared_error') 47 | 48 | # train layer 1 49 | if gen: 50 | (train_steps, test_steps) = steps 51 | autoencoder.fit_generator(x_train, steps_per_epoch=train_steps, epochs=epochs) 52 | else: 53 | autoencoder.fit(x_train, x_train, epochs=epochs, batch_size=batch_size) 54 | 55 | # encode and decode some digits 56 | # note that we take them from the *test* set 57 | 58 | if gen: 59 | (train_steps, test_steps) = steps 60 | new_x_train1 = encoder.predict_generator(x_train, steps=train_steps) 61 | new_x_test1 = encoder.predict_generator(x_test, steps=test_steps) 62 | else: 63 | new_x_train1 = encoder.predict(x_train) 64 | new_x_test1 = encoder.predict(x_test) 65 | 66 | weights = encoder.layers[1].get_weights() 67 | 68 | return new_x_train1, new_x_test1, 
weights 69 | 70 | def build_model(learn_params=learning_params_template, nb_classes=nb_classes_template): 71 | ##注意输入的数据是迭代器 72 | #(x_train, y_train), (x_test, y_test) = train, test 73 | layers = learn_params["layers"] 74 | 75 | # Building SAE 76 | input_data = Input(shape=(layers[0]['in_dim'],)) 77 | prev_layer = input_data 78 | 79 | i = 0 80 | global encoded_layers 81 | encoded_layers = [] 82 | for l in layers: 83 | encoded = Dense(l['out_dim'], activation=l['enc_activation'])(prev_layer) #多个自编码层之间用了一个全连接层 84 | i += 1 85 | encoded_layers.append(i) 86 | dropout = l["dropout"] 87 | if dropout > 0.0: 88 | drop = Dropout(dropout)(encoded) 89 | i += 1 90 | prev_layer = drop 91 | else: 92 | prev_layer = encoded 93 | 94 | softmax = Dense(nb_classes, activation='softmax')(prev_layer) #最后一层是个全连接层 95 | sae = Model(input_data, softmax) 96 | ''' 97 | if pre_train: 98 | #这里是在预训练自编码器的encoder-decoder,于是应该提供X的数据 99 | # Pre-training AEs 100 | prev_x_train = None 101 | prev_x_test = None 102 | for i, l in enumerate(layers): 103 | if i == 0: 104 | prev_x_train, prev_x_test, weights = make_layer(l, train_gen, test_gen, steps=steps, gen=True) 105 | else: 106 | prev_x_train, prev_x_test, weights = make_layer(l, prev_x_train, prev_x_test) 107 | sae.layers[encoded_layers[i]].set_weights(weights) 108 | #print(sae.get_weights()) 109 | ''' 110 | if learn_params['optimizer'] == "sgd": 111 | optimizer = SGD(lr=learn_params['lr'], 112 | decay=learn_params['decay'], 113 | momentum=0.9, 114 | nesterov=True) 115 | elif learn_params['optimizer'] == "adam": 116 | optimizer = Adam(lr=learn_params['lr'], 117 | decay=learn_params['decay']) 118 | else: # elif learn_params['optimizer'] == "rmsprop": 119 | optimizer = RMSprop(lr=learn_params['lr'], 120 | decay=learn_params['decay']) 121 | metrics=['accuracy'] 122 | sae.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=metrics) 123 | return sae 124 | 125 | def pre_train(model,x_train, x_test, learn_params=learning_params_template): 126 | #这里是在预训练自编码器的encoder-decoder,于是应该提供X的数据 127 | # Pre-training AEs 128 | global encoded_layers 129 | prev_x_train = None 130 | prev_x_test = None 131 | layers = learn_params['layers'] 132 | for i, l in enumerate(layers): 133 | if i == 0: 134 | prev_x_train, prev_x_test, weights = make_layer(l, x_train, x_test,gen=False) 135 | else: 136 | prev_x_train, prev_x_test, weights = make_layer(l, prev_x_train, prev_x_test) 137 | model.layers[encoded_layers[i]].set_weights(weights) 138 | #print(sae.get_weights()) 139 | 140 | return model -------------------------------------------------------------------------------- /models/dl/beauty/beauty_main_model.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from models.dl.attacks import Beauty_model as CNN_model, parser_raw_data 3 | from models.dl.beauty import cnn_model_config 4 | from models.model_base import abs_model 5 | import os 6 | from config import raw_dataset_base 7 | from keras.utils import np_utils 8 | import numpy as np 9 | class model(abs_model): 10 | def __init__(self, dataset, randseed, splitrate): 11 | super(model,self).__init__('beauty',randseed= randseed) 12 | if os.path.exists(self.database) == False: 13 | os.makedirs(self.database,exist_ok=True) 14 | 15 | self.dataset = dataset 16 | self.model = self.database + '/'+ self.name + '_' + dataset + '_model' 17 | self.data = self.database + '/'+ self.name + '_' + dataset + '/' 18 | self.splitrate = splitrate 19 | #原始数据集目录 20 | full_rdata = raw_dataset_base + 
self.dataset 21 | self.full_rdata = full_rdata 22 | 23 | if self.data_exists() == False: 24 | self.parser_raw_data() 25 | 26 | self.cnn_model = None 27 | def parser_raw_data(self): 28 | full_rdata = self.full_rdata 29 | if os.path.exists(full_rdata) == False: 30 | raise OSError('Dataset {0} (full path: {1}) does not exist!'.format(self.dataset,full_rdata)) 31 | os.makedirs(self.data, exist_ok=True) 32 | ##从原始数据集构建DF所需的数据集 33 | X_train,y_train, X_valid, y_valid, X_test, y_test = parser_raw_data(self, self.full_rdata, max_len = cnn_model_config.learning_params_template['input_length'],burstification=True) 34 | 35 | self.save_data(X_train,y_train, X_valid, y_valid, X_test, y_test) 36 | 37 | 38 | def train(self): 39 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 40 | num_class = self.num_classes() 41 | cnn_model_config.nb_classes_template = num_class 42 | print(num_class) 43 | y_train = np_utils.to_categorical(y_train, num_classes=num_class) 44 | y_valid = np_utils.to_categorical(y_valid, num_classes=num_class) 45 | y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 46 | 47 | X_train = X_train[:, :,np.newaxis] 48 | X_valid = X_valid[:, :,np.newaxis] 49 | X_test = X_test[:, :,np.newaxis] 50 | 51 | cnn_model = CNN_model(num_class = num_class) 52 | cnn_model.build_model() 53 | #cnn_model.model.summary() 54 | cnn_model.fit(X_train=X_train,y_train=y_train, 55 | X_valid= X_valid, y_valid = y_valid, 56 | batch_size= cnn_model_config.learning_params_template['batch_size'], 57 | epochs=cnn_model_config.learning_params_template['epoch']) 58 | 59 | cnn_model.save_model(path=self.model) 60 | score = cnn_model.evaluate(X_test=X_test, y_test = y_test) 61 | print('[Beauty Test on {0} accuracy {1}'.format(self.dataset, score)) 62 | def test(self): 63 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 64 | y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 65 | X_test = X_test[:, :,np.newaxis] 66 | 67 | cnn_model = CNN_model(num_class= self.num_classes()) 68 | cnn_model.load_model(self.model) 69 | score = cnn_model.evaluate(X_test=X_test,y_test=y_test) 70 | print('Beauty Test on {0} accuracy :{1}'.format(self.dataset,score)) 71 | 72 | def predict(self,pkt_size): 73 | def pad_sequence(x, max_len, pad_value=0): 74 | r = x + [pad_value] * (max_len - len(x)) 75 | return r[:max_len] 76 | 77 | if self.cnn_model == None: 78 | self.cnn_model = CNN_model(num_class= self.num_classes()) 79 | self.cnn_model.load_model(self.model) 80 | 81 | x = [pad_sequence(_pkt_size, max_len= cnn_model_config.learning_params_template['in_dim']) for _pkt_size in pkt_size] 82 | x = np.array(x)[:, :,np.newaxis] 83 | y_logit = self.cnn_model.predict(x, actual_lable=True) 84 | return y_logit.tolist() 85 | def get_feature(self): 86 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 87 | #y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 88 | X_test = X_test[:5000] 89 | X_test = X_test[:, :,np.newaxis] 90 | 91 | cnn_model = CNN_model(num_class= self.num_classes()) 92 | cnn_model.load_model(self.model) 93 | logit, feature = cnn_model.predict(X_test=X_test,actual_lable=False, return_feature=True) 94 | print(feature.shape, logit.shape) 95 | logit = logit.tolist() 96 | feature = feature.tolist() 97 | #feature = logit 98 | y_true = y_test[:5000].tolist() 99 | feature_set = {} 100 | feature_vector = [] 101 | for i in range(len(y_true)): 102 | if y_true[i] not in feature_set: 103 | feature_set[y_true[i]] = [] 104 | 
feature_set[y_true[i]].append([feature[i]]) 105 | import pickle 106 | with open('feature_set_D1_53_DF.pkl','wb') as fp: 107 | pickle.dump(feature_set, fp) 108 | print(y_true[-1],logit[-1]) 109 | print(feature[-1]) 110 | if __name__ == '__main__': 111 | for test_rate in [0.1]: 112 | print(test_rate) 113 | dataset='app60' 114 | cnn_model = model(dataset, randseed= 128, splitrate=test_rate) 115 | #cnn_model.parser_raw_data() 116 | #cnn_model.train() 117 | cnn_model.test() 118 | print(dataset) 119 | print(test_rate) 120 | #import os 121 | break 122 | #os.remove(df_model.model) 123 | #df_model.get_feature() 124 | -------------------------------------------------------------------------------- /models/dl/df_only_D/df_main_model.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from models.dl.attacks import DF_model, parser_raw_data 3 | from models.dl.df_only_D import df_model_config 4 | from models.model_base import abs_model 5 | import os 6 | from config import raw_dataset_base 7 | from keras.utils import np_utils 8 | import numpy as np 9 | class model(abs_model): 10 | def __init__(self, dataset, randseed, splitrate): 11 | super(model,self).__init__('df',randseed= randseed) 12 | if os.path.exists(self.database) == False: 13 | os.makedirs(self.database,exist_ok=True) 14 | 15 | self.dataset = dataset 16 | self.model = self.database + '/'+ self.name + '_' + dataset + '_model' 17 | self.data = self.database + '/'+ self.name + '_' + dataset + '/' 18 | self.splitrate = splitrate 19 | #原始数据集目录 20 | full_rdata = raw_dataset_base + self.dataset 21 | self.full_rdata = full_rdata 22 | 23 | if self.data_exists() == False: 24 | self.parser_raw_data() 25 | 26 | self.df_model = None 27 | def parser_raw_data(self): 28 | full_rdata = self.full_rdata 29 | if os.path.exists(full_rdata) == False: 30 | raise OSError('Dataset {0} (full path: {1}) does not exist!'.format(self.dataset,full_rdata)) 31 | os.makedirs(self.data, exist_ok=True) 32 | ##从原始数据集构建DF所需的数据集 33 | X_train,y_train, X_valid, y_valid, X_test, y_test = parser_raw_data(self, self.full_rdata, max_len = df_model_config.learning_params_template['in_dim']) 34 | 35 | ##只使用包的方向 36 | X_train = np.sign(X_train) 37 | X_valid = np.sign(X_valid) 38 | X_test = np.sign(X_test) 39 | 40 | self.save_data(X_train,y_train, X_valid, y_valid, X_test, y_test) 41 | 42 | 43 | def train(self): 44 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 45 | num_class = self.num_classes() 46 | df_model_config.nb_classes_template = num_class 47 | 48 | y_train = np_utils.to_categorical(y_train, num_classes=num_class) 49 | y_valid = np_utils.to_categorical(y_valid, num_classes=num_class) 50 | y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 51 | 52 | X_train = X_train[:, :,np.newaxis] 53 | X_valid = X_valid[:, :,np.newaxis] 54 | X_test = X_test[:, :,np.newaxis] 55 | 56 | df_model = DF_model(num_class = num_class) 57 | df_model.build_model() 58 | 59 | df_model.fit(X_train=X_train,y_train=y_train, 60 | X_valid= X_valid, y_valid = y_valid, 61 | batch_size= df_model_config.learning_params_template['batch_size'], 62 | epochs=df_model_config.learning_params_template['epoch']) 63 | 64 | df_model.save_model(path=self.model) 65 | score = df_model.evaluate(X_test=X_test, y_test = y_test) 66 | print('[Deep Fingerprinting (only direction) Test on {0} accuracy {1}'.format(self.dataset, score)) 67 | def test(self): 68 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 69 | 
y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 70 | X_test = X_test[:, :,np.newaxis] 71 | 72 | df_model = DF_model(num_class= self.num_classes()) 73 | df_model.load_model(self.model) 74 | score = df_model.evaluate(X_test=X_test,y_test=y_test) 75 | print('Deep Fingerprinting (only direction) Test on {0} accuracy :{1}'.format(self.dataset,score)) 76 | 77 | def predict(self,pkt_size): 78 | def pad_sequence(x, max_len, pad_value=0): 79 | r = x + [pad_value] * (max_len - len(x)) 80 | return r[:max_len] 81 | 82 | if self.df_model == None: 83 | self.df_model = DF_model(num_class= self.num_classes()) 84 | self.df_model.load_model(self.model) 85 | 86 | x = [pad_sequence(_pkt_size, max_len= df_model_config.learning_params_template['in_dim']) for _pkt_size in pkt_size] 87 | x = np.array(x)[:, :,np.newaxis] 88 | y_logit = self.df_model.predict(x, actual_lable=True) 89 | return y_logit.tolist() 90 | def get_feature(self): 91 | X_train,y_train, X_valid, y_valid, X_test, y_test = self.load_data() 92 | #y_test = np_utils.to_categorical(y_test, num_classes= self.num_classes()) 93 | X_test = X_test[:5000] 94 | X_test = X_test[:, :,np.newaxis] 95 | 96 | df_model = DF_model(num_class= self.num_classes()) 97 | df_model.load_model(self.model) 98 | logit, feature = df_model.predict(X_test=X_test,actual_lable=False, return_feature=True) 99 | print(feature.shape, logit.shape) 100 | logit = logit.tolist() 101 | feature = feature.tolist() 102 | #feature = logit 103 | y_true = y_test[:5000].tolist() 104 | feature_set = {} 105 | feature_vector = [] 106 | for i in range(len(y_true)): 107 | if y_true[i] not in feature_set: 108 | feature_set[y_true[i]] = [] 109 | feature_set[y_true[i]].append([feature[i]]) 110 | import pickle 111 | with open('feature_set_D1_53_DF.pkl','wb') as fp: 112 | pickle.dump(feature_set, fp) 113 | print(y_true[-1],logit[-1]) 114 | print(feature[-1]) 115 | if __name__ == '__main__': 116 | for test_rate in [0.1]: 117 | print(test_rate) 118 | dataset='app60' 119 | df_model = model(dataset, randseed= 128, splitrate=test_rate) 120 | df_model.parser_raw_data() 121 | df_model.train() 122 | #df_model.test() 123 | print(dataset) 124 | print(test_rate) 125 | #import os 126 | #os.remove(df_model.model) 127 | #df_model.get_feature() 128 | -------------------------------------------------------------------------------- /models/dl/graphDapp/DApp_Classifier.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | import numpy as np 3 | import torch as th 4 | import dgl 5 | import torch.nn as nn 6 | from dgl.nn.pytorch import GINConv 7 | from models.dl.graphDapp.data_builder import Dataset_fgnet 8 | class DApp_MLP(nn.Module): 9 | def __init__(self,in_feats,out_feats=64, layer_nums = 3): 10 | super(DApp_MLP,self).__init__() 11 | self.linear_layers =nn.ModuleList() 12 | for each in range(layer_nums): 13 | if each == 0 : 14 | in_features= in_feats 15 | else: 16 | in_features = out_feats 17 | self.linear_layers.append(nn.Linear(in_features= in_features,out_features=out_feats)) 18 | self.activate = nn.ReLU() 19 | self.batchnorm = nn.BatchNorm1d(out_feats) 20 | self.dropout = nn.Dropout(p=0.0) 21 | 22 | def forward(self, x): 23 | x1 = x 24 | for mod in self.linear_layers : 25 | x1 = mod(x1) 26 | x1 = self.activate(x1) 27 | 28 | x2 = self.batchnorm(x1) 29 | x3 = self.dropout(x2) 30 | return x3 31 | 32 | class DApp_classifier(nn.Module): 33 | def __init__(self, nb_classes=53, gin_layer_num=3, gin_hidden_units=64, iteration_nums = 3, 
graph_pooling_type='sum', 34 | neighbor_pooling_type='sum',use_gpu=False, device='cpu', iteration_first=True, embedding= True): 35 | #DApp: 3 GIN layers cascaded sequentially 36 | super(DApp_classifier,self).__init__() 37 | 38 | self.nb_classes = nb_classes 39 | self.gin_layer_num = gin_layer_num 40 | self.gin_hidden_uints = gin_hidden_units 41 | self.iteration_nums = iteration_nums 42 | 43 | self.graph_pooling_type = graph_pooling_type 44 | self.neighbor_pooling_type= neighbor_pooling_type 45 | 46 | self.use_gpu = use_gpu 47 | self.device = device 48 | 49 | self.gin_layers = [] 50 | self.interation_first = iteration_first 51 | self.embedding = embedding 52 | self.embedding_dim = gin_hidden_units #the embedding dimension is set to the number of GIN hidden units 53 | 54 | if embedding : 55 | self.embedding_layer = th.nn.Embedding(num_embeddings= 3100, embedding_dim= self.embedding_dim) 56 | #add the GIN layers 57 | if iteration_first == False: 58 | for each in range(gin_layer_num): 59 | if each == 0: 60 | in_feats = self.embedding_dim if self.embedding == True else 1 61 | else: 62 | in_feats = gin_hidden_units 63 | mlp = DApp_MLP(in_feats, out_feats= gin_hidden_units, layer_nums= self.gin_layer_num) 64 | print(mlp) 65 | if use_gpu : 66 | mlp = mlp.to(th.device(device)) 67 | gin_layer =GINConv( 68 | apply_func= mlp, 69 | aggregator_type= self.neighbor_pooling_type, 70 | learn_eps=True 71 | ) 72 | if use_gpu: 73 | gin_layer = gin_layer.to(th.device(device)) 74 | self.gin_layers.append(gin_layer) 75 | else: 76 | if embedding == False: 77 | mlp = DApp_MLP(1,out_feats=gin_hidden_units,layer_nums= self.gin_layer_num) 78 | else: 79 | mlp = DApp_MLP(self.embedding_dim, gin_hidden_units, layer_nums= self.gin_layer_num) 80 | if use_gpu: 81 | mlp = mlp.to(th.device(device)) 82 | print(mlp) 83 | gin_layer = GINConv( 84 | apply_func=mlp, 85 | aggregator_type= self.neighbor_pooling_type, 86 | learn_eps=True 87 | ) 88 | if use_gpu: 89 | gin_layer = gin_layer.to(th.device(device)) 90 | self.gin_layers.append(gin_layer) 91 | #final fully-connected classification layer 92 | self.linear = nn.Linear(in_features=iteration_nums * gin_hidden_units,out_features=nb_classes) 93 | 94 | 95 | def forward(self, g): 96 | 97 | node_feature = g.ndata['pkt_length'] 98 | 99 | if self.embedding == True: 100 | node_feature = self.embedding_layer(th.reshape(node_feature.long(),(-1,)) + Dataset_fgnet.MTU) 101 | 102 | graph_feature_history = [] 103 | ##gin 104 | if self.interation_first == False: 105 | for layer in self.gin_layers: 106 | node_feature = layer(g, node_feature.to(th.device(self.device))) 107 | g.ndata['iterated_feature'] = node_feature 108 | if self.graph_pooling_type == 'sum': 109 | graph_feature = dgl.sum_nodes(g,'iterated_feature') 110 | elif self.graph_pooling_type == 'mean': 111 | graph_feature = dgl.mean_nodes(g,'iterated_feature') 112 | 113 | graph_feature_history.append(graph_feature) 114 | else: 115 | layer = self.gin_layers[-1] 116 | # only one MLP, shared across all iterations 117 | for i in range(self.iteration_nums): 118 | node_feature = layer(g, node_feature.to(th.device(self.device))) 119 | g.ndata['iterated_feature'] = node_feature 120 | if self.graph_pooling_type == 'sum': 121 | graph_feature = dgl.sum_nodes(g,'iterated_feature') 122 | elif self.graph_pooling_type == 'mean': 123 | graph_feature = dgl.mean_nodes(g,'iterated_feature') 124 | 125 | graph_feature_history.append(graph_feature) 126 | 127 | ##concatenate the pooled graph features from every iteration 128 | 129 | graph_features = th.cat(graph_feature_history,-1) 130 | 131 | #fully-connected classification 132 | power = self.linear(graph_features) 133 | return power 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 |
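A minimal usage sketch for DApp_classifier (not part of the repository): it builds a toy DGL graph whose nodes carry signed packet lengths under the 'pkt_length' key, exactly as forward() expects, and runs a single forward pass. It assumes a DGL release that provides dgl.graph/dgl.batch, that models.dl.graphDapp.data_builder.Dataset_fgnet is importable, and that Dataset_fgnet.MTU shifts negative lengths into the range of the 3100-entry embedding table; the node values and nb_classes below are illustrative only.

import dgl
import torch as th
from models.dl.graphDapp.DApp_Classifier import DApp_classifier

# a 4-node chain graph; each node holds one signed packet length
g = dgl.graph(([0, 1, 2], [1, 2, 3]), num_nodes=4)
g.ndata['pkt_length'] = th.tensor([[120.0], [-1448.0], [64.0], [-536.0]])

clf = DApp_classifier(nb_classes=5, use_gpu=False, device='cpu')
# the GIN blocks live in a plain Python list, so .eval()/.to() would not reach them;
# a graph with more than one node keeps their BatchNorm1d layers usable in train mode
with th.no_grad():
    logits = clf(dgl.batch([g]))   # one graph in the batch -> shape (1, nb_classes)
print(logits.shape)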
-------------------------------------------------------------------------------- /models/ml/rdp/util.py: -------------------------------------------------------------------------------- 1 | __author__ = 'jmh081701' 2 | import sys 3 | import os 4 | import json 5 | import numpy as np 6 | from models.ml.rdp import statistic_tractor 7 | def read_txt(filename): 8 | ''' 9 | :param filename: text file that records packet length and timestamp. 10 | :return: 11 | client to server : upload traffic, length > 0 12 | server to client : download traffic , length <0 13 | ''' 14 | with open(filename,"r") as fp: 15 | while True: 16 | line=fp.readline() 17 | if line: 18 | rst=[] 19 | line = line.split(",") 20 | line = line[0:3]+line[-2:-1] 21 | rst.append(float(line[0])) 22 | rst.append(int(line[3],10)) 23 | try: 24 | if line[1].count('107c') > 0: 25 | #client to server > 0 26 | rst[1]*=1 27 | else: 28 | #server to client < 0 29 | rst[1]*= -1 30 | except: 31 | if str(line[1]).count("124.16.") > 0: 32 | #client to server >0 33 | rst[1] *=1 34 | else: 35 | #server to client < 0 36 | rst[1] *=-1 37 | yield rst 38 | else: 39 | break 40 | def gather_peak_up_down(filename,gap=0.1): 41 | file_reader = read_txt(filename) 42 | peaks=[[[0,0]]] 43 | index = 0 44 | for each in file_reader: 45 | if abs(peaks[index][-1][0]-each[0]) < gap ... > 0 and abs(peaks[index][-1][0]-each[0]) < ... -------------------------------------------------------------------------------- /models/dl/awf_dataset_util/data.py: -------------------------------------------------------------------------------- ... >= minlen: 84 | new_data.append(x) 85 | new_labels.append(y) 86 | num_traces[y] = count + 1 87 | data = np.array(new_data) 88 | labels = np.array(new_labels) 89 | del new_data, new_labels 90 | if not data.size: 91 | raise ValueError('After filtering, no sequence left.') 92 | del num_traces 93 | 94 | # Pad if traces are of various length or if their uniform length is not equal to maxlen 95 | if maxlen: 96 | if len(data.shape) == 1 or data.shape[1] != maxlen: 97 | print("Pad/trunc with maxlen={}".format(maxlen)) 98 | #data = data[:, :maxlen] 99 | # Old way: 100 | data = sq.pad_sequences(data, maxlen=maxlen, padding='post', truncating='post', dtype="float64") 101 | 102 | if dnn_type == "lstm" or dnn_type == "cnn": 103 | data = data.reshape(data.shape[0], data.shape[1], 1) 104 | if type == "sdae" and len(data.shape) > 2: 105 | print("WEIRD!
data.shape={}".format(data.shape)) 106 | data = data.reshape(data.shape[0], data.shape[1]) 107 | 108 | if not openw: 109 | print("Categorize") 110 | labels = categorize(labels) 111 | 112 | print("Data {}, labels {}".format(data.shape, labels.shape)) 113 | 114 | return data, labels 115 | 116 | 117 | def split_dataset(x, y, val_split=0.05, test_split=0.05): 118 | ''' Split the dataset according to the given probabilities; the data is shuffled at the same time. 119 | :param x: 120 | :param y: 121 | :param val_split: 122 | :param test_split: 123 | :return: 124 | ''' 125 | x_train =[] 126 | x_val =[] 127 | x_test =[] 128 | 129 | y_train =[] 130 | y_val =[] 131 | y_test=[] 132 | 133 | num = x.shape[0] 134 | for i in range(num): 135 | rnd = random.random() 136 | if rnd < ... >=percentage: 34 | return i*bin + bin/2.0 35 | 36 | 37 | def length_distribution(packet_length,bin=30): 38 | distribution = np.zeros(shape =(1500//bin,)) 39 | for each in packet_length: 40 | each = int(abs(each)) 41 | if each >= 1500 : 42 | each = 1499 43 | distribution[each//bin] +=1 44 | distribution =distribution / len(packet_length) 45 | #print(distribution) 46 | return distribution 47 | def poison_generate_function(data): 48 | rst = [] 49 | for x in np.arange(1.1,10,2): 50 | sum =0 51 | for n in range(0,len(data)): 52 | sum =sum + data[n]*np.math.sin(2*x*np.math.pi /31 *n) 53 | rst.append(round(sum,3)) 54 | return rst 55 | def _poison_generate_function(data): 56 | rst =[] 57 | sum =0 58 | for n in range(0,len(data)): 59 | sum =sum + data[n]* np.math.pow(np.math.e,-x) * np.math.pow(x,1+n)/np.math.factorial(1+n) 60 | rst.append(round(sum,3)) 61 | return rst 62 | def L_generate_function(data): 63 | rst = [] 64 | for x in np.arange(1.1,10,2): 65 | sum =0 66 | for n in range(0,len(data)): 67 | sum =sum + data[n]*np.math.cos(2*x*np.math.pi /91 *n) 68 | rst.append(round(sum,3)) 69 | return rst 70 | def _L_generate_function(data): 71 | rst = [] 72 | for x in np.arange(1.5,3.5,0.5): 73 | sum =0 74 | for n in range(0,len(data)): 75 | fac=np.math.pow(x,n+1) 76 | sum =sum + data[n]*fac/(1-fac) 77 | rst.append(round(sum,3)) 78 | return rst 79 | def generate_function(data): 80 | rst=[] 81 | rst+=poison_generate_function(data) 82 | rst+=L_generate_function(data) 83 | return rst 84 | def peak_pkt_length_feature(_peak): 85 | if len(_peak) == 0: 86 | return [0] * 10 + [0] *17 87 | peak = np.mat(_peak) 88 | packet_length_data = list(map(lambda x : x[0],peak[:,1].tolist())) #take only the packet-length column 89 | gen_features = generate_function(packet_length_data) 90 | 91 | mom_features = [0] * 17 92 | #0-4 order central moments 93 | mom_features[0] = moment(1,packet_length_data,c=1) 94 | mom_features[1] = moment(2,packet_length_data,c=1) 95 | mom_features[2] = moment(3,packet_length_data,c=1) 96 | mom_features[3] = moment(4,packet_length_data,c=1) 97 | mom_features[4] = moment(5,packet_length_data,c=1) 98 | #1-3 order raw (origin) moments 99 | for each in packet_length_data: 100 | mom_features[5] +=abs(each) 101 | mom_features[6] +=abs(each)**2 102 | mom_features[7] +=abs(each)**3 103 | mom_features[6] =(mom_features[6]/len(packet_length_data)) **(1/2) 104 | mom_features[7] =(mom_features[7]/len(packet_length_data)) **(1/3) 105 | #10%-90% percentiles 106 | 107 | for i in range(8,17): 108 | mom_features[i] = length_percentile(packet_length_data,percentage=(i-8+1)*0.1) 109 | return gen_features + mom_features 110 | def peak_relative_arrive_time_feature(_peak): 111 | if len(_peak) == 0: 112 | return [0] * 5 113 | peak = np.mat(_peak) 114 | arrive_time_data = list(map(lambda x : x[0],peak[:,0].tolist()) ) 115 | #gen_features = generate_function(arrive_time_data) 116 | mom_features = [0] * 5
117 | for i in range(0,5): 118 | for each in arrive_time_data: 119 | mom_features[i] +=each** i 120 | if i != 0: 121 | mom_features[i]= round((mom_features[i] **(1/i) ).real,3) 122 | return mom_features 123 | def peak_feature(peak): 124 | up_peak=[] 125 | down_peak=[] 126 | total_peak=[peak[0]] 127 | if total_peak[0][1] >0 : 128 | up_peak.append((total_peak[0][0],total_peak[0][1])) 129 | else: 130 | down_peak.append((total_peak[0][0],-total_peak[0][1])) 131 | for i in range(1,len(peak)): 132 | total_peak.append((peak[i][0]-peak[0][0],peak[i][1])) 133 | #upstream/downstream packets  #NOTE: still need to verify whether total_peak itself should keep the sign 134 | if peak[i][1]>0: 135 | #upload (client-to-server) packets 136 | up_peak.append((total_peak[i][0],total_peak[i][1])) 137 | else: 138 | #download (server-to-client) packets 139 | down_peak.append((total_peak[i][0],-total_peak[i][1])) 140 | features=[] 141 | #pkt length 142 | features += peak_pkt_length_feature(total_peak) 143 | #print('total peak pkt length feature:',len(features)) 144 | features += peak_pkt_length_feature(up_peak) 145 | #print('up peak pkt length feature:',len(features)) 146 | features += peak_pkt_length_feature(down_peak) 147 | #print('down peak pkt length feature:',len(features)) 148 | #relative arrive time 149 | features += peak_relative_arrive_time_feature(total_peak) 150 | #print('total peak pkt arrive time feature:',len(features)) 151 | features += peak_relative_arrive_time_feature(up_peak) 152 | #print('up peak pkt arrive time feature:',len(features)) 153 | features += peak_relative_arrive_time_feature(down_peak) 154 | #print('down peak pkt arrive time feature:',len(features)) 155 | return features 156 | 157 | if __name__ == '__main__': 158 | packet_lengths=[(0,40),(0,53),(0,53),(0,1074),(0,73),(0,40),(0,217),(0,131),(0,209),(0,73),(0,40),(0,254),(0,73)] 159 | print(peak_pkt_length_feature(packet_lengths)) 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /models/ml/appscanner/model.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dk' 2 | from models.model_base import abs_model 3 | import os 4 | import shutil 5 | import json 6 | from config import raw_dataset_base, min_flow_len 7 | from models.ml.appscanner import feature_extractor 8 | import numpy as np 9 | from models.ml.appscanner import min_max 10 | import pickle 11 | import lightgbm as lgb 12 | import tqdm 13 | from sklearn.metrics import accuracy_score,classification_report 14 | from models.ml.appscanner.hyper_params import hyper_params 15 | 16 | class model(abs_model): 17 | def __init__(self, dataset, randseed, splitrate): 18 | super(model,self).__init__('appscanner',randseed= randseed) 19 | if os.path.exists(self.database) == False: 20 | os.makedirs(self.database,exist_ok=True) 21 | 22 | self.dataset = dataset 23 | self.model = self.database + '/'+ self.name + '_' + dataset + '_model' 24 | self.data = self.database + '/'+ self.name + '_' + dataset + '/' 25 | self.splitrate = splitrate 26 | #raw dataset directory 27 | full_rdata = raw_dataset_base + self.dataset 28 | self.full_rdata = full_rdata 29 | 30 | if self.data_exists() == False: 31 | self.parser_raw_data() 32 | 33 | 34 | def parser_raw_data(self): 35 | full_rdata = self.full_rdata 36 | if os.path.exists(full_rdata) == False: 37 | raise OSError('Dataset {0} (full path: {1}) does not exist!'.format(self.dataset,full_rdata)) 38 | #build the dataset required by AppScanner from the raw dataset directory 39 | X = [] 40 | y = [] 41 | for _root, _dirs, _files in os.walk(full_rdata): 42 | labels = [] 43 | for file in _files: 44 | labels.append(file) 45 | labels.sort()
46 | for file in tqdm.trange(len(_files)): 47 | file = _files[file] 48 | label = labels.index(file) 49 | file = _root + '/' + file 50 | 51 | with open(file) as fp: 52 | rdata = json.load(fp) 53 | 54 | for each in rdata : 55 | pkt_size= each['packet_length'] 56 | if len(pkt_size) < min_flow_len : 57 | continue 58 | x = feature_extractor.feature_extract(pkt_size) 59 | X.append(x) 60 | y.append(label) 61 | X = np.array(X) 62 | _max = np.array(min_max._max) 63 | _min = np.array(min_max._min) 64 | #归一化 65 | X = (X - _min)/(_max - _min) 66 | X = X.tolist() 67 | 68 | X_train = [] 69 | y_train = [] 70 | X_valid = [] 71 | y_valid = [] 72 | X_test = [] 73 | y_test = [] 74 | for i in range(len(X)): 75 | r = self.rand.uniform(0,1) 76 | if r < self.splitrate: 77 | X_test.append(X[i]) 78 | y_test.append(y[i]) 79 | elif r < self.splitrate * (2 - self.splitrate) : 80 | X_valid.append(X[i]) 81 | y_valid.append(y[i]) 82 | else: 83 | X_train.append(X[i]) 84 | y_train.append(y[i]) 85 | os.makedirs(self.data,exist_ok=True) 86 | 87 | with open(self.data + 'X_train.pkl','wb') as fp: 88 | pickle.dump(X_train, fp) 89 | 90 | with open(self.data + 'y_train.pkl','wb') as fp: 91 | pickle.dump(y_train,fp) 92 | 93 | with open(self.data + 'X_valid.pkl', 'wb') as fp: 94 | pickle.dump(X_valid,fp) 95 | 96 | with open(self.data + 'y_valid.pkl', 'wb') as fp: 97 | pickle.dump(y_valid, fp) 98 | 99 | with open(self.data + 'X_test.pkl', 'wb') as fp : 100 | pickle.dump(X_test, fp) 101 | 102 | with open(self.data + 'y_test.pkl' ,'wb') as fp: 103 | pickle.dump(y_test, fp) 104 | 105 | def load_data(self): 106 | with open(self.data + 'X_train.pkl','rb') as fp: 107 | X_train = pickle.load(fp) 108 | 109 | with open(self.data + 'y_train.pkl','rb') as fp: 110 | y_train = pickle.load(fp) 111 | 112 | with open(self.data + 'X_valid.pkl','rb') as fp: 113 | X_valid = pickle.load(fp) 114 | 115 | with open(self.data + 'y_valid.pkl','rb') as fp: 116 | y_valid = pickle.load(fp) 117 | 118 | with open(self.data + 'X_test.pkl','rb') as fp : 119 | X_test = pickle.load(fp) 120 | 121 | with open(self.data + 'y_test.pkl','rb') as fp: 122 | y_test = pickle.load(fp) 123 | 124 | return np.array(X_train), np.array(y_train), np.array(X_valid), np.array(y_valid), np.array(X_test), np.array(y_test) 125 | 126 | def train(self): 127 | X_train, y_train, X_valid, y_valid, X_test, y_test = self.load_data() 128 | lgb_train = lgb.Dataset(data=X_train,label=y_train) 129 | lgb_eval = lgb.Dataset(data=X_valid,label=y_valid) 130 | 131 | hyper_params['num_class'] = self.num_classes() 132 | gbm = lgb.train(params=hyper_params, 133 | train_set=lgb_train, 134 | valid_sets=lgb_eval, 135 | num_boost_round=50, 136 | early_stopping_rounds=5) 137 | #save model 138 | try: 139 | gbm.save_model(self.model) 140 | except BaseException as exp: 141 | pass 142 | logit = gbm.predict(data=X_test) 143 | label_predict = list(map(lambda x : np.argmax(x),logit)) 144 | 145 | accuracy = accuracy_score(y_test,label_predict) 146 | print('[Appscanner Test on {0} accuracy:{1}]'.format(self.dataset,accuracy)) 147 | 148 | def test(self): 149 | X_train, y_train, X_valid, y_valid, X_test, y_test = self.load_data() 150 | #load model 151 | try: 152 | gbm = lgb.Booster(model_file= self.model) 153 | except BaseException as exp: 154 | raise exp 155 | logit = gbm.predict(data=X_test) 156 | label_predict = list(map(lambda x : np.argmax(x),logit)) 157 | 158 | accuracy = accuracy_score(y_test,label_predict) 159 | report = classification_report(y_true=y_test,y_pred=label_predict) 160 | 161 | print("[Appscanner] 
Test on {0}, accuracy is {1}. ".format(self.dataset,accuracy)) 162 | print(report) 163 | 164 | if __name__ == '__main__': 165 | appscanner = model('website113', 128, 0.1) 166 | #appscanner.parser_raw_data() 167 | appscanner.train() 168 | appscanner.test() 169 | --------------------------------------------------------------------------------
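The class above only exposes train() and test() over the pre-split pickles. Below is a hedged sketch (not in the original file) of scoring a single raw flow with a booster saved by train(), reusing the same feature extraction and min-max scaling as parser_raw_data(); the packet-length list and model path in the commented call are purely illustrative.

import numpy as np
import lightgbm as lgb
from models.ml.appscanner import feature_extractor, min_max

def classify_flow(pkt_sizes, model_path):
    # same pipeline as parser_raw_data(): statistical features, then min-max scaling
    x = np.array(feature_extractor.feature_extract(pkt_sizes), dtype=float)
    x = (x - np.array(min_max._min)) / (np.array(min_max._max) - np.array(min_max._min))
    gbm = lgb.Booster(model_file=model_path)      # model file written by train()
    logit = gbm.predict(data=x.reshape(1, -1))    # shape (1, num_class)
    return int(np.argmax(logit, axis=1)[0])

# illustrative call with a hypothetical signed packet-length sequence and model path:
# label = classify_flow([64, -1448, -1448, 120, 583, -60, 1448, -52], 'appscanner_app60_model')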