├── .gitattributes ├── README.md ├── aragon.csv ├── bancor.csv ├── canwork-1.csv ├── canwork-2.csv ├── chainy.csv ├── cryptopepes.csv ├── eth_town.csv ├── etheremon.csv ├── idex.csv ├── joyso.csv ├── kitty.csv ├── lordless.csv ├── makerdao.csv ├── matchpool.csv ├── ono-1.csv ├── ono-2.csv ├── originprotocol.csv └── parseFlow2Graph.py /.gitattributes: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:da82a15e13b7163aef0def74f6e4e6fc5d1531269562e557084403ff032e62fb 3 | size 39 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Traffic Data of DApps 2 | 《Accurate Decentralized Application Identification via Encrypted Traffic Analysis Using Graph Neural Networks》 3 | 4 | 5 | 《Encrypted Traffic Classification of Decentralized Applications on Ethereum Using Feature Fusion》 6 | 7 | 8 | "Traffic Interaction Graph (TIG) " implementation please refer to parseFlow2Graph.py 9 | 10 | #Author: Jinpeng Zhang 11 | #Email: zhangjinpeng2117@foxmail.com (zhangjinpeng@bit.edu.cn is invalid due to graduation) 12 | 13 | version https://git-lfs.github.com/spec/v1 14 | oid sha256:b4b2421bf8e5e6476b15bfe377a7e206e120a42eeb2e67ed9691d47fef482561 15 | size 16 16 | 17 | 18 | @article{DBLP:journals/tifs/ShenZZXD21, 19 | author = {Meng Shen and 20 | Jinpeng Zhang and 21 | Liehuang Zhu and 22 | Ke Xu and 23 | Xiaojiang Du}, 24 | title = {Accurate Decentralized Application Identification via Encrypted Traffic 25 | Analysis Using Graph Neural Networks}, 26 | journal = {{IEEE} Trans. Inf. Forensics Secur.}, 27 | volume = {16}, 28 | pages = {2367--2380}, 29 | year = {2021}, 30 | doi = {10.1109/TIFS.2021.3050608}, 31 | timestamp = {Tue, 02 Mar 2021 11:26:04 +0100} 32 | } 33 | 34 | 35 | @inproceedings{10.1145/3326285.3329053, 36 | author = {Shen, Meng and Zhang, Jinpeng and Zhu, Liehuang and Xu, Ke and Du, Xiaojiang and Liu, Yiting}, 37 | title = {Encrypted Traffic Classification of Decentralized Applications on Ethereum Using Feature Fusion}, 38 | year = {2019}, 39 | isbn = {9781450367783}, 40 | publisher = {Association for Computing Machinery}, 41 | address = {New York, NY, USA}, 42 | doi = {10.1145/3326285.3329053}, 43 | booktitle = {Proceedings of the International Symposium on Quality of Service}, 44 | articleno = {18}, 45 | numpages = {10}, 46 | location = {Phoenix, Arizona}, 47 | series = {IWQoS '19} 48 | } 49 | 50 | "Traffic Interaction Graph (TIG) " implementation please refer to parseFlow2Graph.py 51 | -------------------------------------------------------------------------------- /aragon.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:14ff70357c6b9730c28db2ba0e8af96e8d9e62b6e1531959e01396db1439f637 3 | size 15438244 4 | -------------------------------------------------------------------------------- /bancor.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:05912dac30a691b18dd9af5f35355bdb8d9437c575750b9363f616c621df8c20 3 | size 55687453 4 | -------------------------------------------------------------------------------- /canwork-1.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e6e4d51b67a1535bc089711f720224be0125ad44c419e650447aabde28ce7a55 3 | size 75350353 4 | -------------------------------------------------------------------------------- /canwork-2.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9b58612f1feabdf568e907c8571d078233cfe3632438e2456f38a532bb792f7a 3 | size 82923857 4 | -------------------------------------------------------------------------------- /chainy.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:de889e69b0f5d5781a39c4e54cf86de00b18c36f00b37d5996f98642d26e795e 3 | size 70084074 4 | -------------------------------------------------------------------------------- /cryptopepes.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ef05374e2ff6e3af2970a11ffb8da8e41bd84cf70db7af5d5d07a3f7fd7e514f 3 | size 55217169 4 | -------------------------------------------------------------------------------- /eth_town.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2eabbd5c3c3c4def9d6a8b6f1840153874c0718613acc3844f393fedfbb00858 3 | size 6695844 4 | -------------------------------------------------------------------------------- /etheremon.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb97d548c42e085a4e6784fac5fc0487be2a01811e10f100b6e78e5f57cac050 3 | size 20448047 4 | -------------------------------------------------------------------------------- /idex.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a3f29b00449532bc60a78edaef141dae45d71687a0a0521497f3c32b8bcb9de9 3 | size 69859590 4 | -------------------------------------------------------------------------------- /joyso.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:5935ac4a51fb99352e146a18437103454edd32febc09f1650b45431629e03024 3 | size 43315608 4 | -------------------------------------------------------------------------------- /kitty.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:0634a31eed1068c3279500552354e6e9320b090d8e921571f43e517e1e605180 3 | size 56705862 4 | -------------------------------------------------------------------------------- /lordless.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a9dfcd8aca8b4fe831b33e001b8147fec2cf33efec3b4f0aad10798579ca7dfd 3 | size 3654846 4 | -------------------------------------------------------------------------------- /makerdao.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eec4bfbc6cae4e51032e095794bf5c95e961b2ffe10da59afc526ed78d2cff74 3 | size 17569937 4 | -------------------------------------------------------------------------------- /matchpool.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:ac4f619ae35ec6963ad8098beb3b490a05c513558bd63153493e2f83ae70c0d6 3 | size 67892494 4 | -------------------------------------------------------------------------------- /ono-1.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:9a6ba0408e39578844eca6e97d254b8da40abac841c09b1e0a533eb6b765afbd 3 | size 53571634 4 | -------------------------------------------------------------------------------- /ono-2.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:56fb2e48a7589c5881d69e8fcabe58be9ae127d249b5842a02b80602633157df 3 | size 51552312 4 | -------------------------------------------------------------------------------- /originprotocol.csv: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:2ce4e0d59b8fdc9d7dc033109e714ec9bfbea48a64095c9c2ea18dd7ad16e7c0 3 | size 52225986 4 | -------------------------------------------------------------------------------- /parseFlow2Graph.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @author: Zhang Jinpeng 3 | @created: 2019.12.06 4 | @version: Python3.7 5 | @description: 将流进行图结构编码 6 | @email: zhangjinpeng2117@foxmail.com 7 | ''' 8 | # encoding: utf8 9 | import csv 10 | dappName = {} 11 | 12 | # 抽取下行流量的数据包长度和标志位文本 13 | def extractDownLenandFlagT(readfileName, writefileName): 14 | readfile = open(readfileName, 'r') 15 | writefile = open(writefileName, 'w') 16 | readCSV = csv.reader(readfile) 17 | writeCSV = csv.writer(writefile) 18 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 19 | ifFirstAppData = 0 20 | # 记录流长度 21 | for row in readCSV: 22 | if len(row) == 0: 23 | #writefile.write('\n') 24 | writeCSV.writerow([]) 25 | ifFirstAppData = 0 26 | else: 27 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 28 | if row[4].find('TLS') != -1 and ifFirstAppData == 0: 29 | #writefile.write(row[5] + ',' + row[6] + '\n') 30 | writeCSV.writerow([row[5], row[6]]) 31 | if row[6].find('Application Data') != -1: 32 | ifFirstAppData = 1 33 | readfile.close() 34 | writefile.close() 35 | 36 | 37 | # 抽取下行流量的数据包长度和标志位图结构 38 | def extractDownLenandFlagG(readfileName, writefileName): 39 | # 获取dapp名称 40 | dappNameTem = readfileName.split('/')[2].split('-')[0] 41 | print(dappNameTem) 42 | if not dappNameTem in dappName: 43 | mapped = len(dappName) 44 | dappName[dappNameTem] = mapped 45 | print(dappName) 46 | readfile = open(readfileName, 'r') 47 | readCSV = csv.reader(readfile) 48 | writeTxt = open(writefileName, 'w') 49 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 50 | ifFirstAppData = 0 51 | flowCount = 0 # 流数量 52 | flowLen = [] # 记录流长度,流里边的数据包 53 | for row in readCSV: 54 | if len(row) == 0: 55 | ifFirstAppData = 0 56 | #print(flowLen) 57 | writeTxt.write(str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 58 | for i in range(len(flowLen)): 59 | if i == 0: 60 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 61 | elif i == len(flowLen)-1: 62 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen)-1-1) + '\n') 63 | else: 64 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i-1) + ' ' + str(i+1) + '\n') 65 | flowLen = [] 66 | flowCount += 1 67 | else: 68 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 69 | if row[4].find('TLS') != -1 and ifFirstAppData == 0: 70 | #writefile.write(row[5] + ',' + row[6] + '\n') 71 | flowLen.append(row[5]) 72 | if row[6].find('Application Data') != -1: 73 | ifFirstAppData = 1 74 | readfile.close() 75 | writeTxt.close() 76 | #最后将流的数量写入第一行 77 | with open(writefileName, 'r+') as f: 78 | content = f.read() 79 | f.seek(0, 0) 80 | f.write(str(flowCount) + '\n' + content) 81 | 82 | 83 | # 抽取一个文件夹所有流量数据的下行流量的数据包长度和标志位图结构 84 | def extractDownLenandFlagGFlist(readfilepath, writefileName): 85 | writeTxt = open(writefileName, 'w') 86 | flowCount = 0 # 流数量 87 | import os 88 | fileList = os.listdir(readfilepath) 89 | for readfileName in fileList: 90 | # 获取dapp名称 91 | dappNameTem = readfileName.split('-')[0] 92 | print(dappNameTem) 93 | if not dappNameTem in dappName: 94 | mapped = len(dappName) 95 | dappName[dappNameTem] = mapped 96 | print(dappName) 97 | try: 98 | readfile = open(readfilepath+readfileName, 'r', encoding='UTF-8') 99 | readCSV = csv.reader(readfile) 100 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 101 | ifFirstAppData = 0 102 | flowLen = [] # 记录流长度,流里边的数据包 103 | for row in readCSV: 104 | #print(row) 105 | if len(row) == 0: 106 | ifFirstAppData = 0 107 | #print(flowLen) 108 | if (len(flowLen) > 1 and len(flowLen) < 6): 109 | writeTxt.write(str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 110 | for i in range(len(flowLen)): 111 | if i == 0: 112 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 113 | elif i == len(flowLen)-1: 114 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen)-1-1) + '\n') 115 | else: 116 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i-1) + ' ' + str(i+1) + '\n') 117 | flowLen = [] 118 | flowCount += 1 119 | else: 120 | flowLen = [] 121 | else: 122 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 123 | if row[4].find('TLS') != -1 and ifFirstAppData == 0: 124 | #writefile.write(row[5] + ',' + row[6] + '\n') 125 | flowLen.append(row[5]) 126 | if row[6].find('Application Data') != -1: 127 | ifFirstAppData = 1 128 | readfile.close() 129 | except: 130 | readfile = open(readfilepath + readfileName, 'r') 131 | readCSV = csv.reader(readfile) 132 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 133 | ifFirstAppData = 0 134 | flowLen = [] # 记录流长度,流里边的数据包 135 | for row in readCSV: 136 | #print(row) 137 | if len(row) == 0: 138 | ifFirstAppData = 0 139 | # print(flowLen) 140 | if (len(flowLen) > 1 and len(flowLen) < 6): 141 | writeTxt.write(str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 142 | for i in range(len(flowLen)): 143 | if i == 0: 144 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 145 | elif i == len(flowLen) - 1: 146 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 147 | else: 148 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 149 | flowLen = [] 150 | flowCount += 1 151 | else: 152 | flowLen = [] 153 | else: 154 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 155 | if row[4].find('TLS') != -1 and ifFirstAppData == 0: 156 | # writefile.write(row[5] + ',' + row[6] + '\n') 157 | flowLen.append(row[5]) 158 | if row[6].find('Application Data') != -1: 159 | ifFirstAppData = 1 160 | readfile.close() 161 | writeTxt.close() 162 | #最后将流的数量写入第一行 163 | with open(writefileName, 'r+') as f: 164 | content = f.read() 165 | f.seek(0, 0) 166 | f.write(str(flowCount) + '\n' + content) 167 | 168 | 169 | # 抽取一个文件夹所有流量数据的下行流量的数据包长度和标志位图结构 170 | def extractDownLenandFlagNF(readfilepath, writefileName): 171 | packetUse = 18 172 | writeTxt = open(writefileName, 'w') 173 | flowCount = 0 # 流数量 174 | import os 175 | fileList = os.listdir(readfilepath) 176 | for readfileName in fileList: 177 | # 获取dapp名称 178 | dappNameTem = readfileName.split('-')[0] 179 | print(dappNameTem) 180 | if not dappNameTem in dappName: 181 | mapped = len(dappName) 182 | dappName[dappNameTem] = mapped 183 | print(dappName) 184 | # 将dappName写入文件 185 | import json 186 | json_str = json.dumps(dappName) 187 | with open('name_label.json', 'w') as json_file: 188 | json_file.write(json_str) 189 | 190 | try: 191 | readfile = open(readfilepath+readfileName, 'r', encoding='UTF-8') 192 | readCSV = csv.reader(readfile) 193 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 194 | flowLen = [] # 记录流长度,流里边的数据包 195 | for row in readCSV: 196 | #print(row) 197 | if len(row) == 0: 198 | if len(flowLen) > 1: 199 | if len(flowLen) <= packetUse: 200 | writeTxt.write( 201 | str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 202 | for i in range(len(flowLen)): 203 | if i == 0: 204 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 205 | elif i == len(flowLen)-1: 206 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen)-1-1) + '\n') 207 | else: 208 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i-1) + ' ' + str(i+1) + '\n') 209 | else: 210 | writeTxt.write( 211 | str(packetUse) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 212 | for i in range(packetUse): 213 | if i == 0: 214 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 215 | elif i == packetUse-1: 216 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse-1-1) + '\n') 217 | else: 218 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i-1) + ' ' + str(i+1) + '\n') 219 | flowLen = [] 220 | flowCount += 1 221 | else: 222 | flowLen = [] 223 | else: 224 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 225 | if row[4].find('TLS') != -1 : 226 | flowLen.append(row[5]) 227 | readfile.close() 228 | except: 229 | readfile = open(readfilepath + readfileName, 'r') 230 | readCSV = csv.reader(readfile) 231 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 232 | flowLen = [] # 记录流长度,流里边的数据包 233 | for row in readCSV: 234 | #print(row) 235 | if len(row) == 0: 236 | if len(flowLen) > 1: 237 | if len(flowLen) <= packetUse: 238 | writeTxt.write( 239 | str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 240 | for i in range(len(flowLen)): 241 | if i == 0: 242 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 243 | elif i == len(flowLen) - 1: 244 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 245 | else: 246 | writeTxt.write( 247 | flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 248 | else: 249 | writeTxt.write(str(packetUse) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 250 | for i in range(packetUse): 251 | if i == 0: 252 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 253 | elif i == packetUse-1: 254 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse - 1 - 1) + '\n') 255 | else: 256 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 257 | flowLen = [] 258 | flowCount += 1 259 | else: 260 | flowLen = [] 261 | else: 262 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 263 | if row[4].find('TLS') != -1: 264 | # writefile.write(row[5] + ',' + row[6] + '\n') 265 | flowLen.append(row[5]) 266 | readfile.close() 267 | writeTxt.close() 268 | #最后将流的数量写入第一行 269 | with open(writefileName, 'r+') as f: 270 | content = f.read() 271 | f.seek(0, 0) 272 | f.write(str(flowCount) + '\n' + content) 273 | 274 | 275 | # 抽取一个文件夹所有流量数据的双向流量的数据包长度和标志位图结构-图结构风筝头-风筝头圆形(环状) 276 | def extractDownLenandFlagBi(readfilepath, writefileName): 277 | packetUse = 20 278 | writeTxt = open(writefileName, 'w') 279 | flowCount = 0 # 流数量 280 | import os 281 | fileList = os.listdir(readfilepath) 282 | for readfileName in fileList: 283 | # 获取dapp名称 284 | dappNameTem = readfileName.split('-')[0] 285 | print(dappNameTem) 286 | if not dappNameTem in dappName: 287 | mapped = len(dappName) 288 | dappName[dappNameTem] = mapped 289 | print(dappName) 290 | # 将dappName写入文件 291 | import json 292 | json_str = json.dumps(dappName) 293 | with open('name_label.json', 'w') as json_file: 294 | json_file.write(json_str) 295 | 296 | try: 297 | readfile = open(readfilepath+readfileName, 'r', encoding='UTF-8') 298 | readCSV = csv.reader(readfile) 299 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 300 | flowLen = [] # 记录流长度,流里边的数据包 301 | firstAppIndex = None # 第一个下行ApplicationData数据包位置 302 | for row in readCSV: 303 | #print(row) 304 | if len(row) == 0: 305 | if len(flowLen) > 5: 306 | #print(flowLen) 307 | if len(flowLen) <= packetUse: 308 | writeTxt.write( 309 | str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 310 | for i in range(len(flowLen)): 311 | if i == 0: 312 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 313 | elif i == len(flowLen)-1: 314 | if firstAppIndex != None and i == firstAppIndex: 315 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(len(flowLen) - 1 - 1) + ' 0\n') 316 | else: 317 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 318 | elif firstAppIndex != None and i == firstAppIndex: 319 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' 0'+'\n') 320 | #print() 321 | else: 322 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i-1) + ' ' + str(i+1) + '\n') 323 | else: 324 | writeTxt.write( 325 | str(packetUse) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 326 | for i in range(packetUse): 327 | if i == 0: 328 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 329 | elif i == packetUse-1: 330 | if firstAppIndex != None and i == firstAppIndex: 331 | writeTxt.write( 332 | flowLen[i] + ' ' + '2' + ' ' + str(packetUse - 1 - 1) + ' 0\n') 333 | else: 334 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse - 1 - 1) + '\n') 335 | #writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse-1-1) + '\n') 336 | elif firstAppIndex != None and i == firstAppIndex: 337 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' 0'+'\n') 338 | #print() 339 | else: 340 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i-1) + ' ' + str(i+1) + '\n') 341 | flowLen = [] 342 | flowCount += 1 343 | else: 344 | flowLen = [] 345 | else: 346 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 347 | if row[4].find('TLS') != -1: 348 | flowLen.append(row[5]) 349 | if row[6].find('Application Data') != -1 and firstAppIndex==None: 350 | firstAppIndex = len(flowLen) - 1 351 | else: 352 | if row[4].find('TLS') != -1: 353 | #print(row[5]) 354 | flowLen.append(str(int(row[5]) * -1)) 355 | readfile.close() 356 | except: 357 | readfile = open(readfilepath + readfileName, 'r') 358 | readCSV = csv.reader(readfile) 359 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 360 | flowLen = [] # 记录流长度,流里边的数据包 361 | firstAppIndex = None 362 | for row in readCSV: 363 | #print(row) 364 | if len(row) == 0: 365 | if len(flowLen) > 5: 366 | if len(flowLen) <= packetUse: 367 | writeTxt.write( 368 | str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 369 | for i in range(len(flowLen)): 370 | if i == 0: 371 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 372 | elif i == len(flowLen) - 1: 373 | if firstAppIndex != None and i == firstAppIndex: 374 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(len(flowLen) - 1 - 1) + ' 0\n') 375 | else: 376 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 377 | elif firstAppIndex != None and i == firstAppIndex: 378 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' 0'+'\n') 379 | #print() 380 | else: 381 | writeTxt.write( 382 | flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 383 | else: 384 | writeTxt.write(str(packetUse) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 385 | for i in range(packetUse): 386 | if i == 0: 387 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 388 | elif i == packetUse-1: 389 | if firstAppIndex != None and i == firstAppIndex: 390 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(packetUse - 1 - 1) + ' 0\n') 391 | else: 392 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse - 1 - 1) + '\n') 393 | elif firstAppIndex != None and i == firstAppIndex: 394 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' 0'+'\n') 395 | #print() 396 | else: 397 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 398 | flowLen = [] 399 | flowCount += 1 400 | else: 401 | flowLen = [] 402 | else: 403 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 404 | if row[4].find('TLS') != -1: 405 | # writefile.write(row[5] + ',' + row[6] + '\n') 406 | flowLen.append(row[5]) 407 | if row[6].find('Application Data') != -1 and firstAppIndex==None: 408 | firstAppIndex = len(flowLen) - 1 409 | else: 410 | if row[4].find('TLS') != -1: 411 | flowLen.append(str(int(row[5]) * -1)) 412 | readfile.close() 413 | writeTxt.close() 414 | #最后将流的数量写入第一行 415 | with open(writefileName, 'r+') as f: 416 | content = f.read() 417 | f.seek(0, 0) 418 | f.write(str(flowCount) + '\n' + content) 419 | 420 | 421 | # 抽取一个文件夹所有流量数据的双向流量的数据包长度和标志位图结构-图结构风筝头-风筝头圆形(环里边有网) 422 | def extractDownLenandFlagBiDuo(readfilepath, writefileName): 423 | packetUse = 20 424 | writeTxt = open(writefileName, 'w') 425 | flowCount = 0 # 流数量 426 | import os 427 | fileList = os.listdir(readfilepath) 428 | for readfileName in fileList: 429 | print(readfileName) 430 | # 获取dapp名称 431 | dappNameTem = readfileName.split('-')[0] 432 | print(dappNameTem) 433 | if not dappNameTem in dappName: 434 | mapped = len(dappName) 435 | dappName[dappNameTem] = mapped 436 | print(dappName) 437 | # 将dappName写入文件 438 | import json 439 | json_str = json.dumps(dappName) 440 | with open('name_label.json', 'w') as json_file: 441 | json_file.write(json_str) 442 | 443 | try: 444 | readfile = open(readfilepath+readfileName, 'r', encoding='UTF-8') 445 | readCSV = csv.reader(readfile) 446 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 447 | flowLen = [] # 记录流长度,流里边的数据包 448 | firstAppIndex = None # 第一个下行ApplicationData数据包位置 449 | for row in readCSV: 450 | #print(row) 451 | if len(row) == 0: 452 | if len(flowLen) > 5: # 四个数据包以上才作为一条流 453 | #print(flowLen) 454 | if len(flowLen) <= packetUse: # 实际数据包数量不大于用到的数据包数量 455 | writeTxt.write(str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 456 | for i in range(len(flowLen)): 457 | if firstAppIndex != None and firstAppIndex >= 1 and firstAppIndex <= packetUse-1: 458 | if i == 0: 459 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + '1 ' + str(firstAppIndex) + '\n') 460 | elif i < firstAppIndex: 461 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' ' + str(firstAppIndex) + '\n') 462 | elif i == firstAppIndex: 463 | if i == len(flowLen) - 1: 464 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' 0' + '\n') 465 | else: 466 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' 0' + '\n') 467 | elif i == len(flowLen) - 1: 468 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 469 | else: 470 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 471 | else: 472 | if i == 0: 473 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 474 | elif i == len(flowLen) - 1: 475 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 476 | else: 477 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 478 | else: 479 | writeTxt.write( 480 | str(packetUse) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 481 | for i in range(packetUse): 482 | if firstAppIndex != None and firstAppIndex <= packetUse-1: 483 | if i == 0: 484 | writeTxt.write(flowLen[i] + ' ' + '2 1 ' + str(firstAppIndex) + '\n') 485 | elif i < firstAppIndex: 486 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' ' + str(firstAppIndex) + '\n') 487 | elif i == firstAppIndex: 488 | if i == packetUse - 1: 489 | writeTxt.write(flowLen[i] + ' ' + '2 ' + str(i - 1) + ' 0' + '\n') 490 | else: 491 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' 0' + '\n') 492 | elif i == packetUse - 1: 493 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse - 1 - 1) + '\n') 494 | else: 495 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 496 | else: 497 | if i == 0: 498 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 499 | elif i == packetUse - 1: 500 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse - 1 - 1) + '\n') 501 | else: 502 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 503 | firstAppIndex = None 504 | flowLen = [] 505 | flowCount += 1 506 | else: 507 | flowLen = [] 508 | else: 509 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 510 | if row[4].find('TLS') != -1: 511 | flowLen.append(row[5]) 512 | if row[6].find('Application Data') != -1 and firstAppIndex==None: 513 | firstAppIndex = len(flowLen) - 1 514 | else: 515 | if row[4].find('TLS') != -1: 516 | #print(row[5]) 517 | flowLen.append(str(int(row[5]) * -1)) 518 | readfile.close() 519 | except: 520 | readfile = open(readfilepath + readfileName, 'r') 521 | readCSV = csv.reader(readfile) 522 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 523 | flowLen = [] # 记录流长度,流里边的数据包 524 | firstAppIndex = None 525 | for row in readCSV: 526 | #print(row) 527 | if len(row) == 0: 528 | if len(flowLen) > 5: # 四个数据包以上才作为一条流 529 | if len(flowLen) <= packetUse: 530 | writeTxt.write( 531 | str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 532 | for i in range(len(flowLen)): 533 | if firstAppIndex != None and firstAppIndex >= 1 and firstAppIndex <= packetUse-1: 534 | if i == 0: 535 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + '1 ' + str(firstAppIndex) + '\n') 536 | elif i < firstAppIndex: 537 | writeTxt.write( 538 | flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' ' + str( 539 | firstAppIndex) + '\n') 540 | elif i == firstAppIndex: 541 | if i == len(flowLen) - 1: 542 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' 0' + '\n') 543 | else: 544 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str( 545 | i + 1) + ' 0' + '\n') 546 | elif i == len(flowLen) - 1: 547 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 548 | else: 549 | writeTxt.write( 550 | flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 551 | else: 552 | if i == 0: 553 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 554 | elif i == len(flowLen) - 1: 555 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 556 | else: 557 | writeTxt.write( 558 | flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 559 | else: 560 | writeTxt.write( 561 | str(packetUse) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 562 | for i in range(packetUse): 563 | if firstAppIndex != None and firstAppIndex <= packetUse-1: 564 | if i == 0: 565 | writeTxt.write(flowLen[i] + ' ' + '2 1 ' + str(firstAppIndex) + '\n') 566 | elif i < firstAppIndex: 567 | writeTxt.write( 568 | flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' ' + str( 569 | firstAppIndex) + '\n') 570 | elif i == firstAppIndex: 571 | if i == packetUse - 1: 572 | writeTxt.write(flowLen[i] + ' ' + '2 ' + str(i - 1) + ' 0' + '\n') 573 | else: 574 | writeTxt.write(flowLen[i] + ' ' + '3' + ' ' + str(i - 1) + ' ' + str(i + 1) + ' 0' + '\n') 575 | elif i == packetUse - 1: 576 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse - 1 - 1) + '\n') 577 | else: 578 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 579 | else: 580 | if i == 0: 581 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 582 | elif i == packetUse - 1: 583 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(packetUse - 1 - 1) + '\n') 584 | else: 585 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 586 | firstAppIndex = None 587 | flowLen = [] 588 | flowCount += 1 589 | else: 590 | flowLen = [] 591 | else: 592 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 593 | if row[4].find('TLS') != -1: 594 | # writefile.write(row[5] + ',' + row[6] + '\n') 595 | flowLen.append(row[5]) 596 | if row[6].find('Application Data') != -1 and firstAppIndex==None: 597 | firstAppIndex = len(flowLen) - 1 598 | else: 599 | if row[4].find('TLS') != -1: 600 | flowLen.append(str(int(row[5]) * -1)) 601 | readfile.close() 602 | writeTxt.close() 603 | #最后将流的数量写入第一行 604 | with open(writefileName, 'r+') as f: 605 | content = f.read() 606 | f.seek(0, 0) 607 | f.write(str(flowCount) + '\n' + content) 608 | 609 | 610 | # 抽取一个文件夹所有流量数据的双向流量的数据包长度图结构-图结构梯形风筝结构 611 | def extractDownLenandFlagBiTixing(readfilepath, writefileName): 612 | packetUse = 25 613 | writeTxt = open(writefileName, 'w') 614 | flowCount = 0 # 流数量 615 | import os 616 | fileList = os.listdir(readfilepath) 617 | for readfileName in fileList: 618 | print(readfileName) 619 | # 获取dapp名称 620 | dappNameTem = readfileName.split('-')[0] 621 | print(dappNameTem) 622 | if not dappNameTem in dappName: 623 | mapped = len(dappName) 624 | dappName[dappNameTem] = mapped 625 | print(dappName) 626 | # 将dappName写入文件 627 | import json 628 | json_str = json.dumps(dappName) 629 | with open('name_label.json', 'w') as json_file: 630 | json_file.write(json_str) 631 | 632 | try: 633 | readfile = open(readfilepath+readfileName, 'r', encoding='UTF-8') 634 | readCSV = csv.reader(readfile) 635 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 636 | flowLen = [] # 记录流长度,流里边的数据包 637 | firstAppIndex = None # 第一个下行ApplicationData数据包位置 638 | for row in readCSV: 639 | #print(row) 640 | if len(row) == 0: 641 | if len(flowLen) > 5 : # 四个数据包以上才作为一条流 642 | if len(flowLen) > packetUse: 643 | flowLen = flowLen[:packetUse] 644 | #print(flowLen) 645 | graphItems = genTixingGaphitems(flowLen) 646 | count = 0 647 | if len(graphItems) == 1: # 一条流中只有单向流量 648 | #print(flowLen) 649 | print('Hello') 650 | ''' 651 | writeTxt.write(str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 652 | for i in range(len(flowLen)): 653 | if i == 0: 654 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 655 | elif i == len(flowLen) - 1: 656 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 657 | else: 658 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 659 | flowCount += 1 660 | ''' 661 | else: 662 | writeTxt.write(str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 663 | for level in range(len(graphItems)): 664 | if level == 0: # 第一层 665 | if len(graphItems[level]) == 1: 666 | if len(graphItems[level+1]) == 1: 667 | writeTxt.write(graphItems[level][0] + ' 1 ' + str(count + 1) + '\n') 668 | count += 1 669 | else: 670 | writeTxt.write(graphItems[level][0] + ' 2 ' + str(count + 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 671 | count += 1 672 | else: 673 | for i in range(len(graphItems[level])): 674 | if i == 0: 675 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count + 1) + ' ' + str(len(graphItems[level])) + '\n') 676 | count += 1 677 | elif i == len(graphItems[level]) - 1: 678 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 679 | count += 1 680 | else: 681 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 682 | count += 1 683 | elif level == len(graphItems) - 1: # 最后一层 684 | if len(graphItems[level]) == 1: 685 | if len(graphItems[level-1]) == 1: 686 | writeTxt.write(graphItems[level][0] + ' 1 ' + str(count - 1) + '\n') 687 | count += 1 688 | else: 689 | writeTxt.write(graphItems[level][0] + ' 2 ' + str(count - len(graphItems[level-1])) + ' ' + str(count - 1) + '\n') 690 | else: 691 | for i in range(len(graphItems[level])): 692 | if i == 0: 693 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - len(graphItems[level-1])) + ' ' + str(count + 1) + '\n') 694 | count += 1 695 | elif i == len(graphItems[level]) - 1: 696 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count - len(graphItems[level])) + '\n') 697 | count += 1 698 | else: 699 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 700 | count += 1 701 | else: # 中间层 702 | if len(graphItems[level]) == 1: # 中间层节点数为1 703 | if len(graphItems[level-1]) == 1 and len(graphItems[level+1]) == 1: 704 | writeTxt.write(graphItems[level][0] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 705 | count += 1 706 | elif len(graphItems[level-1]) == 1 and len(graphItems[level+1]) != 1: 707 | writeTxt.write(graphItems[level][0] + ' 3 ' + str(count - 1) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 708 | count += 1 709 | elif len(graphItems[level-1]) != 1 and len(graphItems[level+1]) == 1: 710 | writeTxt.write(graphItems[level][0] + ' 3 ' + str(count - len(graphItems[level-1])) + ' ' + str(count - 1) + ' ' + str(count + 1) + '\n') 711 | count += 1 712 | else: 713 | writeTxt.write(graphItems[level][0] + ' 4 ' + str(count - len(graphItems[level-1])) + ' ' + str(count - 1) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 714 | count += 1 715 | else: # 中间层节点数不为1 716 | if len(graphItems[level - 1]) == 1 and len(graphItems[level + 1]) == 1: 717 | for i in range(len(graphItems[level])): 718 | if i == 0: 719 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - 1) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level])) +'\n') 720 | count += 1 721 | elif i == len(graphItems[level]) - 1: 722 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level])) + ' ' + str(count - 1) + ' ' + str(count + 1) + '\n') 723 | count += 1 724 | else: 725 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 726 | count += 1 727 | elif len(graphItems[level - 1]) == 1 and len(graphItems[level + 1]) != 1: 728 | for i in range(len(graphItems[level])): 729 | if i == 0: 730 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - 1) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level])) +'\n') 731 | count += 1 732 | elif i == len(graphItems[level]) - 1: 733 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - 1) + ' ' + str(count - len(graphItems[level])) + ' ' + str(count + len(graphItems[level+1])) + '\n') 734 | count += 1 735 | else: 736 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 737 | count += 1 738 | elif len(graphItems[level-1]) != 1 and len(graphItems[level+1]) == 1: 739 | for i in range(len(graphItems[level])): 740 | if i == 0: 741 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level-1])) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level])) +'\n') 742 | count += 1 743 | elif i == len(graphItems[level]) - 1: 744 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level])) + ' ' + str(count - 1) + ' ' + str(count + 1) + '\n') 745 | count += 1 746 | else: 747 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 748 | count += 1 749 | else: 750 | for i in range(len(graphItems[level])): 751 | if i == 0: 752 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level-1])) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level])) +'\n') 753 | count += 1 754 | elif i == len(graphItems[level]) - 1: 755 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level])) + ' ' + str(count - 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 756 | count += 1 757 | else: 758 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 759 | count += 1 760 | #print(graphItems) 761 | flowCount += 1 762 | firstAppIndex = None 763 | flowLen = [] 764 | 765 | else: 766 | flowLen = [] 767 | else: 768 | #if row[3].find('172.217') != -1: 769 | # firstIP = '31.' 770 | #else: 771 | firstIP = row[3][:4] 772 | if firstIP.find('172.') != -1 or firstIP.find('10.') != -1 or firstIP.find('192.') != -1: 773 | if row[4].find('TLS') != -1 or row[4].find('SSL') != -1: 774 | flowLen.append(row[5]) 775 | if row[6].find('Application Data') != -1 and firstAppIndex == None: 776 | firstAppIndex = len(flowLen) - 1 777 | else: 778 | if int(row[5]) != 66 and int(row[5])!= 54 and int(row[5])!= 60: 779 | flowLen.append(str(int(row[5]) * -1)) 780 | ''' 781 | if row[4].find('TLS') != -1 or row[4].find('SSL') != -1: 782 | # print(row[5]) 783 | flowLen.append(str(int(row[5]) * -1)) 784 | ''' 785 | #print(row[3][:4]) 786 | ''' 787 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 788 | if row[4].find('TLS') != -1 or row[4].find('SSL') != -1: 789 | flowLen.append(row[5]) 790 | if row[6].find('Application Data') != -1 and firstAppIndex==None: 791 | firstAppIndex = len(flowLen) - 1 792 | else: 793 | if row[4].find('TLS') != -1 or row[4].find('SSL') != -1: 794 | #print(row[5]) 795 | flowLen.append(str(int(row[5]) * -1)) 796 | ''' 797 | readfile.close() 798 | except: 799 | readfile = open(readfilepath + readfileName, 'r') 800 | readCSV = csv.reader(readfile) 801 | # row = ['13576', '423.009129', '172.18.94.45', '104.24.3.10', 'TCP', '54', '53712 → 443 [ACK] Seq=1429 Ack=783934 Win=253952 Len=0'] 802 | flowLen = [] # 记录流长度,流里边的数据包 803 | firstAppIndex = None 804 | for row in readCSV: 805 | #print(row) 806 | if len(row) == 0: 807 | if len(flowLen) > 5: # 四个数据包以上才作为一条流 808 | if len(flowLen) > packetUse: 809 | flowLen = flowLen[:packetUse] 810 | #print(flowLen) 811 | graphItems = genTixingGaphitems(flowLen) 812 | count = 0 813 | if len(graphItems) == 1: 814 | #print(flowLen) 815 | print('Hello') 816 | ''' 817 | writeTxt.write(str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 818 | for i in range(len(flowLen)): 819 | if i == 0: 820 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + '1' + '\n') 821 | elif i == len(flowLen) - 1: 822 | writeTxt.write(flowLen[i] + ' ' + '1' + ' ' + str(len(flowLen) - 1 - 1) + '\n') 823 | else: 824 | writeTxt.write(flowLen[i] + ' ' + '2' + ' ' + str(i - 1) + ' ' + str(i + 1) + '\n') 825 | flowCount += 1 826 | ''' 827 | else: 828 | writeTxt.write(str(len(flowLen)) + ' ' + str(dappName[dappNameTem]) + '\n') # 写入数据流种数据包个数和类别 829 | for level in range(len(graphItems)): 830 | if level == 0: # 第一层 831 | if len(graphItems[level]) == 1: 832 | if len(graphItems[level+1]) == 1: 833 | writeTxt.write(graphItems[level][0] + ' 1 ' + str(count + 1) + '\n') 834 | count += 1 835 | else: 836 | writeTxt.write(graphItems[level][0] + ' 2 ' + str(count + 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 837 | count += 1 838 | else: 839 | for i in range(len(graphItems[level])): 840 | if i == 0: 841 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count + 1) + ' ' + str(len(graphItems[level])) + '\n') 842 | count += 1 843 | elif i == len(graphItems[level]) - 1: 844 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 845 | count += 1 846 | else: 847 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 848 | count += 1 849 | elif level == len(graphItems) - 1: # 最后一层 850 | if len(graphItems[level]) == 1: 851 | if len(graphItems[level-1]) == 1: 852 | writeTxt.write(graphItems[level][0] + ' 1 ' + str(count - 1) + '\n') 853 | count += 1 854 | else: 855 | writeTxt.write(graphItems[level][0] + ' 2 ' + str(count - len(graphItems[level-1])) + ' ' + str(count - 1) + '\n') 856 | else: 857 | for i in range(len(graphItems[level])): 858 | if i == 0: 859 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - len(graphItems[level-1])) + ' ' + str(count + 1) + '\n') 860 | count += 1 861 | elif i == len(graphItems[level]) - 1: 862 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count - len(graphItems[level])) + '\n') 863 | count += 1 864 | else: 865 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 866 | count += 1 867 | else: # 中间层 868 | if len(graphItems[level]) == 1: # 中间层节点数为1 869 | if len(graphItems[level-1]) == 1 and len(graphItems[level+1]) == 1: 870 | writeTxt.write(graphItems[level][0] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 871 | count += 1 872 | elif len(graphItems[level-1]) == 1 and len(graphItems[level+1]) != 1: 873 | writeTxt.write(graphItems[level][0] + ' 3 ' + str(count - 1) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 874 | count += 1 875 | elif len(graphItems[level-1]) != 1 and len(graphItems[level+1]) == 1: 876 | writeTxt.write(graphItems[level][0] + ' 3 ' + str(count - len(graphItems[level-1])) + ' ' + str(count - 1) + ' ' + str(count + 1) + '\n') 877 | count += 1 878 | else: 879 | writeTxt.write(graphItems[level][0] + ' 4 ' + str(count - len(graphItems[level-1])) + ' ' + str(count - 1) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 880 | count += 1 881 | else: # 中间层节点数不为1 882 | if len(graphItems[level - 1]) == 1 and len(graphItems[level + 1]) == 1: 883 | for i in range(len(graphItems[level])): 884 | if i == 0: 885 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - 1) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level])) +'\n') 886 | count += 1 887 | elif i == len(graphItems[level]) - 1: 888 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level])) + ' ' + str(count - 1) + ' ' + str(count + 1) + '\n') 889 | count += 1 890 | else: 891 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 892 | count += 1 893 | elif len(graphItems[level - 1]) == 1 and len(graphItems[level + 1]) != 1: 894 | for i in range(len(graphItems[level])): 895 | if i == 0: 896 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - 1) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level])) +'\n') 897 | count += 1 898 | elif i == len(graphItems[level]) - 1: 899 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - 1) + ' ' + str(count - len(graphItems[level])) + ' ' + str(count + len(graphItems[level+1])) + '\n') 900 | count += 1 901 | else: 902 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 903 | count += 1 904 | elif len(graphItems[level-1]) != 1 and len(graphItems[level+1]) == 1: 905 | for i in range(len(graphItems[level])): 906 | if i == 0: 907 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level-1])) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level])) +'\n') 908 | count += 1 909 | elif i == len(graphItems[level]) - 1: 910 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level])) + ' ' + str(count - 1) + ' ' + str(count + 1) + '\n') 911 | count += 1 912 | else: 913 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 914 | count += 1 915 | else: 916 | for i in range(len(graphItems[level])): 917 | if i == 0: 918 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level-1])) + ' ' + str(count + 1) + ' ' + str(count + len(graphItems[level])) +'\n') 919 | count += 1 920 | elif i == len(graphItems[level]) - 1: 921 | writeTxt.write(graphItems[level][i] + ' 3 ' + str(count - len(graphItems[level])) + ' ' + str(count - 1) + ' ' + str(count + len(graphItems[level+1])) + '\n') 922 | count += 1 923 | else: 924 | writeTxt.write(graphItems[level][i] + ' 2 ' + str(count - 1) + ' ' + str(count + 1) + '\n') 925 | count += 1 926 | #print(graphItems) 927 | flowCount += 1 928 | firstAppIndex = None 929 | flowLen = [] 930 | else: 931 | flowLen = [] 932 | else: 933 | #if row[3].find('172.217') != -1: 934 | # firstIP = '31.' 935 | #else: 936 | firstIP = row[3][:4] 937 | if firstIP.find('172.') != -1 or firstIP.find('10.') != -1 or firstIP.find('192.') != -1: 938 | if row[4].find('TLS') != -1 or row[4].find('SSL') != -1: 939 | flowLen.append(row[5]) 940 | if row[6].find('Application Data') != -1 and firstAppIndex == None: 941 | firstAppIndex = len(flowLen) - 1 942 | else: 943 | if int(row[5]) != 66 and int(row[5])!= 54 and int(row[5])!= 60: 944 | flowLen.append(str(int(row[5]) * -1)) 945 | ''' 946 | if row[4].find('TLS') != -1 or row[4].find('SSL') != -1: 947 | # print(row[5]) 948 | flowLen.append(str(int(row[5]) * -1)) 949 | ''' 950 | ''' 951 | if row[3].find('172.') != -1 or row[3].find('10.') != -1 or row[3].find('192.') != -1: 952 | if row[4].find('TLS') != -1 or row[4].find('SSL') != -1: 953 | # writefile.write(row[5] + ',' + row[6] + '\n') 954 | flowLen.append(row[5]) 955 | if row[6].find('Application Data') != -1 and firstAppIndex==None: 956 | firstAppIndex = len(flowLen) - 1 957 | else: 958 | if row[4].find('TLS') != -1 or row[4].find('SSL') != -1: 959 | flowLen.append(str(int(row[5]) * -1)) 960 | ''' 961 | readfile.close() 962 | writeTxt.close() 963 | #最后将流的数量写入第一行 964 | with open(writefileName, 'r+') as f: 965 | content = f.read() 966 | f.seek(0, 0) 967 | f.write(str(flowCount) + '\n' + content) 968 | 969 | 970 | def genTixingGaphitems(flowLen): 971 | graphItems = [] 972 | graphItemst = [] 973 | for i in range(len(flowLen)): 974 | if i == 0: 975 | graphItemst.append(flowLen[i]) 976 | elif int(flowLen[i]) > 0 and int(flowLen[i - 1]) > 0: 977 | graphItemst.append(flowLen[i]) 978 | elif int(flowLen[i]) < 0 and int(flowLen[i - 1]) < 0: 979 | graphItemst.append(flowLen[i]) 980 | else: 981 | graphItems.append(graphItemst) 982 | graphItemst = [] 983 | graphItemst.append(flowLen[i]) 984 | if len(graphItemst) != 0: 985 | graphItems.append(graphItemst) 986 | return graphItems 987 | 988 | 989 | def main(): 990 | #extractDownLenandFlagNF('data/ALL/', 'feature/DAPP.txt') 991 | extractDownLenandFlagBiTixing('data/ALL/', 'feature/DAPPBI.txt') 992 | #extractDownLenandFlagBi('data/ALL/', 'feature/DAPPBI.txt') 993 | #extractDownLenandFlagBi('data/FFFC/', 'feature/DAPPBIF.txt') 994 | #extractDownLenandFlagBiDuo('data/ALL/', 'feature/DAPPBI.txt') 995 | #extractDownLenandFlagBiDuo('data/TTTT/', 'feature/DAPPBI.txt') 996 | #extractDownLenandFlagGFlist('data/TTTT/', 'feature/DAPP.txt') 997 | 998 | 999 | if __name__=='__main__': 1000 | main() 1001 | --------------------------------------------------------------------------------