├── .gitattributes ├── README.md ├── SySeVR_dependences.zip ├── access_db_operate.py ├── all.py ├── complete_PDG.py ├── data_preprocess.py ├── extract_df.py ├── general_op.py ├── get_cfg_relation.py ├── make_label.py ├── points_get.py └── slice_op.py /.gitattributes: -------------------------------------------------------------------------------- 1 | SySeVR_dependences.zip filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Environments 2 | 3 | 4 | ### System 5 | 6 | Ubuntu 18.04 (Tested) 7 | 8 | ### Step 1 9 | 10 | - Joern 0.3.1 11 | - JDK 1.7 12 | - Neo4J 2.1.8 Community Edition 13 | - Gremlin for Neo4J 2.X 14 | - Apache Ant build tool 15 | - Python 2.7 16 | 17 | ### Step 2 18 | 19 | - Python 3.6 20 | - Tensorflow 1.6 21 | - Gensim 3.4 22 | 23 | ## Installation 24 | 25 | ### Joern 0.3.1 26 | 27 | - #### **JDK 1.7** 28 | 29 | 1. extract the tarball 30 | 31 | ```bash 32 | tar -xvf jdk-7u80-linux-x64.tar.gz -C /usr/loacl/java 33 | ``` 34 | 35 | 2. 
set environment variable 36 | 37 | > **/etc/profile** 38 | 39 | ```bash 40 | # Add These Content at the End of the File 41 | ####################################### 42 | JAVA_HOME=/usr/local/java/jdk1.7.0_80 43 | JRE_HOME=/usr/local/java/jdk1.7.0_80 44 | PATH=$PATH:$JRE_HOME/bin:$JAVA_HOME/bin 45 | 46 | export JAVA_HOME 47 | export JRE_HOME 48 | export PATH 49 | ####################################### 50 | 51 | update-alternatives --install "/usr/bin/java" "java" "/usr/local/java/jdk1.7.0_80/bin/java" 1 52 | 53 | update-alternatives --install "/usr/bin/javac" "javac" "/usr/local/java/jdk1.7.0_80/bin/javac" 1 54 | 55 | update-alternatives --install "/usr/bin/javaws" "javaws" "/usr/local/java/jdk1.7.0_80/bin/javaws" 1 56 | 57 | update-alternatives --set java /usr/local/java/jdk1.7.0_80/bin/java 58 | 59 | update-alternatives --set javac /usr/local/java/jdk1.7.0_80/bin/javac 60 | 61 | update-alternatives --set javaws /usr/local/java/jdk1.7.0_80/bin/javaws 62 | 63 | source /etc/profile 64 | ``` 65 | 66 | 3. verify 67 | 68 | ```bash 69 | java -version 70 | ``` 71 | **You should receive a message which displays** 72 | 73 | ```bash 74 | java version "1.7.0_80" 75 | Java(TM) SE Runtime Environment (build 1.7.0_80-b15) 76 | Java HotSpot(TM) 64-Bit Server VM (build 24.80-b11, mixed mode) 77 | ``` 78 | 79 | - #### **Neo4j 2.1.8 Community Edition** 80 | 81 | 1. extract the tarball 82 | 83 | ```bash 84 | unzip neo4j-community-2.1.8.zip 85 | mkdir -d /usr/local/neo4j 86 | mv /usr/local/neo4j ./Neo4j/neo4j-community-2.1.8/* 87 | ``` 88 | 89 | 2. modify configure files 90 | 91 | > **configure files are located in /usr/local/neo4j/conf** 92 | 93 | **neo4j-server.properties** 94 | 95 | ```bash 96 | # location of the database directory 97 | org.neo4j.server.database.location=/home/joern-0.3.1/.joernIndex/ 98 | # Let the webserver only listen on the specified IP. Default is localhost (only 99 | # accept local connections). Uncomment to allow any connection. 
Please see the 100 | # security section in the neo4j manual before modifying this. 101 | org.neo4j.server.webserver.address=0.0.0.0 102 | ``` 103 | 104 | **neo4j-wrapper.conf** 105 | 106 | ```bash 107 | # Java Heap Size: by default the Java heap size is dynamically 108 | # calculated based on available system resources. 109 | # Uncomment these lines to set specific initial and maximum 110 | # heap size in MB. 111 | wrapper.java.initmemory=512 112 | wrapper.java.maxmemory=10240 #as large as you can 113 | ``` 114 | 115 | 3. set environment variable 116 | 117 | > **/etc/profile** 118 | 119 | ```bash 120 | # Add These Content at the End of the File 121 | ####################################### 122 | NEO4J_HOME=/usr/local/neo4j 123 | PATH=$PATH:$NEO4J_HOME/bin 124 | 125 | export NEO4J_HOME 126 | export PATH 127 | ####################################### 128 | source /etc/profile 129 | ``` 130 | 131 | 4. start && verify 132 | 133 | ```bash 134 | /usr/local/neo4j/bin/neo4j console 135 | ``` 136 | 137 | - #### **Gremlin for Neo4J 2.X** 138 | 139 | > https://github.com/neo4j-contrib/gremlin-plugin 140 | 141 | ```bash 142 | unzip neo4j-gremlin-plugin-tp2-2.1.5-server-plugin.zip -d $NEO4J_HOME/plugins/gremlin-plugin 143 | ``` 144 | 145 | - #### **Apache Ant build tool** 146 | 147 | 1. extract the tarball 148 | 149 | ```bash 150 | mkdir /usr/local/ant 151 | unzip -d /usr/local/ant apache-ant-1.8.4-bin.zip 152 | ``` 153 | 154 | 2. set environment variable 155 | 156 | > **/etc/profile** 157 | 158 | ```bash 159 | # Add These Content at the End of the File 160 | ####################################### 161 | ANT_HOME=/usr/local/ant 162 | PATH=$PATH:$ANT_HOME/bin 163 | 164 | export ANT_HOME 165 | export PATH 166 | ####################################### 167 | 168 | source /etc/profile 169 | ``` 170 | 171 | 3. 
verify 172 | 173 | ```bash 174 | ant -version 175 | ``` 176 | 177 | - #### **Joern 0.3.1** 178 | 179 | > https://joern.readthedocs.io/en/latest/installation.html#system-requirements-and-dependencies 180 | 181 | 1. extract the tarball 182 | ```bash 183 | tar -xvf 0.3.1.tar.gz 184 | ``` 185 | 186 | 2. extract the tarball of build dependencies 187 | 188 | ```bash 189 | cd joern-0.3.1 190 | tar -xvf lib.tar.gz 191 | ``` 192 | 193 | 3. build the project 194 | 195 | ```bash 196 | cd joern-0.3.1 197 | ant 198 | ``` 199 | 200 | **The executable JAR file is located in joern-0.3.1/bin/joern.jar** 201 | 202 | 4. set environment variable (optional) 203 | 204 | > **/etc/profile** 205 | 206 | ```bash 207 | # Add These Content at the End of the File 208 | ####################################### 209 | JOERN_HOME=/home/joern-0.3.1/ 210 | 211 | export JOERN_HOME 212 | ####################################### 213 | ``` 214 | 215 | > **~/.bashrc** 216 | 217 | ```bash 218 | # Add These Content at the End of the File 219 | ####################################### 220 | alias joern='java -jar $JOERN/bin/joern.jar' 221 | ####################################### 222 | ``` 223 | 224 | ```bash 225 | source /etc/profile 226 | source ~/.bashrc 227 | ``` 228 | 229 | 5. build additional tools (optional) 230 | 231 | ``` 232 | cd joern-0.3.1 233 | ant tools 234 | ``` 235 | 236 | 6. install python-joern 237 | 238 | ```bash 239 | apt install python-setuptools python-dev python-pip 240 | ``` 241 | 242 | ```bash 243 | pip2 install py2neo==2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple 244 | ``` 245 | 246 | ```bash 247 | pip2 install py2neo-gremlin -i https://pypi.tuna.tsinghua.edu.cn/simple 248 | ``` 249 | 250 | ```bash 251 | tar -xvf python-joern-0.3.1.tar.gz 252 | cd python-joern-0.3.1 253 | python2 setup.py install 254 | ``` 255 | 256 | 7. 
install joern-tools 257 | 258 | ```bash 259 | pip2 install chardet -i https://pypi.tuna.tsinghua.edu.cn/simple 260 | pip2 install pygraphviz -i https://pypi.tuna.tsinghua.edu.cn/simple 261 | ``` 262 | 263 | ```bash 264 | git clone https://github.com/fabsx00/joern-tools 265 | cd joern-tools 266 | python2 setup.py install 267 | ``` 268 | 269 | 8. verify 270 | 271 | ```bash 272 | joern-lookup 273 | ``` 274 | 275 | ## Using 276 | 277 | ### Step1 Generating Slices 278 | 279 | > Work Dir: /home/SySeVR-master/Implementation/source2slice 280 | > 281 | > Code Dir: /home/code 282 | > 283 | > Recommended Memory Size: >=16GB (according to your code size) 284 | > 285 | > If you want to slice the NVD and SARD, you may divide them into parts. 286 | 287 | 1. install dependences 288 | 289 | ```bash 290 | apt install python-igraph 291 | ``` 292 | 293 | 2. parse the source code 294 | 295 | >input: source codes 296 | > 297 | >output: .joernIndex 298 | 299 | ```bash 300 | rm -rf ./.joernIndex 301 | ``` 302 | 303 | ```bash 304 | java -Xmx16g -jar $JOERN_HOME/bin/joern.jar /home/code 305 | ``` 306 | 307 | This will create a neo4j database directory .joernIndex in this directory. 308 | 309 | 3. generate CFG 310 | 311 | >input: .joernIndex 312 | > 313 | >output: cfg_db 314 | 315 | ```bash 316 | # start neo4j at other terminal 317 | /usr/local/neo4j/bin/neo4j console 318 | ``` 319 | 320 | ```bash 321 | mkdir cfg_db 322 | python2 get_cfg_relation.py 323 | ``` 324 | 325 | 4. generate PDG 326 | 327 | >input: cfg_db .joernIndex 328 | > 329 | >output: pdg_db 330 | 331 | ```python 332 | # modify access_db_operate.py 333 | http.socket_timeout = 999999999 # a big number 334 | ``` 335 | 336 | ```bash 337 | mkdir pdg_db 338 | python2 complete_PDG.py 339 | ``` 340 | 341 | 5. 
generate call graph of functions 342 | 343 | >input: pdg_db .joernIndex 344 | > 345 | >output: dict_call2cfgNodeID_funcID 346 | 347 | ```bash 348 | mkdir dict_call2cfgNodeID_funcID 349 | python2 access_db_operate.py 350 | ``` 351 | 352 | 6. generate four kinds of SyCVs 353 | 354 | >input: dict_call2cfgNodeID_funcID 355 | > 356 | >outout: arrayuse_slice_points.pkl, integeroverflow_slice_points_new.pkl, pointuse_slice_points.pkl, sensifun_slice_points.pkl 357 | 358 | ```python 359 | # modify points_get.py near the 128 rows 360 | # change "location" to ",location" 361 | for i in list_usenodes: 362 | if str(i).find(",location")==-1: 363 | list_usenodes.remove(i) 364 | loc_list=[] 365 | final_list=[] 366 | for i in list_usenodes: 367 | #print(i) 368 | if ',location' in str(i): 369 | print(str(i)) 370 | location=str(i).split(",type:")[0].split(",location:")[1][1:-1].split(":") 371 | count=int(location[0]) 372 | loc_list.append(count) 373 | 374 | ``` 375 | 376 | ```bash 377 | python2 points_get.py 378 | ``` 379 | 380 | 7. extract slices 381 | 382 | >input: dict_call2cfgNodeID_funcID, arrayuse_slice_points.pkl, integeroverflow_slice_points_new.pkl, pointuse_slice_points.pkl, sensifun_slice_points.pkl 383 | > 384 | >output: api_slices.txt, arrayuse_slice.txt, integeroverflow_slices.txt, pointeruse_slice.txt 385 | 386 | ```python 387 | # modify save-file-path in extract_df.py 388 | store_filepath = "integeroverflow_slices.txt" 389 | store_filepath = "arraysuse_slices.txt" 390 | store_filepath = "pointersuse_slices.txt" 391 | store_filepath = "api_slices.txt" 392 | ``` 393 | 394 | ```python 395 | # add slice_op.py at 348 rows 396 | if not os.path.exists(path): 397 | i += 1 398 | continue 399 | ``` 400 | 401 | **Due to the limit of memory, you may execute four functions in extract_df.py one by one.** 402 | 403 | ```bash 404 | python2 extract_df.py 405 | ``` 406 | 407 | 8. 
get labels of slices 408 | 409 | >input: api_slices.txt, arrayuse_slice.txt, integeroverflow_slices.txt, pointeruse_slice.txt 410 | > 411 | >output: apt_slices_label.pkl, api_slices-vulline.pkl, array_slice_label.pkl, expr_slice_label.pkl, pointer_slice_label.pkl 412 | 413 | ```python 414 | # modify make_label.py at 70 rows 415 | # wrong format 416 | _dict_cwe2line = {} 417 | for _dict in dict: 418 | for key in _dict.keys(): 419 | if _dict[key] not in _dict_cwe2line_target.keys(): 420 | ``` 421 | 422 | ```bash 423 | python2 make_label.py 424 | ``` 425 | 426 | 9. combine labels with slices 427 | 428 | >input: apt_slices_label.pkl, api_slices-vulline.pkl, array_slice_label.pkl, expr_slice_label.pkl, pointer_slice_label.pkl, api_slices.txt, arrayuse_slice.txt, integeroverflow_slices.txt, pointeruse_slice.txt 429 | > 430 | >output: api_slices, arrayuse_slices.txt, integeroverflow_slices.txt, pointersuse_slices.txt 431 | 432 | ```bash 433 | mkdir slices label_source slice_label 434 | cp api_slices.txt arrayuse_slice.txt integeroverflow_slices.txt pointeruse_slice.txt ./slices 435 | cp apt_slices_label.pkl api_slices-vulline.pkl array_slice_label.pkl expr_slice_label.pkl pointer_slice_label.pkl ./label_source 436 | cd label_source 437 | mv expr_slice_label.pkl integeroverflow_slices.pkl 438 | mv apt_slices_label.pkl api_slices.pkl 439 | mv array_slice_label.pkl arrayuse_slice.pkl 440 | mv pointer_slice_label.pkl pointeruse_slice.pkl 441 | ``` 442 | 443 | ```bash 444 | python2 data_preprocess.py 445 | ``` 446 | 447 | 448 | 449 | ### Step2 Data Perprocess 450 | 451 | pass 452 | 453 | ### Step3 Deep Learning 454 | 455 | pass -------------------------------------------------------------------------------- /SySeVR_dependences.zip: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1f2f3d797cec9b1e3162d26496acc23969622353de327679c9258292fba93f18 3 | size 236608606 4 | 
-------------------------------------------------------------------------------- /access_db_operate.py: -------------------------------------------------------------------------------- 1 | ## -*- coding: utf-8 -*- 2 | from joern.all import JoernSteps 3 | from igraph import * 4 | from general_op import * 5 | import pickle 6 | from py2neo.packages.httpstream import http 7 | http.socket_timeout = 999999999 8 | 9 | def get_all_use_bydefnode(db, node_id): 10 | query_str = "g.v(%d).in('USE')" % node_id 11 | results = db.runGremlinQuery(query_str) 12 | list_re = [] 13 | for re in results: 14 | if re.properties['type'] == 'Statement': 15 | continue 16 | else: 17 | list_re.append(re) 18 | 19 | return list_re 20 | 21 | 22 | def get_all_def_bydefnode(db, node_id): 23 | query_str = "g.v(%d).in('DEF')" % node_id 24 | results = db.runGremlinQuery(query_str) 25 | list_re = [] 26 | for re in results: 27 | if re.properties['type'] == 'Statement': 28 | continue 29 | else: 30 | list_re.append(re) 31 | 32 | return list_re 33 | 34 | 35 | def get_exprstmt_node(db): 36 | query_expr_str = "queryNodeIndex('type:ExpressionStatement')" 37 | #results = db.runGremlinQuery(query_expr_str) 38 | results_1 = db.runGremlinQuery(query_expr_str) 39 | 40 | query_iddecl_str = 'queryNodeIndex("type:IdentifierDeclStatement")' 41 | results_2 = db.runGremlinQuery(query_iddecl_str) 42 | 43 | results = results_1 + results_2 44 | 45 | return results 46 | 47 | 48 | def get_pointers_node(db): 49 | list_pointers_node = [] 50 | query_iddecl_str = 'queryNodeIndex("type:IdentifierDeclStatement")' 51 | 52 | results = db.runGremlinQuery(query_iddecl_str) 53 | 54 | if results != []: 55 | for re in results: 56 | code = re.properties['code'] 57 | if code.find(' = ') != -1: 58 | code = code.split(' = ')[0] 59 | 60 | if code.find('*') != -1: 61 | list_pointers_node.append(re) 62 | 63 | query_param_str = 'queryNodeIndex("type:Parameter")' 64 | results = db.runGremlinQuery(query_param_str) 65 | if results != []: 66 | for re 
in results: 67 | code = re.properties['code'] 68 | if code.find(' = ') != -1: 69 | code = code.split(' = ')[0] 70 | 71 | if code.find('*') != -1: 72 | list_pointers_node.append(re) 73 | 74 | return list_pointers_node 75 | 76 | 77 | def get_arrays_node(db): 78 | list_arrays_node = [] 79 | query_iddecl_str = "queryNodeIndex('type:IdentifierDeclStatement')" 80 | results = db.runGremlinQuery(query_iddecl_str) 81 | if results != []: 82 | for re in results: 83 | code = re.properties['code'] 84 | if code.find(' = ') != -1: 85 | code = code.split(' = ')[0] 86 | 87 | if code.find(' [ ') != -1: 88 | list_arrays_node.append(re) 89 | 90 | query_param_str = "queryNodeIndex('type:Parameter')" 91 | results = db.runGremlinQuery(query_param_str) 92 | if results != []: 93 | for re in results: 94 | code = re.properties['code'] 95 | if code.find(' = ') != -1: 96 | code = code.split(' = ')[0] 97 | 98 | if code.find(' [ ') != -1: 99 | list_arrays_node.append(re) 100 | 101 | return list_arrays_node 102 | 103 | 104 | def get_def_node(db, cfg_node_id): 105 | query_str = "g.v(%d).out('DEF')" % cfg_node_id 106 | results = db.runGremlinQuery(query_str) 107 | return results 108 | 109 | 110 | def getFunctionNodeByName(db, funcname): 111 | query_str = "queryNodeIndex('type:Function AND name:%s')" % funcname 112 | results = db.runGremlinQuery(query_str) 113 | return results 114 | 115 | 116 | def get_parameter_by_funcid(db, func_id): 117 | query_str = "g.v(%d).out('IS_FUNCTION_OF_CFG').out('CONTROLS').filter{ it.type == 'Parameter' }.id" % func_id 118 | results = db.runGremlinQuery(query_str) 119 | return results 120 | 121 | 122 | def isNodeExist(g, nodeName): 123 | if not g.vs: 124 | return False 125 | else: 126 | return nodeName in g.vs['name'] 127 | 128 | 129 | def getALLFuncNode(db): 130 | query_str = "queryNodeIndex('type:Function')" 131 | results = db.runGremlinQuery(query_str) 132 | return results 133 | 134 | 135 | def getFuncNode(db, func_name): 136 | query_str = 'getFunctionsByName("' + 
func_name + '")' 137 | func_node = db.runGremlinQuery(query_str) 138 | return func_node 139 | 140 | 141 | def getFuncFile(db, func_id): 142 | query_str = "g.v(%d).in('IS_FILE_OF').filepath" % func_id 143 | ret = db.runGremlinQuery(query_str) 144 | print ret 145 | return ret[0] 146 | 147 | 148 | def getCFGNodes(db, func_id): 149 | query_str = 'queryNodeIndex("functionId:%s AND isCFGNode:True")' % func_id 150 | cfgNodes = db.runGremlinQuery(query_str) 151 | 152 | return cfgNodes 153 | 154 | 155 | def getDDGEdges(db, func_id): 156 | query_str = """queryNodeIndex('functionId:%s AND isCFGNode:True').outE('REACHES')""" % (func_id) 157 | ddgEdges = db.runGremlinQuery(query_str) 158 | return ddgEdges 159 | 160 | 161 | def getCDGEdges(db, func_id): 162 | query_str = """queryNodeIndex('functionId:%s AND isCFGNode:True').outE('CONTROLS')""" % (func_id) 163 | cdgEdges = db.runGremlinQuery(query_str) 164 | return cdgEdges 165 | 166 | 167 | def getCFGEdges(db, func_id): 168 | query_str = """queryNodeIndex('functionId:%s AND isCFGNode:True').outE('FLOWS_TO')""" % (func_id) 169 | cfgEdges = db.runGremlinQuery(query_str) 170 | return cfgEdges 171 | 172 | 173 | def drawGraph(db, edges, func_entry_node, graph_type): 174 | g = Graph(directed=True) 175 | func_id = func_entry_node._id 176 | filepath = getFuncFile(db, func_id) 177 | 178 | for edge in edges: 179 | if edge.start_node.properties['code'] == 'ENTRY': 180 | startNode = str(edge.start_node.properties['functionId']) 181 | else: 182 | startNode = str(edge.start_node._id) 183 | 184 | if edge.start_node.properties['code'] == 'ERROR': 185 | continue 186 | 187 | if isNodeExist(g, startNode) == False: 188 | if edge.start_node.properties['code'] == 'ENTRY': 189 | node_prop = {'code': func_entry_node.properties['name'], 'type': func_entry_node.properties['type'], 190 | 'location': func_entry_node.properties['location'], 'filepath':filepath, 'functionId':str(edge.start_node.properties['functionId'])} 191 | else: 192 | node_prop = 
{'code': edge.start_node.properties['code'], 'type': edge.start_node.properties['type'], 193 | 'location': edge.start_node.properties['location'], 'filepath':filepath, 'functionId':str(edge.start_node.properties['functionId'])} 194 | g.add_vertex(startNode, **node_prop)#id is 'name' 195 | 196 | endNode = str(edge.end_node._id) 197 | if isNodeExist(g, endNode) == False: 198 | if graph_type == 'pdg' and edge.end_node.properties['code'] == 'EXIT': 199 | continue 200 | 201 | if edge.end_node.properties['code'] == 'ERROR': 202 | continue 203 | 204 | node_prop = {'code': edge.end_node.properties['code'], 'type': edge.end_node.properties['type'], 205 | 'location': edge.end_node.properties['location'], 'filepath':filepath, 'functionId':str(edge.end_node.properties['functionId'])} 206 | g.add_vertex(endNode, **node_prop) 207 | 208 | if graph_type == 'pdg': 209 | edge_prop = {'var': edge.properties['var']} 210 | else: 211 | edge_prop = {'var': edge.properties['flowLabel']} 212 | g.add_edge(startNode, endNode, **edge_prop) 213 | 214 | return g 215 | 216 | 217 | def translatePDGByNode(db, func_node): 218 | func_id = func_node._id 219 | ddgEdges = getDDGEdges(db, func_id) 220 | cdgEdges = getCDGEdges(db, func_id) 221 | Edges = ddgEdges + cdgEdges 222 | graph_type = 'pdg' 223 | g = drawGraph(db, Edges, func_node, graph_type) 224 | 225 | return g 226 | 227 | 228 | def translateCFGByNode(db, func_node): 229 | func_id = func_node._id 230 | Edges = getCFGEdges(db, func_id) 231 | graph_type = 'cfg' 232 | g = drawGraph(db, Edges, func_node, graph_type) 233 | 234 | return g 235 | 236 | 237 | def getUSENodesVar(db, func_id): 238 | query = "g.v(%s).out('USE').code" % func_id 239 | ret = db.runGremlinQuery(query) 240 | if ret == []: 241 | return False 242 | else: 243 | return ret 244 | 245 | 246 | def getDEFNodesVar(db, func_id): 247 | query = "g.v(%s).out('DEF').code" % func_id 248 | ret = db.runGremlinQuery(query) 249 | if ret == []: 250 | return False 251 | else: 252 | return ret 253 | 
254 | 255 | def getUseDefVarByPDG(db, pdg): 256 | dict_cfg2use = {} 257 | dict_cfg2def = {} 258 | #print pdg 259 | #need_to_addedge_node = [] 260 | for node in pdg.vs: 261 | if node['type'] == 'Function': 262 | continue 263 | 264 | func_id = node['name'] 265 | use_node = getUSENodesVar(db, func_id) 266 | def_node = getDEFNodesVar(db, func_id) 267 | 268 | if node['type'] == 'Statement': 269 | if def_node == False: 270 | code = node['code'].replace('\n', ' ') 271 | if code.find(" = ") != -1: 272 | value = code.split(" = ")[0].strip().split(' ') 273 | if value[-1] == ']': 274 | newvalue = code.split(" [ ")[0].strip().split(' ') 275 | if '->' in newvalue: 276 | a_index = newvalue.index('->') 277 | n_value = ' '.join([newvalue[a_index-1], '->', newvalue[a_index+1]]) 278 | newvalue[a_index-1] = n_value 279 | del newvalue[a_index] 280 | del newvalue[a_index] 281 | 282 | def_node = newvalue 283 | 284 | else: 285 | if '->' in value: 286 | a_index = value.index('->') 287 | n_value = ' '.join([value[a_index-1], '->', value[a_index+1]]) 288 | ob_value = value[a_index-1] 289 | value[a_index-1] = n_value 290 | del value[a_index] 291 | del value[a_index] 292 | value.append(ob_value.replace('*', '')) 293 | 294 | def_node = value 295 | 296 | #need_to_addedge_node.append(node['name']) 297 | 298 | if use_node == False: 299 | if code.find(" = ") != -1: 300 | value = code.split(" = ")[1].strip().split(' ') 301 | newvalue = [] 302 | for v in value: 303 | if v == '*' or v == '+' or v == '-' or v == '->' or v == '(' or v == ')' or v == '[' or v == ']' or v == '&' or v == '.' or v == '::' or v == ';' or v == ',': 304 | continue 305 | else: 306 | newvalue.append(v.strip()) 307 | 308 | else: 309 | value = code.split(' ') 310 | newvalue = [] 311 | for v in value: 312 | if v == '*' or v == '+' or v == '-' or v == '->' or v == '(' or v == ')' or v == '[' or v == ']' or v == '&' or v == '.' 
or v == '::' or v == ';' or v == ',': 313 | continue 314 | else: 315 | newvalue.append(v.strip()) 316 | 317 | use_node = newvalue 318 | 319 | 320 | if use_node: 321 | use_node = [code.replace('*', '').replace('&', '').strip() for code in use_node] 322 | 323 | if def_node: 324 | def_node = [code.replace('*', '').replace('&', '').strip() for code in def_node] 325 | 326 | else:#add define node 327 | new_def_node = getReturnVarOfAPI(node['code'])#get modify value of api_func 328 | if node['name'] == '2078': 329 | print "new_def_node", new_def_node 330 | 331 | if new_def_node: 332 | def_node = [] 333 | for code in new_def_node: 334 | new_code = code.replace('*', '').replace('&', '').strip() 335 | def_node.append(new_code) 336 | 337 | if new_code not in use_node: 338 | use_node.append(new_code) 339 | 340 | if use_node: 341 | dict_cfg2use[node['name']] = use_node 342 | 343 | if def_node: 344 | dict_cfg2def[node['name']] = def_node 345 | 346 | return dict_cfg2use, dict_cfg2def 347 | 348 | 349 | def getFuncNodeByFile(db, filenodeID): 350 | query_str = 'g.v(%d).out("IS_FILE_OF")' % filenodeID 351 | results = db.runGremlinQuery(query_str) 352 | _list = [] 353 | for re in results: 354 | if re.properties['type'] == 'Function': 355 | _list.append(re) 356 | else: 357 | continue 358 | 359 | return _list 360 | 361 | 362 | def getAllFuncfileByTestID(db, testID): 363 | testID = '*/'+ testID + '/*' 364 | query_str = "queryNodeIndex('type:File AND filepath:%s').id" % testID 365 | results = db.runGremlinQuery(query_str) 366 | return results 367 | 368 | 369 | def get_calls_id(db, func_name): 370 | query_str = 'getCallsTo("%s").id' % func_name 371 | results = db.runGremlinQuery(query_str) 372 | return results 373 | 374 | 375 | def getCFGNodeByCallee(db, node_ast_id): 376 | #print "start" 377 | query_str = "g.v(%s).in('IS_AST_PARENT')" % node_ast_id 378 | results = db.runGremlinQuery(query_str) 379 | #print "end" 380 | if results == []: 381 | return None 382 | 383 | for node in results: 
384 | if 'isCFGNode' in node.properties and node.properties['isCFGNode'] == 'True': 385 | return node 386 | else: 387 | node = getCFGNodeByCallee(db, node._id) 388 | 389 | return node 390 | 391 | 392 | def getCalleeNode(db, func_id): 393 | query_str = "queryNodeIndex('type:Callee AND functionId:%d')" % func_id 394 | results = db.runGremlinQuery(query_str) 395 | return results 396 | 397 | 398 | def get_all_calls_node(db, testID): 399 | list_all_funcID = [node._id for node in getFuncNodeInTestID(db, testID)] 400 | print "list_all_funcID", list_all_funcID 401 | print "lenth", len(list_all_funcID) 402 | if len(list_all_funcID)>130: 403 | print ">100" 404 | return False 405 | list_all_callee_node = [] 406 | for func_id in list_all_funcID:#allfile in a testID 407 | list_all_callee_node += getCalleeNode(db, func_id) 408 | 409 | if list_all_callee_node == []: 410 | return False 411 | else: 412 | return [(str(node._id), node.properties['code'], str(node.properties['functionId'])) for node in list_all_callee_node] 413 | 414 | 415 | def getFuncNodeInTestID(db, testID): 416 | list_all_file_id = getAllFuncfileByTestID(db, testID) 417 | if list_all_file_id == []: 418 | return False 419 | 420 | list_all_func_node = [] 421 | 422 | for file_id in list_all_file_id: 423 | list_func_node = getFuncNodeByFile(db, file_id) 424 | list_all_func_node += list_func_node 425 | 426 | return list_all_func_node 427 | 428 | 429 | def getClassByObjectAndFuncID(db, objectname, func_id): 430 | #print objectname, func_id 431 | all_cfg_node = getCFGNodes(db, func_id) 432 | for cfg_node in all_cfg_node: 433 | if cfg_node.properties['code'] == objectname and cfg_node.properties['type'] == 'Statement': 434 | print objectname, func_id, cfg_node.properties['code'], cfg_node._id 435 | query_str_1 = "queryNodeIndex('type:Statement AND code:%s AND functionId:%s')" % (objectname, func_id) 436 | results_1 = db.runGremlinQuery(query_str_1) 437 | if results_1 == []: 438 | return False 439 | else: 440 | ob_cfgNode 
= results_1[0] 441 | 442 | location_row = ob_cfgNode.properties['location'].split(':')[0] 443 | 444 | query_str_2 = "queryNodeIndex('type:ExpressionStatement AND functionId:%s')" % func_id 445 | results_2 = db.runGremlinQuery(query_str_2) 446 | if results_2 == []: 447 | return False 448 | 449 | classname = False 450 | for node in results_2: 451 | print node.properties['location'].split(':')[0], location_row 452 | if node.properties['location'].split(':')[0] == location_row: 453 | classname = node.properties['code'] 454 | break 455 | 456 | else: 457 | continue 458 | 459 | return classname 460 | 461 | elif cfg_node.properties['code'].find(' '+objectname+' = new') != -1: 462 | temp_value = cfg_node.properties['code'].split(' '+objectname+' = new')[1].replace('*', '').strip() 463 | if temp_value.split(' ')[0] != 'const': 464 | classname = temp_value.split(' ')[0] 465 | else: 466 | classname = temp_value.split(' ')[1] 467 | 468 | return classname 469 | 470 | 471 | def getDeleteNode(db, func_id): 472 | query_str = "queryNodeIndex('code:delete AND functionId:%d')" % func_id 473 | results = db.runGremlinQuery(query_str) 474 | return results 475 | 476 | 477 | def get_all_delete_node(db, testID): 478 | list_all_funcID = [node._id for node in getFuncNodeInTestID(db, testID)] 479 | print "list_all_funcID", list_all_funcID 480 | 481 | list_all_delete_node = [] 482 | for func_id in list_all_funcID:#allfile in a testID 483 | list_all_delete_node += getDeleteNode(db, func_id) 484 | 485 | if list_all_delete_node == []: 486 | return False 487 | else: 488 | return list_all_delete_node 489 | 490 | 491 | def getDeclNode(db, func_id): 492 | query_str = "queryNodeIndex('type:IdentifierDeclStatement AND functionId:%d')" % func_id 493 | results = db.runGremlinQuery(query_str) 494 | return results 495 | 496 | 497 | def get_all_iddecl_node(db, testID): 498 | list_all_funcID = [node._id for node in getFuncNodeInTestID(db, testID)] 499 | print "list_all_funcID", list_all_funcID 500 | 501 | 
list_all_decl_node = [] 502 | for func_id in list_all_funcID:#allfile in a testID 503 | list_all_decl_node += getDeclNode(db, func_id) 504 | 505 | if list_all_decl_node == []: 506 | return False 507 | else: 508 | return list_all_decl_node 509 | 510 | 511 | def getCallGraph(db, testID): 512 | list_all_func_node = getFuncNodeInTestID(db, testID) 513 | #print "list_all_func_node", list_all_func_node 514 | if list_all_func_node == []: 515 | return False 516 | 517 | call_g = Graph(directed=True) 518 | 519 | for func_node in list_all_func_node: 520 | #print(func_node) 521 | prop = {'funcname':func_node.properties['name'], 'type': func_node.properties['type'], 'filepath': func_node.properties['filepath']} 522 | call_g.add_vertex(str(func_node._id), **prop) 523 | 524 | 525 | list_all_callee = get_all_calls_node(db, testID)#we must limit result in testID, it already get callee node 526 | #print '3 ', list_all_callee 527 | if list_all_callee == False: 528 | return False 529 | 530 | for func_node in list_all_func_node: 531 | function_name = func_node.properties['name'] 532 | #print "function_name", function_name 533 | tag = False 534 | if function_name.find('::') != -1:#if is a function in class, have two problems 535 | func_name = function_name.split('::')[-1].strip() 536 | classname = function_name.split('::')[0].strip() 537 | 538 | if func_name == classname:#is a class::class, is a statementnode or a iddeclnode 539 | print 1 540 | list_callee_id = [] 541 | list_delete_node = get_all_delete_node(db, testID) 542 | if list_delete_node == False: 543 | continue 544 | 545 | for node in list_delete_node: 546 | functionID = node.properties["functionId"] 547 | all_cfg_node = getCFGNodes(db, functionID) 548 | delete_loc = node.properties['location'].split(':')[0] 549 | 550 | for cfg_node in all_cfg_node: 551 | if cfg_node.properties['location'] != None and cfg_node.properties['location'].split(':')[0] == delete_loc and cfg_node.properties['code'] != 'delete' and 
cfg_node.properties['code'] != '[' and cfg_node.properties['code'] != '[': 552 | objectname = cfg_node.properties['code'] 553 | ob_classname = getClassByObjectAndFuncID(db, objectname, functionID) 554 | pdg = getFuncPDGByfuncIDAndtestID(functionID, testID) 555 | if pdg == False: 556 | continue 557 | 558 | if ob_classname == classname: 559 | for p_n in pdg.vs: 560 | #print p_n['name'], str(node._id), str(cfg_node._id) 561 | if p_n['name'] == str(node._id): 562 | 563 | list_s = p_n.predecessors() 564 | for edge in pdg.es: 565 | if pdg.vs[edge.tuple[0]] in list_s and pdg.vs[edge.tuple[1]] == p_n and edge['var'] == objectname: 566 | #print (functionID, str(pdg.vs[edge.tuple[0]]['name'])) 567 | list_callee_id.append((str(functionID), str(pdg.vs[edge.tuple[0]]['name']))) 568 | else: 569 | continue 570 | 571 | elif p_n['name'] == str(cfg_node._id): 572 | list_s = p_n.predecessors() 573 | for edge in pdg.es: 574 | if pdg.vs[edge.tuple[0]] in list_s and pdg.vs[edge.tuple[1]] == p_n and edge['var'] == objectname: 575 | list_callee_id.append((functionID, str(pdg.vs[edge.tuple[0]]['name']))) 576 | else: 577 | continue 578 | 579 | else: 580 | continue 581 | 582 | 583 | else: 584 | continue 585 | 586 | elif func_name.replace('~', '') == classname:#is a class::~class 587 | list_callee_id = [] 588 | list_delete_node = get_all_delete_node(db, testID) 589 | if list_delete_node == False: 590 | continue 591 | 592 | for node in list_delete_node: 593 | functionID = node.properties["functionId"] 594 | all_cfg_node = getCFGNodes(db, functionID) 595 | delete_loc = node.properties['location'].split(':')[0] 596 | 597 | for cfg_node in all_cfg_node: 598 | if cfg_node.properties['location'] != None and cfg_node.properties['location'].split(':')[0] == delete_loc and cfg_node.properties['code'] != 'delete' and cfg_node.properties['code'] != '[' and cfg_node.properties['code'] != '[': 599 | objectname = cfg_node.properties['code'] 600 | #print objectname 601 | 602 | ob_classname = 
getClassByObjectAndFuncID(db, objectname, functionID) 603 | 604 | if ob_classname == classname: 605 | pdg = getFuncPDGByfuncIDAndtestID(functionID, testID) 606 | if pdg == False: 607 | continue 608 | 609 | for p_n in pdg.vs: 610 | if p_n['name'] == str(node._id): 611 | list_callee_id.append((functionID, str(node._id))) 612 | 613 | elif p_n['name'] == str(cfg_node._id): 614 | list_callee_id.append((functionID, str(cfg_node._id))) #delete and its object node 615 | 616 | else: 617 | continue 618 | 619 | 620 | else: 621 | continue 622 | 623 | else: 624 | print 3 625 | tag = 'func' 626 | list_callee_id = [] 627 | for _t in list_all_callee:#_t is a tuple, _t[0] is nodeid, 1 is funcname, 2 is func_id 628 | if _t[1].find('-> '+ func_name) != -1:#maybe is a class->funcname() 629 | objectname = _t[1].split(' -> '+ func_name)[0].strip() 630 | ob_classname = getClassByObjectAndFuncID(db, objectname, _t[2]) 631 | 632 | if ob_classname == classname: 633 | list_callee_id.append(_t[0]) 634 | 635 | else: 636 | continue 637 | 638 | else: 639 | continue 640 | 641 | 642 | else: 643 | tag = 'func' 644 | list_callee_id = [] 645 | for _t in list_all_callee: 646 | if _t[1] == function_name: 647 | list_callee_id.append(_t[0]) 648 | 649 | #print 4, list_callee_id 650 | if list_callee_id == []: 651 | continue 652 | 653 | else: 654 | #change ast node to cfgnode 655 | list_callee_CFGNode = [] 656 | if tag == 'func': 657 | #print 'z' 658 | for node_id in list_callee_id: 659 | #print 1 660 | callee_cfgnode = getCFGNodeByCallee(db, node_id) 661 | #print callee_cfgnode 662 | #print 2 663 | 664 | if callee_cfgnode == None: 665 | 666 | print 'ERROR', callee_cfgnode 667 | continue 668 | else: 669 | list_callee_CFGNode.append(callee_cfgnode) 670 | 671 | #print 'x' 672 | for node in list_callee_CFGNode: 673 | startNode = str(node.properties['functionId']) 674 | endNode = str(func_node._id) 675 | var = str(node._id) 676 | call_g = addDataEdge(call_g, startNode, endNode, var)#var is callee node id 677 | 
else: 678 | #print 'y' 679 | for node in list_callee_id: 680 | startNode = str(node[0]) 681 | endNode = str(func_node._id) 682 | var = str(node[1]) 683 | call_g = addDataEdge(call_g, startNode, endNode, var)#var is callee node id 684 | 685 | 686 | return call_g 687 | 688 | 689 | if __name__ == '__main__': 690 | j = JoernSteps() 691 | j.connectToDatabase() 692 | 693 | pdg_db_path = "pdg_db" 694 | list_testID = os.listdir(pdg_db_path) 695 | print list_testID 696 | for testID in list_testID: 697 | #if testID != '69055': 698 | # continue 699 | 700 | if os.path.exists(os.path.join("dict_call2cfgNodeID_funcID", str(testID))): 701 | continue 702 | 703 | call_g = getCallGraph(j, testID) 704 | if call_g == False: 705 | continue 706 | 707 | _dict = {} 708 | for edge in call_g.es: 709 | endnode = call_g.vs[edge.tuple[1]] 710 | 711 | if endnode['name'] not in _dict: 712 | _dict[endnode['name']] = [(edge['var'], call_g.vs[edge.tuple[0]]['name'])] 713 | 714 | else: 715 | _dict[endnode['name']].append((edge['var'], call_g.vs[edge.tuple[0]]['name'])) 716 | 717 | if not os.path.exists(os.path.join("dict_call2cfgNodeID_funcID", str(testID))): 718 | os.mkdir(os.path.join("dict_call2cfgNodeID_funcID", str(testID))) 719 | 720 | filepath = os.path.join("dict_call2cfgNodeID_funcID", str(testID), "dict.pkl") 721 | 722 | print _dict 723 | f = open(filepath, 'wb') 724 | pickle.dump(_dict, f, True) 725 | f.close() 726 | -------------------------------------------------------------------------------- /all.py: -------------------------------------------------------------------------------- 1 | from py2neo import Graph 2 | from py2neo.ext.gremlin import Gremlin 3 | import os 4 | 5 | DEFAULT_GRAPHDB_URL = "http://localhost:7474/db/data/" 6 | DEFAULT_STEP_DIR = os.path.dirname(__file__) + '/joernsteps/' 7 | 8 | class JoernSteps: 9 | 10 | def __init__(self): 11 | self._initJoernSteps() 12 | self.initCommandSent = False 13 | 14 | def setGraphDbURL(self, url): 15 | """ Sets the graph database URL. 
By default, 16 | http://localhost:7474/db/data/ is used.""" 17 | self.graphDbURL = url 18 | 19 | def addStepsDir(self, stepsDir): 20 | """Add an additional directory containing steps to be injected 21 | into the server""" 22 | self.stepsDirs.append(stepsDir) 23 | 24 | def connectToDatabase(self): 25 | """ Connects to the database server.""" 26 | self.graphDb = Graph(self.graphDbURL) 27 | self.gremlin = Gremlin(self.graphDb) 28 | 29 | def runGremlinQuery(self, query): 30 | 31 | """ Runs the specified gremlin query on the database. It is 32 | assumed that a connection to the database has been 33 | established. To allow the user-defined steps located in the 34 | joernsteps directory to be used in the query, these step 35 | definitions are prepended to the query.""" 36 | 37 | if not self.initCommandSent: 38 | self.initCommand = self._createInitCommand() 39 | self.initCommandSent = True 40 | finalQuery = self.initCommand 41 | else: 42 | finalQuery = "" 43 | finalQuery += query 44 | return self.gremlin.execute(finalQuery) 45 | 46 | def runCypherQuery(self, cmd): 47 | """ Runs the specified cypher query on the graph database.""" 48 | return cypher.execute(self.graphDb, cmd) 49 | 50 | def getGraphDbURL(self): 51 | return self.graphDbURL 52 | 53 | """ 54 | Create chunks from a list of ids. 55 | This method is useful when you want to execute many independent 56 | traversals on a large set of start nodes. In that case, you 57 | can retrieve the set of start node ids first, then use 'chunks' 58 | to obtain disjoint subsets that can be passed to idListToNodes. 
59 | """ 60 | def chunks(self, idList, chunkSize): 61 | for i in xrange(0, len(idList), chunkSize): 62 | yield idList[i:i+chunkSize] 63 | 64 | def _initJoernSteps(self): 65 | self.graphDbURL = DEFAULT_GRAPHDB_URL 66 | self.stepsDirs = [DEFAULT_STEP_DIR] 67 | 68 | def _createInitCommand(self): 69 | 70 | initCommand = "" 71 | 72 | for stepsDir in self.stepsDirs: 73 | for (root, dirs, files) in os.walk(stepsDir, followlinks=True): 74 | files.sort() 75 | for f in files: 76 | filename = os.path.join(root, f) 77 | if not filename.endswith('.groovy'): continue 78 | initCommand += file(filename).read() + "\n" 79 | return initCommand 80 | -------------------------------------------------------------------------------- /complete_PDG.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | from access_db_operate import * 3 | import copy 4 | from general_op import * 5 | from py2neo.packages.httpstream import http 6 | http.socket_timeout = 9999 7 | 8 | def modifyDataEdgeVal(pdg): 9 | for edge in pdg.es: 10 | if edge['var'] == None: 11 | continue 12 | 13 | new_val = '' 14 | for c in edge['var']: 15 | if c == '*': 16 | continue 17 | else: 18 | new_val += c 19 | 20 | edge['var'] = new_val 21 | 22 | return pdg 23 | 24 | 25 | def modifyStmtNode(pdg): 26 | compare_row = 0 27 | dict_row2nodestmt = {} 28 | dict_row2nodeid = {} 29 | #only process statement node 30 | dict_static = {} 31 | 32 | i = 0 33 | while i < pdg.vcount(): 34 | if pdg.vs[i]['type'] == 'Statement' and pdg.vs[i]['code'] == 'static': 35 | raw = int(pdg.vs[i]['location'].split(':')[0]) 36 | col = int(pdg.vs[i]['location'].split(':')[1]) 37 | dict_static[raw] = (raw, col) 38 | pdg.delete_vertices(i) 39 | else: 40 | i += 1 41 | 42 | i = 0 43 | while i < pdg.vcount(): 44 | if pdg.vs[i]['type'] == 'Statement': 45 | row = int(pdg.vs[i]['location'].split(':')[0]) 46 | col = int(pdg.vs[i]['location'].split(':')[1]) 47 | _tuple = (pdg.vs[i]['code'], row, col, 
pdg.vs[i]['location']) 48 | 49 | if row not in dict_row2nodestmt.keys(): 50 | dict_row2nodestmt[row] = [_tuple] 51 | dict_row2nodeid[row] = pdg.vs[i]['name'] #to confirm delete order 52 | i += 1 53 | 54 | else: 55 | dict_row2nodestmt[row].append(_tuple) 56 | pdg.delete_vertices(i) 57 | 58 | 59 | else: 60 | i += 1 61 | 62 | #process single node but not statement node 63 | j = 0 64 | list_nodeindex_to_delete = [] 65 | while j < pdg.vcount(): 66 | if pdg.vs[j]['location'] != None: 67 | row = int(pdg.vs[j]['location'].split(':')[0]) 68 | col = int(pdg.vs[j]['location'].split(':')[1]) 69 | else: 70 | j += 1 71 | continue 72 | 73 | if row in dict_row2nodestmt.keys() and pdg.vs[j]['type'] != 'Statement': 74 | _tuple = (pdg.vs[j]['code'], row, col, pdg.vs[j]['location']) 75 | dict_row2nodestmt[row].append(_tuple) 76 | list_nodeindex_to_delete.append(pdg.vs[j]['name']) 77 | j += 1 78 | 79 | else: 80 | j += 1 81 | 82 | 83 | for key in dict_row2nodestmt.keys(): 84 | dict_row2nodestmt[key].sort(key=lambda e:e[2]) 85 | #print dict_row2nodestmt[key] 86 | nodename = dict_row2nodeid[key] 87 | nodeIndex = 0 88 | for node in pdg.vs: 89 | if node['name'] == nodename: 90 | break 91 | else: 92 | nodeIndex += 1 93 | 94 | location = dict_row2nodestmt[key][0][3] 95 | 96 | new_code = ' '.join([_t[0] for _t in dict_row2nodestmt[key]]).strip() 97 | 98 | #not consider ';' appear too much times 99 | pdg.vs[nodeIndex]['code'] = new_code 100 | pdg.vs[nodeIndex]['location'] = location 101 | pdg.vs[nodeIndex]['type'] = 'Statement' 102 | 103 | for d_name in list_nodeindex_to_delete: 104 | i = 0 105 | while i < pdg.vcount(): 106 | if pdg.vs[i]['name'] == d_name: 107 | pdg.delete_vertices(i) 108 | else: 109 | i += 1 110 | 111 | 112 | n = 0 113 | while n < pdg.vcount(): 114 | if pdg.vs[n]['location'] == None: 115 | n += 1 116 | continue 117 | 118 | raw = int(pdg.vs[n]['location'].split(':')[0]) 119 | col = int(pdg.vs[n]['location'].split(':')[1]) 120 | if raw in dict_static.keys() and col > 
dict_static[raw][1]: 121 | pdg.vs[n]['code'] = 'static ' + pdg.vs[n]['code'] 122 | 123 | n += 1 124 | 125 | list_node_index = [] 126 | for node in pdg.vs: 127 | if node['type'] == 'Statement': 128 | raw = int(node['location'].split(':')[0]) 129 | list_node_index.append((raw, node)) 130 | 131 | list_node_index.sort(key=lambda x:(x[0], x[1])) 132 | 133 | i = 1 134 | list_del_name = [] 135 | while i < len(list_node_index): 136 | if list_node_index[i][0]-list_node_index[i-1][0] == 1: 137 | list_node_index[i][1]['code'] = list_node_index[i-1][1]['code'] + '\n' + list_node_index[i][1]['code'] 138 | list_del_name.append(list_node_index[i-1][1]['name']) 139 | del list_node_index[i-1] 140 | else: 141 | i += 1 142 | 143 | _dict = {} 144 | for n in list_node_index: 145 | _dict[n[1]['name']] = n[1]['code'] 146 | 147 | j = 0 148 | while j < pdg.vcount(): 149 | if pdg.vs[j]['name'] in list_del_name: 150 | pdg.delete_vertices(j) 151 | elif pdg.vs[j]['name'] in _dict.keys(): 152 | pdg.vs[j]['code'] = _dict[pdg.vs[j]['name']] 153 | j += 1 154 | else: 155 | j += 1 156 | 157 | #for v in pdg.vs: 158 | # print v['code'], v['type'], v['name'] 159 | #exit() 160 | 161 | return pdg 162 | 163 | 164 | def getInitNodeOfDecl(pdg, list_sorted_pdgnode, node, var, dict_use, dict_def): 165 | index = list_sorted_pdgnode.index(node) 166 | list_init_node = [] 167 | for i in range(index+1, len(list_sorted_pdgnode)): 168 | if list_sorted_pdgnode[i]['type'] != 'IdentifierDeclStatement' and list_sorted_pdgnode[i]['name'] in dict_def.keys(): 169 | if var in dict_def[list_sorted_pdgnode[i]['name']]: 170 | if isEdgeExists(pdg, node['name'], list_sorted_pdgnode[i]['name'], var): 171 | continue 172 | else: 173 | list_init_node.append((list_sorted_pdgnode[i], i))#is init node and dataedge not exists 174 | 175 | elif list_sorted_pdgnode[i]['type'] != 'IdentifierDeclStatement' and list_sorted_pdgnode[i]['name'] not in dict_def.keys(): 176 | print list_sorted_pdgnode[i]['name'] 177 | if 
list_sorted_pdgnode[i]['name'] in dict_use.keys() and var in dict_use[list_sorted_pdgnode[i]['name']]: 178 | #print '2' 179 | if isEdgeExists(pdg, node['name'], list_sorted_pdgnode[i]['name'], var): 180 | continue 181 | else: 182 | list_init_node.append((list_sorted_pdgnode[i], i)) 183 | 184 | else: 185 | continue 186 | 187 | return list_init_node 188 | 189 | 190 | def completeDeclStmtOfPDG(pdg, dict_use, dict_def, dict_if2cfgnode, dict_cfgnode2if): 191 | list_sorted_pdgnode = sortedNodesByLoc(pdg.vs) 192 | dict_declnode2val = {} 193 | for node in pdg.vs: 194 | if (node['type'] == 'IdentifierDeclStatement' or node['type'] == 'Parameter' or node['type'] == 'Statement') and node['code'].find(' = ') == -1:#find not init node 195 | if node['type'] == 'IdentifierDeclStatement' or node['type'] == 'Parameter': 196 | list_var = dict_def[node['name']] 197 | else: 198 | list_var = getVarOfNode(node['code']) 199 | 200 | if list_var == False: 201 | continue 202 | 203 | else: 204 | for var in list_var: 205 | results = getInitNodeOfDecl(pdg, list_sorted_pdgnode, node, var, dict_use, dict_def) 206 | if results != []: 207 | for result in results: 208 | if node['name'] not in dict_cfgnode2if.keys():#startnode not belong to if 209 | startnode = node['name'] 210 | endnode = result[0]['name'] 211 | pdg = addDataEdge(pdg, startnode, endnode, var) 212 | 213 | else: 214 | list_if = dict_cfgnode2if[node['name']] 215 | list_not_scan = [] 216 | 217 | for ifstmt_n in list_if: 218 | tuple_statements = dict_if2cfgnode[ifstmt_n] 219 | 220 | if node['name'] in tuple_statements[0]: 221 | list_not_scan += tuple_statements[1] 222 | 223 | elif node['name'] in tuple_statements[1]: 224 | list_not_scan += tuple_statements[0] 225 | 226 | if result[0]['name'] not in list_not_scan: 227 | startnode = node['name'] 228 | endnode = result[0]['name'] 229 | pdg = addDataEdge(pdg, startnode, endnode, var) 230 | 231 | return pdg 232 | 233 | 234 | def get_nodes_before_exit(pdg, dict_if2cfgnode, dict_cfgnode2if): 
235 | _dict = {} 236 | for key in dict_cfgnode2if.keys(): 237 | results = pdg.vs.select(name=key) 238 | if len(results) != 0 and (results[0]['type'] == 'BreakStatement' or results[0]['type'] == 'ReturnStatement' or results[0]['code'].find('exit ') != -1 or results[0]['type'] == 'GotoStatement'):# if stms have return 239 | if_name = '' 240 | if len(dict_cfgnode2if[key]) == 1: 241 | if_name = dict_cfgnode2if[key][0] 242 | else: 243 | if_name = get_ifname(key, dict_if2cfgnode, dict_cfgnode2if) 244 | 245 | print "key", key, if_name, dict_cfgnode2if[key] 246 | 247 | _list_name_0 = dict_if2cfgnode[if_name][0] 248 | _list_name_1 = dict_if2cfgnode[if_name][1] 249 | 250 | if key in _list_name_0: 251 | ret_index = _list_name_0.index(key) 252 | del _list_name_0[ret_index] #_list_name are set of nodes which under the same if with return node or exit or goto statement 253 | 254 | for name in _list_name_0: 255 | _dict[name] = key 256 | 257 | if key in _list_name_1: 258 | ret_index = _list_name_1.index(key) 259 | del _list_name_1[ret_index] #_list_name are set of nodes which under the same if with return node or exit or goto statement 260 | 261 | for name in _list_name_1: 262 | _dict[name] = key 263 | 264 | else: 265 | continue 266 | 267 | return _dict 268 | 269 | 270 | def completeDataEdgeOfPDG(pdg, dict_use, dict_def, dict_if2cfgnode, dict_cfgnode2if): 271 | #if a var in define list but there is not a edge between a node which use it and node which define it,not include id_decl 272 | list_sorted_pdgnode = sortedNodesByLoc(pdg.vs) 273 | exit2stmt_dict = get_nodes_before_exit(pdg, dict_if2cfgnode, dict_cfgnode2if) 274 | dict_declnode2val = {} 275 | 276 | for i in range(0, len(list_sorted_pdgnode)): 277 | if list_sorted_pdgnode[i]['type'] == 'IdentifierDeclStatement': 278 | continue 279 | 280 | if list_sorted_pdgnode[i]['name'] in dict_def.keys(): 281 | #print "list_sorted_pdgnode[i]['name']", list_sorted_pdgnode[i]['name'] 282 | list_def_var = 
dict_def[list_sorted_pdgnode[i]['name']] 283 | 284 | for def_var in list_def_var: 285 | for j in range(i+1, len(list_sorted_pdgnode)): 286 | if list_sorted_pdgnode[i]['name'] in exit2stmt_dict.keys(): 287 | exit_name = exit2stmt_dict[list_sorted_pdgnode[i]['name']] 288 | 289 | if list_sorted_pdgnode[j]['name'] == exit_name: 290 | break 291 | 292 | elif list_sorted_pdgnode[j]['name'] in dict_use.keys() and def_var in dict_use[list_sorted_pdgnode[j]['name']]: 293 | if list_sorted_pdgnode[i]['name'] not in dict_cfgnode2if.keys(): 294 | #must add 295 | startnode = list_sorted_pdgnode[i]['name'] 296 | endnode = list_sorted_pdgnode[j]['name'] 297 | addDataEdge(pdg, startnode, endnode, def_var) 298 | 299 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 300 | break 301 | 302 | elif list_sorted_pdgnode[i]['name'] in dict_cfgnode2if.keys() and list_sorted_pdgnode[j]['name'] not in dict_cfgnode2if.keys(): 303 | startnode = list_sorted_pdgnode[i]['name'] 304 | endnode = list_sorted_pdgnode[j]['name'] 305 | addDataEdge(pdg, startnode, endnode, def_var) 306 | 307 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 308 | break 309 | 310 | elif list_sorted_pdgnode[i]['name'] in dict_cfgnode2if.keys() and list_sorted_pdgnode[j]['name'] in dict_cfgnode2if.keys(): 311 | if_list = dict_cfgnode2if[list_sorted_pdgnode[i]['name']] 312 | _not_scan = [] 313 | for if_stmt in if_list: 314 | _tuple = dict_if2cfgnode[if_stmt] 315 | if list_sorted_pdgnode[i]['name'] in _tuple[0]: 316 | _not_scan += _tuple[1] 317 | else: 318 | _not_scan += _tuple[0] 319 | 320 | if list_sorted_pdgnode[j]['name'] not in _not_scan: 321 | startnode = list_sorted_pdgnode[i]['name'] 322 | endnode = list_sorted_pdgnode[j]['name'] 323 | addDataEdge(pdg, startnode, endnode, def_var) 324 | 325 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in 
dict_def[list_sorted_pdgnode[j]['name']]: 326 | break 327 | 328 | else: 329 | if list_sorted_pdgnode[j]['name'] in dict_use.keys() and def_var in dict_use[list_sorted_pdgnode[j]['name']]: 330 | if list_sorted_pdgnode[i]['name'] not in dict_cfgnode2if.keys(): 331 | #must add 332 | startnode = list_sorted_pdgnode[i]['name'] 333 | endnode = list_sorted_pdgnode[j]['name'] 334 | addDataEdge(pdg, startnode, endnode, def_var) 335 | 336 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 337 | break 338 | 339 | elif list_sorted_pdgnode[i]['name'] in dict_cfgnode2if.keys() and list_sorted_pdgnode[j]['name'] not in dict_cfgnode2if.keys(): 340 | startnode = list_sorted_pdgnode[i]['name'] 341 | endnode = list_sorted_pdgnode[j]['name'] 342 | addDataEdge(pdg, startnode, endnode, def_var) 343 | 344 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 345 | break 346 | 347 | elif list_sorted_pdgnode[i]['name'] in dict_cfgnode2if.keys() and list_sorted_pdgnode[j]['name'] in dict_cfgnode2if.keys(): 348 | if_list = dict_cfgnode2if[list_sorted_pdgnode[i]['name']] 349 | _not_scan = [] 350 | for if_stmt in if_list: 351 | _tuple = dict_if2cfgnode[if_stmt] 352 | if list_sorted_pdgnode[i]['name'] in _tuple[0]: 353 | _not_scan += _tuple[1] 354 | else: 355 | _not_scan += _tuple[0] 356 | 357 | if list_sorted_pdgnode[j]['name'] not in _not_scan: 358 | startnode = list_sorted_pdgnode[i]['name'] 359 | endnode = list_sorted_pdgnode[j]['name'] 360 | addDataEdge(pdg, startnode, endnode, def_var) 361 | 362 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 363 | break 364 | 365 | 366 | else: 367 | continue 368 | 369 | return pdg 370 | 371 | 372 | def addDataEdgeOfObject(pdg, dict_if2cfgnode, dict_cfgnode2if): 373 | for node in pdg.vs: 374 | if node['code'].find(' = new ') != -1: 375 | objectname = node['code'].split(' = 
new ')[0].split(' ')[-1].strip() 376 | cur_name = node['name'] 377 | 378 | for pnode in pdg.vs: 379 | #print pnode['code'] 380 | if pnode['name'] == cur_name: 381 | continue 382 | 383 | if node['name'] not in dict_cfgnode2if.keys(): 384 | if pnode['code'].find(objectname + ' -> ') != -1: 385 | if pnode['code'].split(objectname + ' -> ')[0] == '': 386 | startnode = node['name'] 387 | endnode = pnode['name'] 388 | def_var = objectname 389 | addDataEdge(pdg, startnode, endnode, def_var) 390 | elif pnode['code'].split(objectname + ' -> ')[0][-1] == ' ': 391 | startnode = node['name'] 392 | endnode = pnode['name'] 393 | def_var = objectname 394 | addDataEdge(pdg, startnode, endnode, def_var) 395 | 396 | elif pnode['code'].find('delete ') != -1: 397 | startnode = node['name'] 398 | endnode = pnode['name'] 399 | def_var = objectname 400 | addDataEdge(pdg, startnode, endnode, def_var) 401 | 402 | else: 403 | continue 404 | 405 | else: 406 | list_if = dict_cfgnode2if[node['name']] 407 | list_not_scan = [] 408 | 409 | for ifstmt_n in list_if: 410 | tuple_statements = dict_if2cfgnode[ifstmt_n] 411 | 412 | if node['name'] in tuple_statements[0]: 413 | list_not_scan += tuple_statements[1] 414 | 415 | elif node['name'] in tuple_statements[1]: 416 | list_not_scan += tuple_statements[0] 417 | 418 | if pnode['code'].find(objectname + ' -> ') != -1 and pnode['name'] not in list_not_scan: 419 | if pnode['code'].split(objectname + ' -> ')[0] == '': 420 | startnode = node['name'] 421 | endnode = pnode['name'] 422 | def_var = objectname 423 | addDataEdge(pdg, startnode, endnode, def_var) 424 | elif pnode['code'].split(objectname + ' -> ')[0][-1] == ' ' : 425 | startnode = node['name'] 426 | endnode = pnode['name'] 427 | def_var = objectname 428 | addDataEdge(pdg, startnode, endnode, def_var) 429 | 430 | elif pnode['code'].find('delete ') != -1 and pnode['name'] not in list_not_scan: 431 | startnode = node['name'] 432 | endnode = pnode['name'] 433 | def_var = objectname 434 | 
addDataEdge(pdg, startnode, endnode, def_var) 435 | 436 | else: 437 | continue 438 | 439 | else: 440 | continue 441 | 442 | return pdg 443 | 444 | def deleteCDG(pdg): 445 | edge=pdg.es 446 | a=len(edge) 447 | list_d=[] 448 | print("delete cdg") 449 | for j in range(0,a): 450 | #print edge[j] 451 | if edge[j]['var']==None: 452 | list_d.append(j) 453 | a=list(reversed(list_d)) 454 | for i in a: 455 | pdg.delete_edges(edge[i]) 456 | return pdg 457 | 458 | def main(): 459 | j = JoernSteps() 460 | j.connectToDatabase() 461 | all_func_node = getALLFuncNode(j) 462 | for node in all_func_node: 463 | testID = getFuncFile(j, node._id).split('/')[-2] 464 | path = os.path.join("pdg_db", testID) 465 | 466 | store_file_name = node.properties['name'] + '_' + str(node._id) 467 | 468 | store_path = os.path.join(path, store_file_name) 469 | if os.path.exists(store_path): 470 | continue 471 | 472 | initpdg = translatePDGByNode(j, node)#get init PDG 473 | opt_pdg_1 = modifyStmtNode(initpdg)#merge every statement node 474 | 475 | cfg_path = os.path.join("cfg_db", testID, store_file_name) 476 | for _file in os.listdir(cfg_path): 477 | if _file == 'dict_if2cfgnode': 478 | fin = open(os.path.join(cfg_path, _file)) 479 | dict_if2cfgnode = pickle.load(fin) 480 | fin.close() 481 | 482 | elif _file == 'dict_cfgnode2if': 483 | fin = open(os.path.join(cfg_path, _file)) 484 | dict_cfgnode2if = pickle.load(fin) 485 | fin.close() 486 | 487 | else: 488 | print cfg_path 489 | fin = open(os.path.join(cfg_path, _file)) 490 | cfg = pickle.load(fin) 491 | fin.close() 492 | 493 | i = 0 494 | while i < opt_pdg_1.vcount(): 495 | if opt_pdg_1.vs[i]['type'] == 'Statement' and opt_pdg_1.vs[i]['name'] not in cfg.vs['name']: 496 | for n in cfg.vs: 497 | if opt_pdg_1.vs[i]['code'] == n['code'] and int(opt_pdg_1.vs[i]['location'].split(':')[0]) == int(n['location'].split(':')[0]): 498 | opt_pdg_1.vs[i]['name'] = n['name'] 499 | opt_pdg_1.vs[i]['location'] = n['location'] 500 | break 501 | else: 502 | continue 503 
| 504 | i += 1 505 | 506 | d_use, d_def = getUseDefVarByPDG(j, opt_pdg_1)#get use and def nodedict of every cfgnode 507 | opt_pdg_2 = modifyDataEdgeVal(opt_pdg_1)#not distinguish pointer and buffer it points 508 | 509 | opt_pdg_3 = completeDeclStmtOfPDG(opt_pdg_2, d_use, d_def, dict_if2cfgnode, dict_cfgnode2if) 510 | 511 | opt_pdg_4 = completeDataEdgeOfPDG(opt_pdg_3, d_use, d_def, dict_if2cfgnode, dict_cfgnode2if)#add data edge to get more info 512 | 513 | opted_pdg_5 = addDataEdgeOfObject(opt_pdg_4, dict_if2cfgnode, dict_cfgnode2if) 514 | 515 | #opted_pdg=deleteCDG(opted_pdg_5) 516 | 517 | 518 | if not os.path.exists(path): 519 | os.mkdir(path) 520 | print store_path, path 521 | f = open(store_path, 'wb') 522 | pickle.dump(opted_pdg_5, f, True) 523 | f.close() 524 | 525 | 526 | if __name__ == '__main__': 527 | main() 528 | 529 | 530 | 531 | -------------------------------------------------------------------------------- /data_preprocess.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | 3 | import pickle 4 | import os 5 | 6 | slice_path = './slices/' 7 | label_path = './label_source/' 8 | folder_path = './slice_label/' 9 | for filename in os.listdir(slice_path): 10 | if filename.endswith('.txt') is False: 11 | continue 12 | print(filename) 13 | filepath = os.path.join(slice_path,filename) 14 | f = open(filepath,'r') 15 | slicelists = f.read().split('------------------------------') 16 | f.close() 17 | labelpath = os.path.join(label_path,filename[:-4]+'.pkl') 18 | f = open(labelpath,'rb') 19 | labellists = pickle.load(f) 20 | if isinstance(labellists,tuple): 21 | labellists = labellists[0] 22 | f.close() 23 | 24 | if slicelists[0] == '': 25 | del slicelists[0] 26 | if slicelists[-1] == '' or slicelists[-1] == '\n' or slicelists[-1] == '\r\n': 27 | del slicelists[-1] 28 | 29 | file_path = os.path.join(folder_path,filename) 30 | f = open(file_path,'a+') 31 | index = -1 32 | for slicelist in slicelists: 33 | 
index += 1 34 | sentences = slicelist.split('\n') 35 | if sentences[0] == '\r' or sentences[0] == '': 36 | del sentences[0] 37 | if sentences == []: 38 | continue 39 | if sentences[-1] == '': 40 | del sentences[-1] 41 | if sentences[-1] == '\r': 42 | del sentences[-1] 43 | labellist = labellists[index] 44 | for labels in labellist: 45 | if labels: 46 | label = 1 47 | else: 48 | label = 0 49 | for sentence in sentences: 50 | f.write(str(sentence)+'\n') 51 | f.write(str(label)+'\n') 52 | f.write('------------------------------'+'\n') 53 | f.close() 54 | print('\success!') 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /extract_df.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | from joern.all import JoernSteps 3 | from igraph import * 4 | from access_db_operate import * 5 | from slice_op import * 6 | from py2neo.packages.httpstream import http 7 | http.socket_timeout = 9999 8 | 9 | 10 | def get_slice_file_sequence(store_filepath, list_result, count, func_name, startline, filepath_all): 11 | list_for_line = [] 12 | statement_line = 0 13 | vulnline_row = 0 14 | list_write2file = [] 15 | 16 | for node in list_result: 17 | if node['type'] == 'Function': 18 | f2 = open(node['filepath'], 'r') 19 | content = f2.readlines() 20 | f2.close() 21 | raw = int(node['location'].split(':')[0])-1 22 | code = content[raw].strip() 23 | 24 | new_code = "" 25 | if code.find("#define") != -1: 26 | list_write2file.append(code + ' ' + str(raw+1) + '\n') 27 | continue 28 | 29 | while (len(code) >= 1 and code[-1] != ')' and code[-1] != '{'): 30 | if code.find('{') != -1: 31 | index = code.index('{') 32 | new_code += code[:index].strip() 33 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 34 | break 35 | 36 | else: 37 | new_code += code + '\n' 38 | raw += 1 39 | code = content[raw].strip() 40 | #print "raw", raw, code 41 | 42 | else: 43 | new_code += code 44 | 
new_code = new_code.strip() 45 | if new_code[-1] == '{': 46 | new_code = new_code[:-1].strip() 47 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 48 | #list_line.append(str(raw+1)) 49 | else: 50 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 51 | #list_line.append(str(raw+1)) 52 | 53 | elif node['type'] == 'Condition': 54 | raw = int(node['location'].split(':')[0])-1 55 | if raw in list_for_line: 56 | continue 57 | else: 58 | #print node['type'], node['code'], node['name'] 59 | f2 = open(node['filepath'], 'r') 60 | content = f2.readlines() 61 | f2.close() 62 | code = content[raw].strip() 63 | pattern = re.compile("(?:if|while|for|switch)") 64 | #print code 65 | res = re.search(pattern, code) 66 | if res == None: 67 | raw = raw - 1 68 | code = content[raw].strip() 69 | new_code = "" 70 | 71 | while (code[-1] != ')' and code[-1] != '{'): 72 | if code.find('{') != -1: 73 | index = code.index('{') 74 | new_code += code[:index].strip() 75 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 76 | #list_line.append(str(raw+1)) 77 | list_for_line.append(raw) 78 | break 79 | 80 | else: 81 | new_code += code + '\n' 82 | list_for_line.append(raw) 83 | raw += 1 84 | code = content[raw].strip() 85 | 86 | else: 87 | new_code += code 88 | new_code = new_code.strip() 89 | if new_code[-1] == '{': 90 | new_code = new_code[:-1].strip() 91 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 92 | #list_line.append(str(raw+1)) 93 | list_for_line.append(raw) 94 | 95 | else: 96 | list_for_line.append(raw) 97 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 98 | #list_line.append(str(raw+1)) 99 | 100 | else: 101 | res = res.group() 102 | if res == '': 103 | print filepath_all + ' ' + func_name + " error!" 
104 | exit() 105 | 106 | elif res != 'for': 107 | new_code = res + ' ( ' + node['code'] + ' ) ' 108 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 109 | #list_line.append(str(raw+1)) 110 | 111 | else: 112 | new_code = "" 113 | if code.find(' for ') != -1: 114 | code = 'for ' + code.split(' for ')[1] 115 | 116 | while code != '' and code[-1] != ')' and code[-1] != '{': 117 | if code.find('{') != -1: 118 | index = code.index('{') 119 | new_code += code[:index].strip() 120 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 121 | #list_line.append(str(raw+1)) 122 | list_for_line.append(raw) 123 | break 124 | 125 | elif code[-1] == ';' and code[:-1].count(';') >= 2: 126 | new_code += code 127 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 128 | #list_line.append(str(raw+1)) 129 | list_for_line.append(raw) 130 | break 131 | 132 | else: 133 | new_code += code + '\n' 134 | list_for_line.append(raw) 135 | raw += 1 136 | code = content[raw].strip() 137 | 138 | else: 139 | new_code += code 140 | new_code = new_code.strip() 141 | if new_code[-1] == '{': 142 | new_code = new_code[:-1].strip() 143 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 144 | #list_line.append(str(raw+1)) 145 | list_for_line.append(raw) 146 | 147 | else: 148 | list_for_line.append(raw) 149 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 150 | #list_line.append(str(raw+1)) 151 | 152 | elif node['type'] == 'Label': 153 | f2 = open(node['filepath'], 'r') 154 | content = f2.readlines() 155 | f2.close() 156 | raw = int(node['location'].split(':')[0])-1 157 | code = content[raw].strip() 158 | list_write2file.append(code + ' ' + str(raw+1) + '\n') 159 | #list_line.append(str(raw+1)) 160 | 161 | elif node['type'] == 'ForInit': 162 | continue 163 | 164 | elif node['type'] == 'Parameter': 165 | if list_result[0]['type'] != 'Function': 166 | row = node['location'].split(':')[0] 167 | list_write2file.append(node['code'] + ' ' + str(row) + '\n') 168 | 
#list_line.append(row) 169 | else: 170 | continue 171 | 172 | elif node['type'] == 'IdentifierDeclStatement': 173 | if node['code'].strip().split(' ')[0] == "undef": 174 | f2 = open(node['filepath'], 'r') 175 | content = f2.readlines() 176 | f2.close() 177 | raw = int(node['location'].split(':')[0])-1 178 | code1 = content[raw].strip() 179 | list_code2 = node['code'].strip().split(' ') 180 | i = 0 181 | while i < len(list_code2): 182 | if code1.find(list_code2[i]) != -1: 183 | del list_code2[i] 184 | else: 185 | break 186 | code2 = ' '.join(list_code2) 187 | 188 | list_write2file.append(code1 + ' ' + str(raw+1) + '\n' + code2 + ' ' + str(raw+2) + '\n') 189 | 190 | else: 191 | list_write2file.append(node['code'] + ' ' + node['location'].split(':')[0] + '\n') 192 | 193 | elif node['type'] == 'ExpressionStatement': 194 | row = int(node['location'].split(':')[0])-1 195 | if row in list_for_line: 196 | continue 197 | 198 | if node['code'] in ['\n', '\t', ' ', '']: 199 | list_write2file.append(node['code'] + ' ' + str(row+1) + '\n') 200 | #list_line.append(row+1) 201 | elif node['code'].strip()[-1] != ';': 202 | list_write2file.append(node['code'] + '; ' + str(row+1) + '\n') 203 | #list_line.append(row+1) 204 | else: 205 | list_write2file.append(node['code'] + ' ' + str(row+1) + '\n') 206 | #list_line.append(row+1) 207 | 208 | elif node['type'] == "Statement": 209 | row = node['location'].split(':')[0] 210 | list_write2file.append(node['code'] + ' ' + str(row) + '\n') 211 | #list_line.append(row+1) 212 | 213 | else: 214 | #print node['name'], node['code'], node['type'], node['filepath'] 215 | if node['location'] == None: 216 | continue 217 | f2 = open(node['filepath'], 'r') 218 | content = f2.readlines() 219 | f2.close() 220 | row = int(node['location'].split(':')[0])-1 221 | code = content[row].strip() 222 | if row in list_for_line: 223 | continue 224 | 225 | else: 226 | list_write2file.append(node['code'] + ' ' + str(row+1) + '\n') 227 | #list_line.append(str(row+1)) 
228 | 229 | f = open(store_filepath, 'a') 230 | f.write(str(count) + ' ' + filepath_all + ' ' + func_name + ' ' + startline + '\n') 231 | for wb in list_write2file: 232 | f.write(wb) 233 | f.write('------------------------------' + '\n') 234 | f.close() 235 | 236 | 237 | def program_slice(pdg, startnodesID, slicetype, testID):#process startnodes as a list, because main func has many different arguments 238 | list_startnodes = [] 239 | if pdg == False or pdg == None: 240 | return [], [], [] 241 | 242 | for node in pdg.vs: 243 | #print node['functionId'] 244 | if node['name'] in startnodesID: 245 | list_startnodes.append(node) 246 | 247 | if list_startnodes == []: 248 | return [], [], [] 249 | 250 | if slicetype == 0:#backwords 251 | start_line = list_startnodes[0]['location'].split(':')[0] 252 | start_name = list_startnodes[0]['name'] 253 | startline_path = list_startnodes[0]['filepath'] 254 | results_back = program_slice_backwards(pdg, list_startnodes) 255 | 256 | not_scan_func_list = [] 257 | results_back, temp = process_cross_func(results_back, testID, 1, results_back, not_scan_func_list) 258 | 259 | 260 | return [results_back], start_line, startline_path 261 | 262 | elif slicetype == 1:#forwords 263 | print "start extract forword dataflow!" 264 | print list_startnodes, startnodesID 265 | start_line = list_startnodes[0]['location'].split(':')[0] 266 | start_name = list_startnodes[0]['name'] 267 | startline_path = list_startnodes[0]['filepath'] 268 | results_for = program_slice_forward(pdg, list_startnodes) 269 | 270 | not_scan_func_list = [] 271 | results_for, temp = process_cross_func(results_for, testID, 1, results_for, not_scan_func_list) 272 | 273 | return [results_for], start_line, startline_path 274 | 275 | else:#bi_direction 276 | print "start extract backwords dataflow!" 
277 | 278 | start_line = list_startnodes[0]['location'].split(':')[0] 279 | start_name = list_startnodes[0]['name'] 280 | startline_path = list_startnodes[0]['filepath'] 281 | results_back = program_slice_backwards(pdg, list_startnodes)#results_back is a list of nodes 282 | 283 | results_for = program_slice_forward(pdg, list_startnodes) 284 | 285 | 286 | _list_name = [] 287 | for node_back in results_back: 288 | _list_name.append(node_back['name']) 289 | 290 | for node_for in results_for: 291 | if node_for['name'] in _list_name: 292 | continue 293 | else: 294 | results_back.append(node_for) 295 | 296 | results_back = sortedNodesByLoc(results_back) 297 | 298 | iter_times = 0 299 | start_list = [[results_back, iter_times]] 300 | i = 0 301 | not_scan_func_list = [] 302 | list_cross_func_back, not_scan_func_list = process_crossfuncs_back_byfirstnode(start_list, testID, i, not_scan_func_list) 303 | list_results_back = [l[0] for l in list_cross_func_back] 304 | 305 | all_result = [] 306 | for results_back in list_results_back: 307 | index = 1 308 | for a_node in results_back: 309 | if a_node['name'] == start_name: 310 | break 311 | else: 312 | index += 1 313 | 314 | list_to_crossfunc_back = results_back[:index] 315 | list_to_crossfunc_for = results_back[index:] 316 | 317 | list_to_crossfunc_back, temp = process_cross_func(list_to_crossfunc_back, testID, 0, list_to_crossfunc_back, not_scan_func_list) 318 | 319 | list_to_crossfunc_for, temp = process_cross_func(list_to_crossfunc_for, testID, 1, list_to_crossfunc_for, not_scan_func_list) 320 | 321 | all_result.append(list_to_crossfunc_back + list_to_crossfunc_for) 322 | 323 | 324 | return all_result, start_line, startline_path 325 | 326 | 327 | def api_slice(): 328 | count = 1 329 | store_filepath = "api_slices.txt" 330 | f = open("sensifunc_slice_points.pkl", 'rb') 331 | dict_unsliced_sensifunc = pickle.load(f) 332 | f.close() 333 | for key in dict_unsliced_sensifunc.keys():#key is testID 334 | 335 | for _t in 
dict_unsliced_sensifunc[key]: 336 | list_sensitive_funcid = _t[0] 337 | pdg_funcid = _t[1] 338 | sensitive_funcname = _t[2] 339 | 340 | if sensitive_funcname.find("main") != -1: 341 | continue #todo 342 | else: 343 | slice_dir = 2 344 | pdg = getFuncPDGById(key, pdg_funcid) 345 | if pdg == False: 346 | print 'error' 347 | exit() 348 | 349 | list_code, startline, startline_path = program_slice(pdg, list_sensitive_funcid, slice_dir, key) 350 | #print len(list_code) 351 | 352 | if list_code == []: 353 | fout = open("error.txt", 'a') 354 | fout.write(sensitive_funcname + ' ' + str(list_sensitive_funcid) + ' found nothing! \n') 355 | fout.close() 356 | else: 357 | for _list in list_code: 358 | get_slice_file_sequence(store_filepath, _list, count, sensitive_funcname, startline, startline_path) 359 | count += 1 360 | 361 | def pointers_slice(): 362 | count = 1 363 | store_filepath = "pointersuse_slices.txt" 364 | f = open("pointuse_slice_points.pkl", 'rb') 365 | dict_unsliced_pointers = pickle.load(f) 366 | print dict_unsliced_pointers 367 | f.close() 368 | 369 | #l = ['CVE-2013-4921', 'CVE-2013-4514', 'CVE-2015-1158', 'CVE-2015-1159', 'CVE-2005-3806', 'CVE-2012-6647', 'CVE-2012-2123', 'CVE-2015-0239', 'CVE-2013-2596', 'CVE-2008-5713', 'CVE-2015-2192', 'CVE-2015-2191', 'CVE-2006-5751', 'CVE-2014-1690', 'CVE-2012-5354', 'CVE-2008-3527', 'CVE-2004-1151', 'CVE-2011-0059', 'CVE-2008-3833', 'CVE-2010-4258', 'CVE-2014-2241', 'CVE-2011-2689', 'CVE-2011-2723', 'CVE-2014-4655', 'CVE-2014-4654', 'CVE-2010-0727', 'CVE-2014-4656', 'CVE-2014-4652', 'CVE-2009-2909', 'CVE-2008-1514', 'CVE-2014-3534', 'CVE-2014-3537', 'CVE-2012-1947', 'CVE-2012-5670', 'CVE-2011-1759', 'CVE-2011-1750', 'CVE-2007-0006', 'CVE-2010-4805', 'CVE-2013-2015', 'CVE-2014-3122', 'CVE-2011-0085', 'CVE-2011-0084', 'CVE-2011-0083', 'CVE-2007-6151', 'CVE-2009-3547', 'CVE-2012-0044', 'CVE-2014-8133', 'CVE-2009-3238', 'CVE-2012-0041', 'CVE-2009-3234', 'CVE-2013-4220', 'CVE-2014-0203', 'CVE-2011-1138', 'CVE-2005-3807', 
'CVE-2014-3523', 'CVE-2013-0854', 'CVE-2010-3877', 'CVE-2013-0913', 'CVE-2013-1732', 'CVE-2014-8884', 'CVE-2013-1735', 'CVE-2013-1736', 'CVE-2013-0914', 'CVE-2010-2960', 'CVE-2010-2962', 'CVE-2010-2240', 'CVE-2009-0946', 'CVE-2012-3984', 'CVE-2010-1224', 'CVE-2014-1498', 'CVE-2012-6617', 'CVE-2012-6616', 'CVE-2010-0437', 'CVE-2010-1188', 'CVE-2012-2652', 'CVE-2006-4790', 'CVE-2013-0867', 'CVE-2013-0866', 'CVE-2014-1522', 'CVE-2013-0864', 'CVE-2013-0863', 'CVE-2010-3880', 'CVE-2013-0861', 'CVE-2013-0860', 'CVE-2014-3511', 'CVE-2013-0869', 'CVE-2013-0868', 'CVE-2008-5029', 'CVE-2006-4813', 'CVE-2011-0716', 'CVE-2013-1848', 'CVE-2008-5025', 'CVE-2011-0711', 'CVE-2011-0710', 'CVE-2013-0764', 'CVE-2005-2261', 'CVE-2010-2500', 'CVE-2013-0761', 'CVE-2012-1090', 'CVE-2014-0155', 'CVE-2012-1097', 'CVE-2009-3640', 'CVE-2011-3363', 'CVE-2011-3362', 'CVE-2015-2922', 'CVE-2012-0464', 'CVE-2014-2099', 'CVE-2014-9661', 'CVE-2014-9665', 'CVE-2014-8712', 'CVE-2014-8713', 'CVE-2014-8714', 'CVE-2014-7841', 'CVE-2014-7842', 'CVE-2012-3377', 'CVE-2014-1552', 'CVE-2012-0855', 'CVE-2009-0675', 'CVE-2012-5237', 'CVE-2010-4346', 'CVE-2014-1950', 'CVE-2012-5238', 'CVE-2009-1961', 'CVE-2014-9584', 'CVE-2010-2226', 'CVE-2015-0562', 'CVE-2013-0166', 'CVE-2014-5271', 'CVE-2014-5272', 'CVE-2014-3470', 'CVE-2015-0204', 'CVE-2008-1390', 'CVE-2011-1080', 'CVE-2012-1146', 'CVE-2011-3944', 'CVE-2011-2896', 'CVE-2012-3430', 'CVE-2008-3276', 'CVE-2008-3275', 'CVE-2008-3272', 'CVE-2012-2776', 'CVE-2013-4933', 'CVE-2013-4587', 'CVE-2009-0935', 'CVE-2011-1712', 'CVE-2013-0796', 'CVE-2010-4656', 'CVE-2010-2478', 'CVE-2015-0228', 'CVE-2009-0269', 'CVE-2013-1573', 'CVE-2013-4929', 'CVE-2013-6339', 'CVE-2012-3979', 'CVE-2010-4163', 'CVE-2012-3976', 'CVE-2012-2802', 'CVE-2010-4649', 'CVE-2012-3972', 'CVE-2010-4165', 'CVE-2009-0859', 'CVE-2009-3722', 'CVE-2012-4186', 'CVE-2012-4184', 'CVE-2009-3726', 'CVE-2012-2313', 'CVE-2011-2535', 'CVE-2011-2534', 'CVE-2011-2536', 'CVE-2010-3080', 'CVE-2012-0957', 
'CVE-2011-3936', 'CVE-2012-1952', 'CVE-2011-3934', 'CVE-2012-1956', 'CVE-2012-1955', 'CVE-2010-3855', 'CVE-2010-3858', 'CVE-2012-1958', 'CVE-2013-4162', 'CVE-2012-3975', 'CVE-2009-3290', 'CVE-2012-4204', 'CVE-2012-0451', 'CVE-2012-4207', 'CVE-2014-1737', 'CVE-2013-0755', 'CVE-2014-1738', 'CVE-2012-3962', 'CVE-2013-0756', 'CVE-2013-0750', 'CVE-2010-4073', 'CVE-2005-2555', 'CVE-2010-2495', 'CVE-2012-2136', 'CVE-2012-2137', 'CVE-2010-2499', 'CVE-2015-3814', 'CVE-2015-3811', 'CVE-2005-2492', 'CVE-2015-3813', 'CVE-2015-3812', 'CVE-2013-0849', 'CVE-2014-3633', 'CVE-2014-3631', 'CVE-2012-0457', 'CVE-2012-0456', 'CVE-2005-4635', 'CVE-2013-7100', 'CVE-2011-2999', 'CVE-2011-2998', 'CVE-2010-2521', 'CVE-2011-2988', 'CVE-2006-5619', 'CVE-2009-3080', 'CVE-2010-4668', 'CVE-2013-2232', 'CVE-2013-2237', 'CVE-2014-7933', 'CVE-2011-1173', 'CVE-2013-4163', 'CVE-2013-3562', 'CVE-2013-3560', 'CVE-2010-3066', 'CVE-2015-5949', 'CVE-2005-3848', 'CVE-2006-2935', 'CVE-2006-2934', 'CVE-2010-1488', 'CVE-2005-3847', 'CVE-2009-4410', 'CVE-2013-4265', 'CVE-2013-4264', 'CVE-2009-3621', 'CVE-2013-0799', 'CVE-2013-1709', 'CVE-2011-3660', 'CVE-2011-3661', 'CVE-2015-2666', 'CVE-2013-0792', 'CVE-2013-4348', 'CVE-2015-0292', 'CVE-2013-2548', 'CVE-2012-1976', 'CVE-2013-6367', 'CVE-2006-1525', 'CVE-2010-0006', 'CVE-2010-1148', 'CVE-2014-8109', 'CVE-2010-0007', 'CVE-2013-1796', 'CVE-2013-0753', 'CVE-2011-4611', 'CVE-2013-1798', 'CVE-2008-2826', 'CVE-2011-4348', 'CVE-2013-6449', 'CVE-2014-1874', 'CVE-2010-0003', 'CVE-2011-1479', 'CVE-2013-1708', 'CVE-2013-7113', 'CVE-2013-1700', 'CVE-2013-1705', 'CVE-2013-1704', 'CVE-2013-1707', 'CVE-2010-1173', 'CVE-2010-2068', 'CVE-2006-1530', 'CVE-2012-2390', 'CVE-2009-1439', 'CVE-2012-2393', 'CVE-2011-3648', 'CVE-2012-6062', 'CVE-2015-4652', 'CVE-2011-1598', 'CVE-2013-4081', 'CVE-2007-4997', 'CVE-2013-4083', 'CVE-2013-4082', 'CVE-2011-1592', 'CVE-2012-6060', 'CVE-2009-1338', 'CVE-2006-4997', 'CVE-2013-7264', 'CVE-2012-5669', 'CVE-2006-6333', 'CVE-2013-1581', 
'CVE-2013-6673', 'CVE-2012-0458', 'CVE-2013-0845', 'CVE-2010-3861', 'CVE-2012-4293', 'CVE-2012-4292', 'CVE-2012-4565', 'CVE-2009-4021', 'CVE-2014-6431', 'CVE-2014-6430', 'CVE-2014-4943', 'CVE-2012-4298', 'CVE-2011-1927', 'CVE-2011-1023', 'CVE-2007-1592', 'CVE-2009-0747', 'CVE-2009-0746', 'CVE-2011-1147', 'CVE-2012-5240', 'CVE-2014-1642', 'CVE-2012-2787', 'CVE-2012-2786', 'CVE-2012-0045', 'CVE-2012-2783', 'CVE-2013-4300', 'CVE-2012-2788', 'CVE-2006-2445', 'CVE-2011-0521', 'CVE-2006-2446', 'CVE-2011-2984', 'CVE-2015-0253', 'CVE-2014-8369', 'CVE-2014-0206', 'CVE-2006-2448', 'CVE-2008-3792', 'CVE-2011-2909', 'CVE-2010-2798', 'CVE-2009-1046', 'CVE-2014-2907', 'CVE-2014-3186', 'CVE-2013-3231', 'CVE-2013-3230', 'CVE-2011-2906', 'CVE-2013-3234', 'CVE-2007-1217', 'CVE-2014-1497', 'CVE-2011-2588', 'CVE-2013-1696', 'CVE-2011-2587', 'CVE-2013-1693', 'CVE-2012-2669', 'CVE-2011-2378', 'CVE-2011-2373', 'CVE-2008-4989', 'CVE-2011-2371', 'CVE-2010-4347', 'CVE-2014-4048', 'CVE-2011-3619', 'CVE-2010-4343', 'CVE-2010-4342', 'CVE-2010-4263', 'CVE-2013-2128', 'CVE-2013-5717', 'CVE-2014-9319', 'CVE-2014-9318', 'CVE-2013-2234', 'CVE-2013-7339', 'CVE-2014-9316', 'CVE-2013-5719', 'CVE-2013-1572', 'CVE-2013-1576', 'CVE-2011-4579', 'CVE-2010-1748', 'CVE-2013-1578', 'CVE-2012-0477', 'CVE-2014-3181', 'CVE-2014-3182', 'CVE-2014-3183', 'CVE-2014-3184', 'CVE-2014-3185', 'CVE-2006-5158', 'CVE-2013-0872', 'CVE-2013-0873', 'CVE-2013-0874', 'CVE-2013-3302', 'CVE-2013-0877', 'CVE-2013-0878', 'CVE-2011-3973', 'CVE-2009-3888', 'CVE-2013-4534', 'CVE-2015-6243', 'CVE-2015-6242', 'CVE-2013-4533', 'CVE-2013-4125', 'CVE-2014-8412', 'CVE-2013-4129', 'CVE-2015-6249', 'CVE-2011-1146', 'CVE-2011-1079', 'CVE-2015-6241', 'CVE-2010-1636', 'CVE-2014-0160', 'CVE-2013-0865', 'CVE-2012-6638', 'CVE-2010-3298', 'CVE-2012-6539', 'CVE-2010-1088', 'CVE-2014-9679', 'CVE-2010-1083', 'CVE-2014-9676', 'CVE-2012-6061', 'CVE-2010-1087', 'CVE-2010-1086', 'CVE-2010-1085', 'CVE-2009-3612', 'CVE-2015-3395', 'CVE-2013-7022', 
'CVE-2013-7023', 'CVE-2013-7021', 'CVE-2013-7026', 'CVE-2013-7027', 'CVE-2013-7024', 'CVE-2013-5634', 'CVE-2012-3364', 'CVE-2012-0042', 'CVE-2008-0420', 'CVE-2011-1776', 'CVE-2010-3772', 'CVE-2005-4886', 'CVE-2014-2894', 'CVE-2011-1770', 'CVE-2010-3774', 'CVE-2005-3359', 'CVE-2013-1954', 'CVE-2014-5206', 'CVE-2012-2100', 'CVE-2014-4608', 'CVE-2009-2407', 'CVE-2005-3356', 'CVE-2011-1171', 'CVE-2010-2806', 'CVE-2013-7015', 'CVE-2010-2803', 'CVE-2014-3640', 'CVE-2009-2768', 'CVE-2010-2808', 'CVE-2009-0065', 'CVE-2013-4511', 'CVE-2008-3915', 'CVE-2010-2519', 'CVE-2012-4530', 'CVE-2014-2309', 'CVE-2014-7145', 'CVE-2010-3078', 'CVE-2007-6206', 'CVE-2007-4571', 'CVE-2010-2071', 'CVE-2013-1792', 'CVE-2011-2707', 'CVE-2011-3000', 'CVE-2011-2700', 'CVE-2011-3658', 'CVE-2013-4270', 'CVE-2011-3654', 'CVE-2011-3653', 'CVE-2014-9683', 'CVE-2005-3857', 'CVE-2014-1445', 'CVE-2013-5618', 'CVE-2013-1958', 'CVE-2009-2287', 'CVE-2013-0782', 'CVE-2011-1180', 'CVE-2011-1182', 'CVE-2013-6671', 'CVE-2013-3076', 'CVE-2013-5613', 'CVE-2013-5599', 'CVE-2009-0787', 'CVE-2011-1573', 'CVE-2010-2937', 'CVE-2007-1000', 'CVE-2013-2276', 'CVE-2013-5593', 'CVE-2013-4079', 'CVE-2011-1477', 'CVE-2013-5597', 'CVE-2009-0028', 'CVE-2014-1488', 'CVE-2008-4210', 'CVE-2014-1481', 'CVE-2014-1487', 'CVE-2010-2066', 'CVE-2013-5601', 'CVE-2015-3808', 'CVE-2015-3809', 'CVE-2013-7281', 'CVE-2014-3601', 'CVE-2011-0073', 'CVE-2013-4470', 'CVE-2013-0859', 'CVE-2012-4288', 'CVE-2012-4289', 'CVE-2012-0444', 'CVE-2011-2987', 'CVE-2013-6450', 'CVE-2012-4285', 'CVE-2012-4287', 'CVE-2014-9419', 'CVE-2013-6457', 'CVE-2013-2058', 'CVE-2010-4256', 'CVE-2010-4251', 'CVE-2014-2739', 'CVE-2014-6424', 'CVE-2011-0055', 'CVE-2011-0051', 'CVE-2014-2097', 'CVE-2012-3969', 'CVE-2012-1183', 'CVE-2012-1184', 'CVE-2010-4078', 'CVE-2013-0848', 'CVE-2010-4074', 'CVE-2008-5134', 'CVE-2010-4076', 'CVE-2012-3964', 'CVE-2012-3966', 'CVE-2010-4072', 'CVE-2009-3638', 'CVE-2013-2930', 'CVE-2014-9672', 'CVE-2012-6537', 'CVE-2012-4190', 
'CVE-2014-1446', 'CVE-2014-2523', 'CVE-2014-1509', 'CVE-2014-6423', 'CVE-2014-1502', 'CVE-2012-1945', 'CVE-2012-1946', 'CVE-2010-3864', 'CVE-2012-1940', 'CVE-2013-0844', 'CVE-2012-1942', 'CVE-2014-0195', 'CVE-2010-3904', 'CVE-2013-7112', 'CVE-2010-3907', 'CVE-2009-1360', 'CVE-2014-7825', 'CVE-2006-1864', 'CVE-2013-4153', 'CVE-2013-4151', 'CVE-2013-4150', 'CVE-2013-1828', 'CVE-2013-4401', 'CVE-2014-6426', 'CVE-2011-1044', 'CVE-2009-1630', 'CVE-2012-0023', 'CVE-2011-4031', 'CVE-2012-2800', 'CVE-2012-2801', 'CVE-2010-2062', 'CVE-2012-2803', 'CVE-2013-7014', 'CVE-2008-2931', 'CVE-2012-6540', 'CVE-2013-4247', 'CVE-2011-3649', 'CVE-2015-3008', 'CVE-2012-0068', 'CVE-2012-6542', 'CVE-2011-3191', 'CVE-2014-9743', 'CVE-2013-1929', 'CVE-2009-2406', 'CVE-2006-0039', 'CVE-2009-1337', 'CVE-2014-1510', 'CVE-2011-3484', 'CVE-2006-0035', 'CVE-2013-5600', 'CVE-2012-2319', 'CVE-2013-1920', 'CVE-2013-7265', 'CVE-2010-3850', 'CVE-2013-7267', 'CVE-2014-2286', 'CVE-2011-4102', 'CVE-2012-5668', 'CVE-2011-4100', 'CVE-2010-3859', 'CVE-2015-4490', 'CVE-2010-4079', 'CVE-2014-1739', 'CVE-2012-0056', 'CVE-2011-1078', 'CVE-2011-4086', 'CVE-2014-0196', 'CVE-2013-3235', 'CVE-2013-6167', 'CVE-2014-8546', 'CVE-2015-3417', 'CVE-2011-3623', 'CVE-2014-0205', 'CVE-2014-6426', 'CVE-2014-6427', 'CVE-2014-6428', 'CVE-2014-6429', 'CVE-2005-2800', 'CVE-2014-7826', 'CVE-2014-1438', 'CVE-2012-6618', 'CVE-2012-6541', 'CVE-2015-0820', 'CVE-2015-0823', 'CVE-2011-1833', 'CVE-2009-1897', 'CVE-2012-2790', 'CVE-2012-2791', 'CVE-2012-2793', 'CVE-2012-2794', 'CVE-2009-3002', 'CVE-2009-3001', 'CVE-2012-2797', 'CVE-2011-2182', 'CVE-2011-2183', 'CVE-2011-2184', 'CVE-2013-5651', 'CVE-2009-2844', 'CVE-2009-2846', 'CVE-2014-3510', 'CVE-2014-8541', 'CVE-2013-3225', 'CVE-2013-3226', 'CVE-2014-8542', 'CVE-2014-2851', 'CVE-2014-8544', 'CVE-2013-3222', 'CVE-2010-3084', 'CVE-2014-8549', 'CVE-2013-4936', 'CVE-2013-3228', 'CVE-2013-3229', 'CVE-2011-3192', 'CVE-2013-0268', 'CVE-2013-1763', 'CVE-2011-1019', 'CVE-2013-1767', 
'CVE-2012-2796', 'CVE-2013-1680', 'CVE-2012-0066', 'CVE-2011-1160', 'CVE-2011-0069', 'CVE-2013-2140', 'CVE-2011-2364', 'CVE-2011-2367', 'CVE-2014-2299', 'CVE-2011-1493', 'CVE-2011-2368', 'CVE-2011-1495', 'CVE-2014-0221', 'CVE-2013-4205', 'CVE-2013-2486', 'CVE-2013-2094', 'CVE-2013-7266', 'CVE-2015-6246', 'CVE-2013-4928', 'CVE-2011-0014', 'CVE-2013-2481', 'CVE-2011-1012', 'CVE-2011-1010', 'CVE-2008-5079', 'CVE-2010-4527', 'CVE-2010-4526', 'CVE-2015-6654', 'CVE-2013-4592', 'CVE-2013-4591', 'CVE-2010-1437', 'CVE-2011-2484', 'CVE-2011-2482', 'CVE-2014-8643', 'CVE-2006-0557', 'CVE-2011-3946', 'CVE-2011-3945', 'CVE-2014-8160', 'CVE-2014-9428', 'CVE-2011-3941', 'CVE-2013-1860', 'CVE-2014-9420', 'CVE-2011-3949', 'CVE-2013-4296', 'CVE-2013-4297', 'CVE-2013-2495', 'CVE-2012-2779', 'CVE-2013-4931', 'CVE-2013-4930', 'CVE-2013-6399', 'CVE-2013-4932', 'CVE-2013-4934', 'CVE-2014-1549', 'CVE-2009-2847', 'CVE-2013-0311', 'CVE-2013-0310', 'CVE-2013-0313', 'CVE-2011-1771', 'CVE-2010-1641', 'CVE-2014-0077', 'CVE-2012-6057', 'CVE-2012-6056', 'CVE-2012-6055', 'CVE-2012-6054', 'CVE-2012-6053', 'CVE-2012-3957', 'CVE-2014-5471', 'CVE-2014-9603', 'CVE-2009-3624', 'CVE-2014-9604', 'CVE-2012-6059', 'CVE-2012-6058', 'CVE-2013-7017', 'CVE-2013-2230', 'CVE-2014-2673', 'CVE-2014-2672', 'CVE-2013-7013', 'CVE-2013-7012', 'CVE-2013-7011', 'CVE-2011-0006', 'CVE-2013-0791', 'CVE-2013-0790', 'CVE-2013-0793', 'CVE-2012-5532', 'CVE-2013-0795', 'CVE-2014-8543', 'CVE-2013-7019', 'CVE-2013-7018', 'CVE-2009-2484', 'CVE-2009-4307', 'CVE-2013-1819', 'CVE-2012-1973', 'CVE-2014-8545', 'CVE-2009-4308', 'CVE-2012-2745', 'CVE-2011-3950', 'CVE-2011-1747', 'CVE-2014-8547', 'CVE-2011-1745', 'CVE-2011-2928', 'CVE-2014-2889', 'CVE-2010-0741', 'CVE-2011-3002', 'CVE-2011-3003', 'CVE-2012-6657', 'CVE-2009-1389', 'CVE-2009-4895', 'CVE-2008-5700', 'CVE-2009-1385', 'CVE-2006-5462', 'CVE-2006-5749', 'CVE-2013-7010', 'CVE-2008-3686', 'CVE-2014-1684', 'CVE-2012-3553', 'CVE-2009-1192', 'CVE-2015-3331', 'CVE-2008-2750', 
'CVE-2009-4005', 'CVE-2015-3339', 'CVE-2010-4648', 'CVE-2009-2691', 'CVE-2011-2605', 'CVE-2014-7283', 'CVE-2011-4101', 'CVE-2013-7268', 'CVE-2013-7269', 'CVE-2013-2206', 'CVE-2011-0726', 'CVE-2010-3429', 'CVE-2014-2038', 'CVE-2009-1527', 'CVE-2014-1508', 'CVE-2004-0535', 'CVE-2011-2216', 'CVE-2012-0452', 'CVE-2011-4326', 'CVE-2011-4324', 'CVE-2011-2213', 'CVE-2011-4081', 'CVE-2011-4087', 'CVE-2012-0058', 'CVE-2008-1294', 'CVE-2010-4080', 'CVE-2010-4081', 'CVE-2009-4138', 'CVE-2010-4083', 'CVE-2011-2174', 'CVE-2014-4027', 'CVE-2011-3637', 'CVE-2009-3228', 'CVE-2009-0031', 'CVE-2013-1727', 'CVE-2013-1726', 'CVE-2011-1684', 'CVE-2010-4242', 'CVE-2014-6432', 'CVE-2007-3642', 'CVE-2013-4399', 'CVE-2010-4248', 'CVE-2014-3687', 'CVE-2012-3991', 'CVE-2007-4521', 'CVE-2014-0038', 'CVE-2010-3432', 'CVE-2013-6336', 'CVE-2013-2634', 'CVE-2013-2635', 'CVE-2013-2636', 'CVE-2009-1298', 'CVE-2012-0207', 'CVE-2009-2651', 'CVE-2006-2778', 'CVE-2012-2375', 'CVE-2013-0852', 'CVE-2006-1856', 'CVE-2006-1855', 'CVE-2013-0851', 'CVE-2013-0856', 'CVE-2010-3875', 'CVE-2010-3876', 'CVE-2013-0855', 'CVE-2013-1583', 'CVE-2013-1582', 'CVE-2013-1580', 'CVE-2013-1587', 'CVE-2013-1586', 'CVE-2013-1584', 'CVE-2013-6436', 'CVE-2013-1588', 'CVE-2013-4149', 'CVE-2010-1162', 'CVE-2010-4243', 'CVE-2010-2537', 'CVE-2013-0778', 'CVE-2010-2248', 'CVE-2013-0772', 'CVE-2013-0771', 'CVE-2010-0623', 'CVE-2013-0777', 'CVE-2010-2538', 'CVE-2013-0774', 'CVE-2011-0021', 'CVE-2011-3353', 'CVE-2012-0478', 'CVE-2014-3610', 'CVE-2014-3611', 'CVE-2012-0475', 'CVE-2012-0474', 'CVE-2012-0471', 'CVE-2012-0470', 'CVE-2014-9656', 'CVE-2014-9657', 'CVE-2005-4618', 'CVE-2006-3741', 'CVE-2014-8709', 'CVE-2014-9658', 'CVE-2013-0290', 'CVE-2013-3227', 'CVE-2012-4461', 'CVE-2009-1336', 'CVE-2014-7937', 'CVE-2012-1595', 'CVE-2012-1594', 'CVE-2015-0834', 'CVE-2015-0833', 'CVE-2013-4563', 'CVE-2012-0067', 'CVE-2006-6106', 'CVE-2011-2175', 'CVE-2011-2365', 'CVE-2014-4667', 'CVE-2005-2617', 'CVE-2010-0307', 'CVE-2014-4174', 
'CVE-2013-5641', 'CVE-2013-5642', 'CVE-2011-1093', 'CVE-2013-6891', 'CVE-2014-3509', 'CVE-2013-1722', 'CVE-2010-2431', 'CVE-2013-3559', 'CVE-2013-3557', 'CVE-2013-4541', 'CVE-2014-8416', 'CVE-2014-8415', 'CVE-2014-8414', 'CVE-2011-1175', 'CVE-2013-1676', 'CVE-2011-1170', 'CVE-2011-0070', 'CVE-2013-1672', 'CVE-2011-2022', 'CVE-2012-1583', 'CVE-2013-1679', 'CVE-2013-1678', 'CVE-2011-0079', 'CVE-2012-6538', 'CVE-2014-2289', 'CVE-2014-2282', 'CVE-2014-2283', 'CVE-2010-3296', 'CVE-2011-1959', 'CVE-2011-3670', 'CVE-2010-3297', 'CVE-2013-4353', 'CVE-2009-1243', 'CVE-2009-1242', 'CVE-2010-2954', 'CVE-2010-2955', 'CVE-2014-9374', 'CVE-2008-4445', 'CVE-2012-2774', 'CVE-2013-2488', 'CVE-2013-1979', 'CVE-2011-4594', 'CVE-2009-0835', 'CVE-2013-6378', 'CVE-2011-4598', 'CVE-2011-2496', 'CVE-2012-6548', 'CVE-2014-5472', 'CVE-2013-2478', 'CVE-2009-2692', 'CVE-2013-2476', 'CVE-2011-4352', 'CVE-2012-2775', 'CVE-2009-2698', 'CVE-2014-8173', 'CVE-2013-3673', 'CVE-2013-3672', 'CVE-2013-3670', 'CVE-2011-1172', 'CVE-2013-3675', 'CVE-2013-3674', 'CVE-2012-6547', 'CVE-2009-0676', 'CVE-2013-6380', 'CVE-2013-6381', 'CVE-2012-6543', 'CVE-2013-6383', 'CVE-2013-4513', 'CVE-2013-4512', 'CVE-2009-4141', 'CVE-2012-4467', 'CVE-2013-4516', 'CVE-2013-4515', 'CVE-2013-1774', 'CVE-2013-2547', 'CVE-2011-1748', 'CVE-2008-4302', 'CVE-2011-1076', 'CVE-2011-1746', 'CVE-2008-4554', 'CVE-2014-3153', 'CVE-2014-9529', 'CVE-2013-2852', 'CVE-2005-3181', 'CVE-2011-1581', 'CVE-2015-3636', 'CVE-2011-1957', 'CVE-2013-1957', 'CVE-2014-5045', 'CVE-2010-3015', 'CVE-2012-1961', 'CVE-2013-0850', 'CVE-2011-2518', 'CVE-2013-4514', 'CVE-2013-4922', 'CVE-2013-4923', 'CVE-2013-4921', 'CVE-2013-4927', 'CVE-2013-4924', 'CVE-2010-5313', 'CVE-2010-4650', 'CVE-2010-4158', 'CVE-2014-0069', 'CVE-2010-4157', 'CVE-2013-2850', 'CVE-2010-2492'] 370 | #l = ['CVE-2016-5278', 'CVE-2015-5154', 'CVE-2016-9576', 'CVE-2016-2808', 'CVE-2016-1930', 'CVE-2016-2532', 'CVE-2015-4521', 'CVE-2015-4522', 'CVE-2015-7203', 'CVE-2016-5126', 
'CVE-2017-6348', 'CVE-2015-8961', 'CVE-2015-8962', 'CVE-2016-5275', 'CVE-2016-4439', 'CVE-2016-7908', 'CVE-2016-7154', 'CVE-2015-4036', 'CVE-2015-3456', 'CVE-2015-2740', 'CVE-2016-3134', 'CVE-2015-5283', 'CVE-2016-9776', 'CVE-2016-7155', 'CVE-2016-9101', 'CVE-2016-7156', 'CVE-2016-2818', 'CVE-2015-8363', 'CVE-2015-7194', 'CVE-2016-6511', 'CVE-2016-5264', 'CVE-2015-5307', 'CVE-2015-4002', 'CVE-2016-9373', 'CVE-2016-1583', 'CVE-2016-7180', 'CVE-2016-1935', 'CVE-2015-2729', 'CVE-2016-5238', 'CVE-2016-1714', 'CVE-2015-0829', 'CVE-2016-2838', 'CVE-2016-2529', 'CVE-2015-4511', 'CVE-2016-4082', 'CVE-2015-4513', 'CVE-2015-4512', 'CVE-2015-2739', 'CVE-2015-8953', 'CVE-2013-4542', 'CVE-2016-9923', 'CVE-2016-2327', 'CVE-2016-2329', 'CVE-2016-2328', 'CVE-2016-1970', 'CVE-2015-8817', 'CVE-2016-1974', 'CVE-2016-5829', 'CVE-2016-2847', 'CVE-2016-7161', 'CVE-2017-6474', 'CVE-2017-6470', 'CVE-2016-2530', 'CVE-2016-6507', 'CVE-2016-6506', 'CVE-2015-4517', 'CVE-2015-0830', 'CVE-2016-6508', 'CVE-2016-4002', 'CVE-2015-8785', 'CVE-2015-4500', 'CVE-2015-4501', 'CVE-2016-4006', 'CVE-2015-4504', 'CVE-2015-4487', 'CVE-2015-2724', 'CVE-2015-2725', 'CVE-2016-8909', 'CVE-2016-2330', 'CVE-2016-4805', 'CVE-2015-7178', 'CVE-2015-7179', 'CVE-2015-7176', 'CVE-2015-7177', 'CVE-2015-7174', 'CVE-2015-7175', 'CVE-2016-9104', 'CVE-2015-3906', 'CVE-2016-5280', 'CVE-2016-6513', 'CVE-2016-2814', 'CVE-2016-2819', 'CVE-2015-8365', 'CVE-2016-4952', 'CVE-2016-8910', 'CVE-2016-7910', 'CVE-2016-7913', 'CVE-2016-7912', 'CVE-2017-6214', 'CVE-2016-10154', 'CVE-2016-6351', 'CVE-2016-9685', 'CVE-2016-1957', 'CVE-2016-1956', 'CVE-2016-6213', 'CVE-2016-1953', 'CVE-2016-1952', 'CVE-2016-7425', 'CVE-2015-6252', 'CVE-2015-1872', 'CVE-2015-8663', 'CVE-2015-8662', 'CVE-2015-8661', 'CVE-2014-5388', 'CVE-2016-4080', 'CVE-2015-4482', 'CVE-2015-1339', 'CVE-2016-5728', 'CVE-2015-4484', 'CVE-2015-1333', 'CVE-2016-4998', 'CVE-2016-2550', 'CVE-2016-9103', 'CVE-2016-3156','CVE-2016-4952', 'CVE-2016-9923', 'CVE-2016-9685', 
'CVE-2016-2329', 'CVE-2015-0829', 'CVE-2016-2838', 'CVE-2016-2529', 'CVE-2015-4511', 'CVE-2015-4513', 'CVE-2015-4512', 'CVE-2015-4517', 'CVE-2016-4082', 'CVE-2015-8953', 'CVE-2015-4511', 'CVE-2015-0829', 'CVE-2016-2838', 'CVE-2016-2529', 'CVE-2015-0824', 'CVE-2016-2837', 'CVE-2016-2836', 'CVE-2016-2523', 'CVE-2016-2522', 'CVE-2016-7179', 'CVE-2016-7177', 'CVE-2016-7176', 'CVE-2016-8658', 'CVE-2015-3209', 'CVE-2016-7170', 'CVE-2016-2824', 'CVE-2016-5106', 'CVE-2016-5107', 'CVE-2016-6833', 'CVE-2015-5366', 'CVE-2016-6835', 'CVE-2016-4454', 'CVE-2015-6526', 'CVE-2015-7220', 'CVE-2015-7221', 'CVE-2016-5276', 'CVE-2016-5277', 'CVE-2016-5274', 'CVE-2015-4651', 'CVE-2017-5547', 'CVE-2016-9793', 'CVE-2015-7180', 'CVE-2016-3955', 'CVE-2014-0182', 'CVE-2015-3810', 'CVE-2016-4453', 'CVE-2015-4493', 'CVE-2016-6828', 'CVE-2015-2710', 'CVE-2015-7217', 'CVE-2016-7094', 'CVE-2016-4441', 'CVE-2015-5156', 'CVE-2015-4473', 'CVE-2016-4079', 'CVE-2015-5364', 'CVE-2015-6820', 'CVE-2015-3815', 'CVE-2015-7198', 'CVE-2015-7199', 'CVE-2015-5158', 'CVE-2017-6353', 'CVE-2016-9376', 'CVE-2015-2712', 'CVE-2016-4568', 'CVE-2015-2716', 'CVE-2016-5258', 'CVE-2016-5259', 'CVE-2015-7512', 'CVE-2015-8554', 'CVE-2015-7201', 'CVE-2015-7202', 'CVE-2016-5254', 'CVE-2016-5255', 'CVE-2016-5256', 'CVE-2016-5257', 'CVE-2016-2213', 'CVE-2017-5548', 'CVE-2016-0718', 'CVE-2016-7042', 'CVE-2015-0827', 'CVE-2015-0826', 'CVE-2016-4568', 'CVE-2015-2716', 'CVE-2016-7466', 'CVE-2016-8666', 'CVE-2015-8743', 'CVE-2016-3062', 'CVE-2015-3214', 'CVE-2016-5258', 'CVE-2016-5259', 'CVE-2015-7512', 'CVE-2015-8554', 'CVE-2015-7201', 'CVE-2015-7202', 'CVE-2016-5254', 'CVE-2016-5255', 'CVE-2016-5256', 'CVE-2016-5257', 'CVE-2016-2213', 'CVE-2017-5548', 'CVE-2016-0718', 'CVE-2016-1981', 'CVE-2015-2726', 'CVE-2016-5400', 'CVE-2016-5403', 'CVE-2016-2857', 'CVE-2015-1779', 'CVE-2016-7042', 'CVE-2015-0827', 'CVE-2015-0826', 'CVE-2016-4568', 'CVE-2015-2716', 'CVE-2016-7466', 'CVE-2016-8666', 'CVE-2015-8743', 'CVE-2016-1977', 
'CVE-2016-3062', 'CVE-2015-3214', 'CVE-2015-0824', 'CVE-2016-2837', 'CVE-2016-2836', 'CVE-2016-2523', 'CVE-2016-2522', 'CVE-2016-7179', 'CVE-2016-7177', 'CVE-2016-7176', 'CVE-2016-8658', 'CVE-2015-3209', 'CVE-2016-7170'] 371 | #print len(list(set(l))) 372 | #exit() 373 | l = ['CVE-2015-4521', 'CVE-2015-4482', 'CVE-2016-2824', 'CVE-2015-4487', 'CVE-2014-2894', 'CVE-2015-4484', 'CVE-2016-4002', 'CVE-2015-2729', 'CVE-2015-4500', 'CVE-2015-4501', 'CVE-2016-5238', 'CVE-2014-5263', 'CVE-2015-2726', 'CVE-2013-4526', 'CVE-2014-0223', 'CVE-2013-4527', 'CVE-2016-2814', 'CVE-2015-7178', 'CVE-2015-7179', 'CVE-2013-4530', 'CVE-2013-4533', 'CVE-2015-8662', 'CVE-2015-7176', 'CVE-2016-1714', 'CVE-2015-7174', 'CVE-2015-7175', 'CVE-2016-9104', 'CVE-2016-5280', 'CVE-2016-9101', 'CVE-2016-9103', 'CVE-2016-2819', 'CVE-2016-2818', 'CVE-2015-0829', 'CVE-2016-4952', 'CVE-2015-4511', 'CVE-2015-4512', 'CVE-2015-4517', 'CVE-2014-9676', 'CVE-2013-6399', 'CVE-2016-8910', 'CVE-2013-0866', 'CVE-2013-4542', 'CVE-2015-3395', 'CVE-2016-6351', 'CVE-2016-9923', 'CVE-2013-0860', 'CVE-2016-1957', 'CVE-2016-1956', 'CVE-2013-7020', 'CVE-2013-7021', 'CVE-2016-1953', 'CVE-2016-1952', 'CVE-2013-0868', 'CVE-2014-8541', 'CVE-2016-1970', 'CVE-2014-9319', 'CVE-2014-8542', 'CVE-2016-7421', 'CVE-2014-8544', 'CVE-2014-8547', 'CVE-2016-7161', 'CVE-2014-7937', 'CVE-2015-1872', 'CVE-2014-9317', 'CVE-2014-9316', 'CVE-2014-7933', 'CVE-2016-5258', 'CVE-2016-5259', 'CVE-2015-7512', 'CVE-2014-3640', 'CVE-2016-5106', 'CVE-2016-5107', 'CVE-2016-4453', 'CVE-2015-7203', 'CVE-2016-6835', 'CVE-2016-4454', 'CVE-2016-5256', 'CVE-2016-5257', 'CVE-2015-7220', 'CVE-2015-7221', 'CVE-2015-4504', 'CVE-2016-7170', 'CVE-2016-5278', 'CVE-2015-7180', 'CVE-2016-1981', 'CVE-2016-8909', 'CVE-2016-2836', 'CVE-2016-2857', 'CVE-2013-0858', 'CVE-2014-0182', 'CVE-2013-0856', 'CVE-2013-0857', 'CVE-2016-5403', 'CVE-2014-2099', 'CVE-2014-2098', 'CVE-2015-1779', 'CVE-2016-6833', 'CVE-2014-2097', 'CVE-2015-4493', 'CVE-2015-0825', 'CVE-2015-0824', 
# --- slice_op.py (tail) ---
# NOTE(review): pointers_slice() begins above this chunk and is not reproduced
# here; its per-point loop is the same pattern as _slice_for_points() below.

def _slice_for_points(points_pkl, store_filepath, skip_testids):
    """Shared worker for the *_slice() entry points (deduplicates the bodies
    of arrays_slice / integeroverflow_slice, which were copy-pasted).

    points_pkl     -- pickle mapping testID -> list of
                      (list_point_funcids, pdg_funcid, point_name) tuples.
    store_filepath -- text file the generated slices are appended to.
    skip_testids   -- testIDs (CVE ids) to skip entirely.

    Side effects: writes slices via get_slice_file_sequence() and logs points
    that produced no slice to error.txt.
    """
    count = 1
    with open(points_pkl, 'rb') as f:
        dict_unsliced = pickle.load(f)

    for key in dict_unsliced.keys():  # key is testID
        if key in skip_testids:
            continue

        for _t in dict_unsliced[key]:
            list_points_funcid = _t[0]
            pdg_funcid = _t[1]
            print(pdg_funcid)
            point_name = str(_t[2])

            slice_dir = 2  # assumes 2 = bidirectional slice -- TODO confirm in program_slice
            pdg = getFuncPDGById(key, pdg_funcid)
            if pdg == False:
                print('error')
                exit()

            list_code, startline, startline_path = program_slice(pdg, list_points_funcid, slice_dir, key)

            if list_code == []:
                with open("error.txt", 'a') as fout:
                    fout.write(point_name + ' ' + str(list_points_funcid) + ' found nothing! \n')
            else:
                for _list in list_code:
                    get_slice_file_sequence(store_filepath, _list, count, point_name, startline, startline_path)
                    count += 1


def arrays_slice():
    """Generate program slices around array-use points."""
    _slice_for_points("arrayuse_slice_points.pkl", "arraysuse_slices.txt", [])


def integeroverflow_slice():
    """Generate program slices around integer-overflow points.

    The listed CVE testIDs are skipped (already processed / known-bad input).
    """
    skip = ['CVE-2016-5259', 'CVE-2015-7512', 'CVE-2014-3640', 'CVE-2016-5106',
            'CVE-2016-5107', 'CVE-2016-4453', 'CVE-2015-4475', 'CVE-2016-6835',
            'CVE-2016-4454', 'CVE-2016-5257', 'CVE-2015-7220', 'CVE-2015-7221',
            'CVE-2016-5278', 'CVE-2016-1981', 'CVE-2015-2726', 'CVE-2016-2857',
            'CVE-2013-0858', 'CVE-2014-0182', 'CVE-2013-0856', 'CVE-2013-0857',
            'CVE-2016-5403', 'CVE-2014-2099', 'CVE-2014-2098', 'CVE-2015-1779',
            'CVE-2016-6833', 'CVE-2014-2097', 'CVE-2015-7203', 'CVE-2015-7194',
            'CVE-2015-6820', 'CVE-2015-7199', 'CVE-2015-2710', 'CVE-2016-4952',
            'CVE-2015-2712', 'CVE-2013-7022', 'CVE-2013-7023', 'CVE-2013-0845',
            'CVE-2016-7466', 'CVE-2015-7202', 'CVE-2013-4151', 'CVE-2013-4150',
            'CVE-2016-8668']
    _slice_for_points("integeroverflow_slice_points_new.pkl", "integeroverflow_slices.txt", skip)


if __name__ == "__main__":
    # api_slice()
    # pointers_slice()
    # arrays_slice()
    integeroverflow_slice()
    print("All Done.")


# --- general_op.py (head) ---
## coding:utf-8
import os
import pickle
import re

# C APIs whose first argument is the destination of a copy/format operation.
list_destparam_0_cpyapi = ['sprintf', 'gets', 'fgets', '_memccpy', '_mbscpy', '_strncpy', 'wmemset', 'vasprintf', 'asprintf', 'wcsncpy', 'lstrcpy', '_wcsncpy', '_snprintf', 'memcpy', 'memmove', '_tcscpy', '_snwprintf', 'strcpy', 'CopyMemory', 'wcsncat', 'vsprintf', 'lstrcpyn', 'vsnprintf', '_mbsncat', 'wmemmove', 'memset', 'wmemcpy', 'strcat', 'fprintf', '_strncat', '_tcsncpy', '_mbsnbcpy', 'strncpy', 'strncat', 'wcscpy', 'snprintf', 'lstrcat']
# scanf-family input APIs.
list_scanf_api = ['vfscanf', 'fscanf', 'vscanf', 'scanf', 'vsscanf', 'sscanf', 'swscanf']
# Identifiers to exclude from callee extraction (empty by default).
list_key_words = []


def del_ctrl_edge(pdg):
    """Delete every control edge (edge whose 'var' attribute is None) from
    pdg in place and return it; data edges (var != None) are kept."""
    i = 0
    while i < pdg.ecount():
        if pdg.es[i]['var'] == None:
            pdg.delete_edges(i)  # edge indices shift down, so do not advance i
        else:
            i += 1
    return pdg
def get_ifname(node_id, dict_if2cfgnode, dict_cfgnode2if):
    """Return the name of the smallest enclosing if-statement of CFG node
    node_id (the one covering the fewest CFG nodes); '' when the node maps
    to no if-statement.

    dict_if2cfgnode -- if-name -> list of CFG node names it covers.
    dict_cfgnode2if -- CFG node name -> list of candidate if-names.
    """
    candidates = dict_cfgnode2if[node_id]
    if not candidates:
        return ''
    # min() keeps the first minimum, matching the original strict-< scan.
    return min(candidates, key=lambda if_n: len(dict_if2cfgnode[if_n]))


def isFuncCall(node):
    """Return the list of callee names when node's code contains a call,
    False otherwise."""
    result = getCalleeName(node)
    return result if result != [] else False


def getCalleeName(slicenode):
    """Extract the names of functions called in slicenode['code'].

    Function-definition nodes yield []. Names listed in list_key_words are
    filtered out. Returns a (possibly empty) list of names.
    """
    code = slicenode['code']
    if slicenode['type'] == "Function":
        return []

    # Identifier (possibly a member chain like 'a . b' / 'a :: b' / 'a -> b')
    # followed by ' (' -- Joern normalises code with single spaces.
    pattern = r"((?:_|[A-Za-z])\w*(?:\s(?:\.|::|\->|)\s(?:_|[A-Za-z])\w*)*)\s\("
    result = re.findall(pattern, code)
    return [name for name in result if name not in list_key_words]


def getFuncPDGBynodeIDAndtestID(list_cfgNodeID_funcID, testID):
    """Map each CFG node id to the pickled PDG of its function.

    list_cfgNodeID_funcID -- tuples of (cfgNodeID, func_id); the PDG file for
    func_id is looked up under pdg_db/<testID>/ by filename suffix.
    Returns {cfgNodeID: pdg}.
    """
    _dict = {}
    for cfgNodeID, func_id in list_cfgNodeID_funcID:
        path = os.path.join('pdg_db', testID)
        for _file in os.listdir(path):
            if _file.split('_')[-1] == func_id:
                with open(os.path.join(path, _file), 'rb') as fin:
                    _dict[cfgNodeID] = pickle.load(fin)
                break
    return _dict


def getFuncPDGBynodeIDAndtestID_noctrl(list_cfgNodeID_funcID, testID):
    """Like getFuncPDGBynodeIDAndtestID, but searches the control-edge-free
    PDG stores under pdg/<subdir>/<testID>/."""
    _dict = {}
    for cfgNodeID, func_id in list_cfgNodeID_funcID:
        for _dir in os.listdir("pdg/"):
            if testID not in os.listdir(os.path.join("pdg/", _dir)):
                continue
            path = os.path.join("pdg/", _dir, testID)
            for _file in os.listdir(path):
                if _file.split('_')[-1] == func_id:
                    with open(os.path.join(path, _file), 'rb') as fin:
                        _dict[cfgNodeID] = pickle.load(fin)
                    break
    return _dict
def getFuncPDGByfuncIDAndtestID(func_ID, testID):
    """Return the pickled PDG whose filename suffix matches func_ID under
    pdg_db/<testID>/, or False when no such file exists."""
    path = os.path.join('pdg_db', testID)
    for _file in os.listdir(path):
        if _file.split('_')[-1] == str(func_ID):
            with open(os.path.join(path, _file), 'rb') as fin:
                return pickle.load(fin)
    return False


def getFuncPDGByfuncIDAndtestID_noctrl(func_ID, testID, _type):
    """Control-edge-free variant: search pdg/<subdir>/<testID>/.

    _type is unused here but kept for interface compatibility with callers.
    Returns the PDG or False.
    """
    pdg = False
    for _dir in os.listdir("pdg/"):
        if testID not in os.listdir(os.path.join("pdg/", _dir)):
            continue
        path = os.path.join("pdg/", _dir, testID)
        for _file in os.listdir(path):
            if _file.split('_')[-1] == str(func_ID):
                with open(os.path.join(path, _file), 'rb') as fin:
                    pdg = pickle.load(fin)
                break
    return pdg


# Private copies of the API tables from the module header so the helpers
# below are self-contained.
_CPY_DST0_APIS = ['sprintf', 'gets', 'fgets', '_memccpy', '_mbscpy', '_strncpy', 'wmemset', 'vasprintf', 'asprintf', 'wcsncpy', 'lstrcpy', '_wcsncpy', '_snprintf', 'memcpy', 'memmove', '_tcscpy', '_snwprintf', 'strcpy', 'CopyMemory', 'wcsncat', 'vsprintf', 'lstrcpyn', 'vsnprintf', '_mbsncat', 'wmemmove', 'memset', 'wmemcpy', 'strcat', 'fprintf', '_strncat', '_tcsncpy', '_mbsnbcpy', 'strncpy', 'strncat', 'wcscpy', 'snprintf', 'lstrcat']
_SCANF_APIS = ['vfscanf', 'fscanf', 'vscanf', 'scanf', 'vsscanf', 'sscanf', 'swscanf']


def _expand_member_chain(var):
    """Expand a member chain 'a . b . c' (or 'a -> b') into
    [full expression, 'a', 'a . b', ...]: the expression plus every prefix.
    A plain variable yields [var]. '.' is tried before '->', as originally."""
    for sep in (' . ', ' -> '):
        if var.find(sep) != -1:
            parts = [var]
            prefix = []
            while var.find(sep) != -1:
                prefix.append(var.split(sep)[0])
                parts.append(sep.join(prefix))
                var = sep.join(var.split(sep)[1:])
            return parts
    return [var]


def _normalize_arg(var):
    """Reduce one argument expression to its base variable(s).

    Strips a leading address-of ('x & y' keeps y), truncates pointer
    arithmetic ('p + n' / 'p - n' keep p), unwraps '( cast * ) x', then
    expands member chains. Replaces ~8 copy-pasted inline versions of this
    logic in the original getReturnVarOfAPI.
    """
    if var.find(' & ') > -1:
        var = var.split(' & ')[1]
    if var.find(' + ') != -1:
        var = var.split(' + ')[0]
    elif var.find(' - ') != -1:
        var = var.split(' - ')[0]
    elif var.find(' * ') != -1:
        temp = var.split(' * ')[1]
        if temp[0] == ')':
            var = temp[1:].strip()  # '( type * ) x' cast: keep x
        else:
            var = var.split(' * ')[0]
    return _expand_member_chain(var)


def getReturnVarOfAPI(code):
    """Return the variable(s) written by a known copy/format or scanf-family
    call in `code` (Joern-normalised, space-separated), or False when the
    code contains no such call.

    Copy APIs: the first argument (destination) and its member-chain
    prefixes. scanf-family: every output argument (after the stream/format
    arguments) expanded the same way.

    Bug fixes vs original: the scanf/vscanf branch used find(api) without the
    trailing space but split(api + ' '), which raised IndexError for code
    merely containing the substring 'scanf'; several scanf-family
    sub-branches also lacked ' -> ' chain handling present in the fscanf
    family -- both now behave uniformly.
    """
    for api in _CPY_DST0_APIS:
        if code.find(api + ' ') == -1:
            continue
        _list = code.split(api + ' ')
        if _list[0] == '' and _list[1][0] == '(':
            var = _list[1].split(',')[0].replace('(', '').strip()
            return _normalize_arg(var)

    for scanfapi in _SCANF_APIS:
        if code.find(scanfapi + ' ') == -1:
            continue
        _list = code.split(scanfapi + ' ')
        if _list[0] == '' and _list[1][0] == '(':
            # fscanf-likes take (stream/buffer, format, out...); scanf-likes
            # take (format, out...).
            skip = 2 if scanfapi in ['fscanf', 'sscanf', 'swscanf', 'vfscanf', 'vsscanf'] else 1
            list_var = [v.replace('(', '').strip() for v in _list[1].split(',')[skip:]]
            new_list_var = []
            for var in list_var:
                new_list_var += _normalize_arg(var)
            return new_list_var

    return False


def isEdgeExists(pdg, startnode, endnode, var):
    """True when pdg already has a startnode -> endnode edge labelled var.

    pdg.vs[edge.tuple]['name'] yields the [source, target] name pair of an
    igraph edge.
    """
    for edge in pdg.es:
        names = pdg.vs[edge.tuple]['name']
        if names[0] == startnode and names[1] == endnode and edge['var'] == var:
            return True
    return False


def addDataEdge(pdg, startnode, endnode, var):
    """Add a data edge labelled var unless an identical edge already exists;
    return the (possibly modified) pdg."""
    if isEdgeExists(pdg, startnode, endnode, var):
        return pdg
    pdg.add_edge(startnode, endnode, **{'var': var})
    return pdg


def _vars_from_decl(code):
    """Collect declared variable names from a (semicolon-stripped)
    declaration statement; False when it contains '(' (call / prototype).

    Bug fix vs original: each comma-separated declarator is now split on its
    own tokens (the original re-split the whole statement, so 'int a , b ;'
    yielded the last name for every declarator).
    """
    if '(' in code:
        return False
    list_var = []
    for decl in code.split(','):
        tokens = decl.strip().split(' ')
        if '[' in tokens:
            # array declarator: the name precedes '['
            list_var.append(tokens[tokens.index('[') - 1])
        else:
            list_var.append(tokens[-1])
    return list_var


def getVarOfNode(code):
    """Return the variable names a statement defines.

    Assignments: the tokens of the left-hand side outside any '[...]'
    subscript (an unsubscripted LHS yields [], matching the original).
    Declarations (with or without trailing ';'): one name per declarator,
    or False when the code looks like a call/prototype.
    """
    if code.find(' = ') != -1:
        list_var = []
        tokens = code.split(' = ')[0].split(' ')
        if ']' in tokens:
            index_right = tokens.index(']')
            index_left = tokens.index('[')
            i = 0
            while i < len(tokens):
                if i < index_left or i > index_right:
                    list_var.append(tokens[i])
                i += 1
        return list_var
    elif code[-1] == ';':
        return _vars_from_decl(code[:-1].strip())
    else:
        return _vars_from_decl(code)
def sortedNodesByLoc(list_node):
    """Return list_node sorted by source position (row, col) taken from the
    'location' attribute ('row:col:...'); nodes without a location sort last.

    Bug fix: the missing-location sentinel was the *string* 'inf', which only
    sorted correctly through CPython 2's arbitrary cross-type ordering and
    raises TypeError on Python 3 -- it is now float('inf').
    """
    _list = []
    for node in list_node:
        if node['location'] == None:
            row = float('inf')
            col = float('inf')
        else:
            loc = node['location'].split(':')
            row, col = int(loc[0]), int(loc[1])
        _list.append((row, col, node))

    # key on (row, col) only: node objects themselves are never compared
    _list.sort(key=lambda x: (x[0], x[1]))
    return [_tuple[2] for _tuple in _list]


def getFuncPDGById(testID, pdg_funcid):
    """Return the pickled PDG whose filename suffix equals pdg_funcid under
    pdg_db/<testID>/.

    Consistency fix: returns False (not implicit None) when no file matches,
    like the other getFuncPDG* helpers, so callers' `pdg == False` guard
    actually fires.
    """
    file_dir = os.path.join("pdg_db", testID)
    for _file in os.listdir(file_dir):
        if _file.split('_')[-1] == pdg_funcid:
            with open(os.path.join(file_dir, _file), 'rb') as f:
                return pickle.load(f)
    return False


def getFuncPDGById_noctrl(testID, pdg_funcid):
    """Control-edge-free variant of getFuncPDGById: search
    pdg/<subdir>/<testID>/. Returns the PDG or False."""
    for _dir in os.listdir("pdg/"):
        if testID not in os.listdir(os.path.join("pdg/", _dir)):
            continue
        file_dir = os.path.join("pdg/", _dir, testID)
        for _file in os.listdir(file_dir):
            if _file.split('_')[-1] == pdg_funcid:
                with open(os.path.join(file_dir, _file), 'rb') as f:
                    return pickle.load(f)
    return False


def getFuncPDGByNameAndtestID(func_name, testID):
    """Return the PDG whose filename minus the trailing '_funcid' suffix
    equals func_name, searched under pdg_db/<testID>/; False when absent."""
    path = os.path.join('pdg_db', testID)
    pdg = False
    for _file in os.listdir(path):
        if '_'.join(_file.split('_')[:-1]) == func_name:
            with open(os.path.join(path, _file), 'rb') as fin:
                pdg = pickle.load(fin)
            break
    return pdg


def getFuncPDGByNameAndtestID_noctrl(func_name, testID):
    """Control-edge-free variant of getFuncPDGByNameAndtestID.

    Bug fix: the directory scan listed 'pdg_db/' while opening files under
    'pdg/'; both now use 'pdg/', matching every other *_noctrl helper.
    """
    pdg = False
    for _dir in os.listdir("pdg/"):
        if testID not in os.listdir(os.path.join("pdg/", _dir)):
            continue
        path = os.path.join('pdg', _dir, testID)
        for _file in os.listdir(path):
            if '_'.join(_file.split('_')[:-1]) == func_name:
                with open(os.path.join(path, _file), 'rb') as fin:
                    pdg = pickle.load(fin)
                break
    return pdg


def _ctor_classname(new_rhs):
    """Extract the class name from the text following ' = new ' (skipping a
    leading 'const' and any '*'). Shared by both isNewOrDelOp variants."""
    tempvalue = new_rhs.replace('*', '')
    if tempvalue.split(' ')[0] != 'const':
        return tempvalue.split(' ')[0].strip()
    return tempvalue.split(' ')[1].strip()


def isNewOrDelOp(node, testID):
    """For a 'new' node return 'Class :: Class' (its constructor); for a
    'delete' node trace the deleted object back over data edges to its
    ' = new ' source and return 'Class :: ~Class'; otherwise False."""
    if node['code'].find(' = new ') != -1:
        classname = _ctor_classname(node['code'].split(' = new ')[1])
        return classname + ' :: ' + classname

    elif node['code'].find('delete ') != -1:
        objectname = node['code'].split('delete ')[1].replace(';', '').strip()
        pdg = getFuncPDGByfuncIDAndtestID(node['functionId'], testID)
        # NOTE(review): reconstructed nesting -- the edge scan runs for the
        # node matching the 'delete' statement, as the n-references imply.
        for n in pdg.vs:
            if n['name'] == node['name']:
                list_s = n.predecessors()
                for edge in pdg.es:
                    if pdg.vs[edge.tuple[0]] in list_s and pdg.vs[edge.tuple[1]] == n and edge['var'] == objectname:
                        start_n = pdg.vs[edge.tuple[0]]
                        if start_n['code'].find(' = new ') != -1:
                            classname = _ctor_classname(start_n['code'].split(' = new ')[1])
                            return classname + ' :: ~' + classname

    return False


def isNewOrDelOp_noctrl(node, testID, _type):
    """noctrl variant of isNewOrDelOp; _type selects which PDG store is
    consulted for the 'delete' back-trace."""
    if node['code'].find(' = new ') != -1:
        classname = _ctor_classname(node['code'].split(' = new ')[1])
        return classname + ' :: ' + classname

    elif node['code'].find('delete ') != -1:
        objectname = node['code'].split('delete ')[1].replace(';', '').strip()
        if _type:
            pdg = getFuncPDGByfuncIDAndtestID_noctrl(node['functionId'], testID, _type)
        else:
            pdg = getFuncPDGByfuncIDAndtestID(node['functionId'], testID)

        for n in pdg.vs:
            if n['name'] == node['name']:
                list_s = n.predecessors()
                for edge in pdg.es:
                    if pdg.vs[edge.tuple[0]] in list_s and pdg.vs[edge.tuple[1]] == n and edge['var'] == objectname:
                        start_n = pdg.vs[edge.tuple[0]]
                        if start_n['code'].find(' = new ') != -1:
                            classname = _ctor_classname(start_n['code'].split(' = new ')[1])
                            return classname + ' :: ~' + classname

    return False
# --- get_cfg_relation.py ---
# -- coding: utf-8 --
from access_db_operate import *
from complete_PDG import *
import re
from py2neo.packages.httpstream import http

# py2neo's default HTTP timeout is too small for large graph traversals
http.socket_timeout = 9999


def getSubCFGGraph(startNode, list_node, not_scan_list):
    """Depth-first collect every CFG node reachable from startNode.

    list_node accumulates visited nodes; not_scan_list accumulates their
    names and doubles as the visited set. Traversal stops at already-seen
    nodes and at the EXIT node. Returns (list_node, not_scan_list).
    """
    if startNode['name'] in not_scan_list or startNode['code'] == 'EXIT':
        return list_node, not_scan_list

    list_node.append(startNode)
    not_scan_list.append(startNode['name'])

    for p_node in startNode.successors():
        list_node, not_scan_list = getSubCFGGraph(p_node, list_node, not_scan_list)

    return list_node, not_scan_list


def _stmts_until_closing_brace(list_stmt):
    """Return how many lines of list_stmt precede the one whose '}' balances
    the first '{'. Extracted from two identical inline scans.

    NOTE(review): the balance check is taken to sit inside the '}' case, as
    a top-level check would trip immediately at depth 0 -- confirm against
    the original if behaviour looks off.
    """
    left_brace = 0
    index = 0
    for stmt in list_stmt:
        closed = False
        for c in stmt:
            if c == '{':
                left_brace += 1
            elif c == '}':
                left_brace -= 1
                if left_brace == 0:
                    closed = True
                    break
        if closed:
            break
        index += 1
    return index


def getCtrlRealtionOfCFG(cfg):
    """For every 'if' condition node in cfg, determine which CFG nodes lie in
    its true branch and which in its false branch.

    Returns {if_node_name: ([true_branch_names], [false_branch_names])}.
    Conditions are classified by re-reading the source line (if / while /
    for / switch); braced and unbraced bodies are delimited by line numbers
    from the source file.
    """
    # 1) keep only Condition nodes whose source line is an 'if'
    list_ifstmt_nodes = []
    for node in cfg.vs:
        if node['type'] != 'Condition':
            continue
        location_row = int(node['location'].split(':')[0])
        with open(node['filepath'], 'r') as fin:
            content = fin.readlines()
        src_code = content[location_row - 1]

        result = re.search("(?:if|while|for|switch)", src_code)
        # no keyword on the line: treated as 'for' (e.g. the condition part
        # of a multi-line for-header), as in the original
        res = 'for' if result is None else result.group()
        if res == '':
            print("error!")
            exit()
        elif res == 'if':
            list_ifstmt_nodes.append(node)

    _dict = {}
    for if_node in list_ifstmt_nodes:
        # 2) collect the subgraphs reachable over the True / False edges
        list_truestmt_nodes = []
        list_falsestmt_nodes = []
        for es in cfg.es:
            if cfg.vs[es.tuple[0]] == if_node and es['var'] == 'True':
                start_node = cfg.vs[es.tuple[1]]
                list_truestmt_nodes, _ = getSubCFGGraph(start_node, list_truestmt_nodes, [if_node['name']])
            elif cfg.vs[es.tuple[0]] == if_node and es['var'] == 'False':
                start_node = cfg.vs[es.tuple[1]]
                list_falsestmt_nodes, _ = getSubCFGGraph(start_node, list_falsestmt_nodes, [if_node['name']])

        # 3) nodes reachable from both branches (past the merge point)
        #    belong to neither branch
        _share_list = [t_node for t_node in list_truestmt_nodes if t_node in list_falsestmt_nodes]

        if _share_list != []:
            list_truestmt_nodes = [n for n in list_truestmt_nodes if n not in _share_list]
            list_falsestmt_nodes = [n for n in list_falsestmt_nodes if n not in _share_list]
            _dict[if_node['name']] = ([t_node['name'] for t_node in list_truestmt_nodes],
                                      [f_node['name'] for f_node in list_falsestmt_nodes])
        else:
            # 4) no merge point: fall back to source-text (line/brace) analysis
            with open(cfg.vs[0]['filepath'], 'r') as fin:
                content = fin.readlines()
            if_line = int(if_node['location'].split(':')[0]) - 1

            if list_truestmt_nodes == []:
                continue
            sorted_list_truestmt_nodes = sortedNodesByLoc(list_truestmt_nodes)
            true_stmt_start = sorted_list_truestmt_nodes[0]
            start_line = int(true_stmt_start['location'].split(':')[0])
            str_if_stmts = '\n'.join(content[if_line:start_line])

            if '{' in str_if_stmts:
                # braced true body: bound it by the matching '}'
                if sorted_list_truestmt_nodes[-1]['location'] == None:
                    end_line = int(sorted_list_truestmt_nodes[-2]['location'].split(':')[0])
                else:
                    end_line = int(sorted_list_truestmt_nodes[-1]['location'].split(':')[0])
                index = _stmts_until_closing_brace(content[if_line:end_line])
                real_end_line = int(if_node['location'].split(':')[0]) + index
                list_real_true_stmt = [n for n in sorted_list_truestmt_nodes
                                       if n['location'] != None
                                       and if_line + 1 <= int(n['location'].split(':')[0]) <= real_end_line]
            else:
                # unbraced body: only the first statement is in the branch
                list_real_true_stmt = [true_stmt_start]

            if list_falsestmt_nodes == []:
                continue
            sorted_list_falsestmt_nodes = sortedNodesByLoc(list_falsestmt_nodes)
            false_stmt_start = sorted_list_falsestmt_nodes[0]
            if sorted_list_truestmt_nodes[-1]['location'] != None:
                start_line = int(sorted_list_truestmt_nodes[-1]['location'].split(':')[0])
            else:
                start_line = int(sorted_list_truestmt_nodes[-2]['location'].split(':')[0])
            end_line = int(false_stmt_start['location'].split(':')[0])
            str_else_stmts = '\n'.join(content[start_line:end_line])

            if 'else' in str_else_stmts:
                else_line = 0
                for line in content[start_line:end_line]:
                    if 'else' in line:
                        break
                    else_line += 1
                real_else_line = start_line + else_line + 1
                str_else_stmts = str_else_stmts.split('else')[1]

                if '{' in str_else_stmts:
                    if sorted_list_falsestmt_nodes[-1]['location'] != None:
                        end_line = int(sorted_list_falsestmt_nodes[-1]['location'].split(':')[0])
                    elif sorted_list_falsestmt_nodes[-2]['location'] != None:
                        end_line = int(sorted_list_falsestmt_nodes[-2]['location'].split(':')[0])
                    else:
                        end_line = int(sorted_list_falsestmt_nodes[-3]['location'].split(':')[0])
                    index = _stmts_until_closing_brace(content[real_else_line - 1:end_line])
                    # NOTE(review): counted from the if line, as in the
                    # original -- looks suspicious for the else branch but is
                    # preserved
                    real_end_line = int(if_node['location'].split(':')[0]) + index
                    list_real_false_stmt = [n for n in sorted_list_falsestmt_nodes
                                            if n['location'] != None
                                            and if_line + 1 <= int(n['location'].split(':')[0]) <= real_end_line]
                else:
                    list_real_false_stmt = [false_stmt_start]
                    print("false_stmt_start " + false_stmt_start['name'])
            else:
                list_real_false_stmt = []

            _dict[if_node['name']] = ([t_node['name'] for t_node in list_real_true_stmt],
                                      [f_node['name'] for f_node in list_real_false_stmt])

    return _dict


def completeDataEdgeOfCFG(cfg):
    """Ensure every Statement node is connected in source order: a node with
    no predecessor gets an edge from the preceding node, one with no
    successor gets an edge to the following node. Returns the patched cfg."""
    list_ordered_list = sortedNodesByLoc(cfg.vs)

    for node in list_ordered_list:
        if node['type'] != 'Statement':
            continue
        list_pre = node.predecessors()
        list_su = node.successors()

        if list_pre == [] or list_pre == None:
            index = list_ordered_list.index(node)
            addDataEdge(cfg, list_ordered_list[index - 1]['name'], node['name'], None)

        if list_su == [] or list_su == None:
            index = list_ordered_list.index(node)
            addDataEdge(cfg, node['name'], list_ordered_list[index + 1]['name'], None)

    return cfg


def main():
    """Build a CFG for every function in the Joern/Neo4j database, derive its
    if-branch relations, and pickle the results under cfg_db/<testID>/."""
    j = JoernSteps()
    j.connectToDatabase()
    for node in getALLFuncNode(j):
        testID = getFuncFile(j, node._id).split('/')[-2]
        path = os.path.join("cfg_db", testID)
        store_file_name = node.properties['name'] + '_' + str(node._id)
        store_path = os.path.join(path, store_file_name)

        initcfg = translateCFGByNode(j, node)  # raw CFG from the graph DB
        opt_cfg_1 = modifyStmtNode(initcfg)
        cfg = completeDataEdgeOfCFG(opt_cfg_1)
        _dict = getCtrlRealtionOfCFG(cfg)

        # invert if-name -> covered-nodes into node -> if-names
        _dict_node2ifstmt = {}
        for key in _dict.keys():
            for v in _dict[key][0] + _dict[key][1]:
                _dict_node2ifstmt.setdefault(v, []).append(key)
        for key in _dict_node2ifstmt.keys():
            _dict_node2ifstmt[key] = list(set(_dict_node2ifstmt[key]))

        if not os.path.exists(path):
            os.mkdir(path)
        if not os.path.exists(store_path):
            os.mkdir(store_path)
        else:
            continue  # this function was already processed

        for filename, obj in [('cfg', cfg),
                              ('dict_if2cfgnode', _dict),
                              ('dict_cfgnode2if', _dict_node2ifstmt)]:
            with open(os.path.join(store_path, filename), 'wb') as fout:
                pickle.dump(obj, fout, True)  # protocol 1, as before

        print(node.properties['name'])
        print(_dict)
        print(_dict_node2ifstmt)
        print('')


if __name__ == '__main__':
    main()
fout, True) 320 | fout.close() 321 | 322 | print node.properties['name'] 323 | print _dict 324 | print _dict_node2ifstmt 325 | print '' 326 | 327 | 328 | if __name__ == '__main__': 329 | main() 330 | -------------------------------------------------------------------------------- /make_label.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | import os 3 | import pickle 4 | 5 | 6 | f = open("dict_cwe2father.pkl", 'rb') 7 | dict_cwe2father = pickle.load(f) 8 | f.close() 9 | 10 | #print dict_cwe2father['CWE-787'] 11 | 12 | f = open("label_vec_type.pkl", 'rb') 13 | label_vec_type = pickle.load(f) 14 | f.close() 15 | 16 | f = open("dict_testcase2code.pkl",'rb') 17 | dict_testcase2code = pickle.load(f) 18 | f.close() 19 | 20 | 21 | def get_label_veclist(list_cwe): 22 | list_label = [0] * len(label_vec_type) 23 | for cweid in list_cwe: 24 | index = label_vec_type.index(cweid) 25 | list_label[index] = 1 26 | 27 | return list_label 28 | 29 | 30 | def get_label_cwe(cweid, label_cwe): 31 | if cweid in label_vec_type: 32 | label_cwe.append(cweid) 33 | return label_cwe 34 | 35 | else: 36 | if cweid == 'CWE-1000': 37 | label_cwe = label_vec_type 38 | else: 39 | fathercweid = dict_cwe2father[cweid] 40 | 41 | for _id in fathercweid: 42 | label_cwe = get_label_cwe(_id, label_cwe) 43 | 44 | return label_cwe 45 | 46 | 47 | def make_label(path, dict_vuln2testcase, _type): 48 | f = open(path, 'r') 49 | context = f.read().split('------------------------------')[:-1] 50 | f.close() 51 | 52 | context[0] = '\n' + context[0] 53 | 54 | list_all_label = [] 55 | list_all_vulline = [] 56 | for _slice in context: 57 | vulline = [] 58 | index_line = _slice.split('\n')[1] 59 | list_codes = _slice.split('\n')[2:-1] 60 | case_name = index_line.split(' ')[1] 61 | key_name = '/'.join(index_line.split(' ')[1].split('/')[-2:]) 62 | print index_line 63 | 64 | if key_name in dict_vuln2testcase.keys(): 65 | list_codeline = [code.split(' ')[-1] for 
code in list_codes] 66 | dict = dict_vuln2testcase[key_name] 67 | 68 | _dict_cwe2line_target = {} 69 | _dict_cwe2line = {} 70 | for _dict in dict: 71 | for key in _dict.keys(): 72 | if _dict[key] not in _dict_cwe2line_target.keys(): 73 | _dict_cwe2line_target[_dict[key]] = [key] 74 | else: 75 | _dict_cwe2line_target[_dict[key]].append(key) 76 | 77 | 78 | for line in list_codeline: 79 | line = line.strip() 80 | if line in _dict.keys(): 81 | if not ' '.join((list_codes[list_codeline.index(line)].strip()).split(' ')[:-1]) == dict_testcase2code[key_name+"/"+line].strip(): 82 | continue 83 | cweid = _dict[line] 84 | vulline.append(list_codeline.index(line)) 85 | 86 | if cweid not in _dict_cwe2line.keys(): 87 | _dict_cwe2line[cweid] = [line] 88 | else: 89 | _dict_cwe2line[cweid].append(line) 90 | 91 | if _type: 92 | list_vuln_cwe = [] 93 | for key in _dict_cwe2line.keys(): 94 | if key == 'Any...': 95 | continue 96 | if len(_dict_cwe2line[key]) == len(_dict_cwe2line_target[key]): 97 | label_cwe = [] 98 | label_cwe = get_label_cwe(key, label_cwe) 99 | list_vuln_cwe += label_cwe 100 | 101 | else: 102 | list_vuln_cwe = [] 103 | for key in _dict_cwe2line.keys(): 104 | if key == 'Any...': 105 | continue 106 | label_cwe = [] 107 | label_cwe = get_label_cwe(key, label_cwe) 108 | list_vuln_cwe += label_cwe 109 | 110 | if list_vuln_cwe == []: 111 | list_label = [0] * len(label_vec_type) 112 | else: 113 | list_vuln_cwe = list(set(list_vuln_cwe)) 114 | list_label = get_label_veclist(list_vuln_cwe) 115 | 116 | else: 117 | list_label = [0] * len(label_vec_type) 118 | 119 | list_all_label.append(list_label) 120 | list_all_vulline.append(vulline) 121 | 122 | return list_all_label, list_all_vulline 123 | 124 | 125 | def main(): 126 | f = open("dict_flawline2filepath.pkl", 'rb') 127 | dict_vuln2testcase = pickle.load(f) 128 | f.close() 129 | _type = False 130 | time = '4' 131 | # lang = 'C/test_data/' + time 132 | lang = '' 133 | 134 | path = os.path.join(lang, 'api_slices.txt') 135 | 
list_all_apilabel, list_all_vulline = make_label(path, dict_vuln2testcase, _type) 136 | dec_path = os.path.join(lang, 'api_slices_label.pkl') 137 | f = open(dec_path, 'wb') 138 | pickle.dump(list_all_apilabel, f, True) 139 | f.close() 140 | dec_path = os.path.join(lang, 'api_slices_vulline.pkl') 141 | f = open(dec_path, 'wb') 142 | pickle.dump(list_all_vulline, f) 143 | f.close() 144 | 145 | path = os.path.join(lang, 'arraysuse_slices.txt') 146 | list_all_arraylabel = make_label(path, dict_vuln2testcase, _type) 147 | dec_path = os.path.join(lang, 'array_slice_label.pkl') 148 | f = open(dec_path, 'wb') 149 | pickle.dump(list_all_arraylabel, f, True) 150 | f.close() 151 | 152 | path = os.path.join(lang, 'pointersuse_slices.txt') 153 | list_all_pointerlabel = make_label(path, dict_vuln2testcase, _type) 154 | dec_path = os.path.join(lang, 'pointer_slice_label.pkl') 155 | f = open(dec_path, 'wb') 156 | pickle.dump(list_all_pointerlabel, f, True) 157 | f.close() 158 | 159 | path = os.path.join(lang, 'integeroverflow_slices.txt') 160 | list_all_exprlabel = make_label(path, dict_vuln2testcase, _type) 161 | dec_path = os.path.join(lang, 'expr_slice_label.pkl') 162 | f = open(dec_path, 'wb') 163 | pickle.dump(list_all_exprlabel, f, True) 164 | f.close() 165 | 166 | 167 | if __name__ == '__main__': 168 | main() 169 | -------------------------------------------------------------------------------- /points_get.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | from access_db_operate import * 3 | 4 | 5 | def get_all_sensitiveAPI(db): 6 | fin = open("sensitive_func.pkl", 'rb') 7 | list_sensitive_funcname = pickle.load(fin) 8 | fin.close() 9 | 10 | _dict = {} 11 | for func_name in list_sensitive_funcname: 12 | list_callee_cfgnodeID = [] 13 | if func_name.find('main') != -1: 14 | list_main_func = [] 15 | list_mainfunc_node = getFunctionNodeByName(db, func_name) 16 | 17 | if list_mainfunc_node != []: 18 | file_path = 
getFuncFile(db, list_mainfunc_node[0]._id) 19 | testID = file_path.split('/')[-2] 20 | for mainfunc in list_mainfunc_node: 21 | list_parameters = get_parameter_by_funcid(db, mainfunc._id) 22 | 23 | if list_parameters != []: 24 | list_callee_cfgnodeID.append([testID, ([str(v) for v in list_parameters], str(mainfunc._id), func_name)]) 25 | 26 | else: 27 | continue 28 | 29 | else: 30 | list_callee_id = get_calls_id(db, func_name) 31 | if list_callee_id == []: 32 | continue 33 | 34 | 35 | for _id in list_callee_id: 36 | cfgnode = getCFGNodeByCallee(db, _id) 37 | if cfgnode != None: 38 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 39 | testID = file_path.split('/')[-2] 40 | list_callee_cfgnodeID.append([testID, ([str(cfgnode._id)], str(cfgnode.properties['functionId']), func_name)]) 41 | 42 | if list_callee_cfgnodeID != []: 43 | for _l in list_callee_cfgnodeID: 44 | if _l[0] in _dict.keys(): 45 | _dict[_l[0]].append(_l[1]) 46 | else: 47 | _dict[_l[0]] = [_l[1]] 48 | 49 | else: 50 | continue 51 | 52 | return _dict 53 | 54 | 55 | def get_all_pointer(db): 56 | _dict = {} 57 | list_pointers_node = get_pointers_node(db) 58 | for cfgnode in list_pointers_node: 59 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 60 | testID = file_path.split('/')[-2] 61 | pointer_defnode = get_def_node(db, cfgnode._id) 62 | pointer_name = [] 63 | for node in pointer_defnode: 64 | name = node.properties['code'].replace('*', '').strip() 65 | if name not in pointer_name: 66 | pointer_name.append(name) 67 | 68 | if testID in _dict.keys(): 69 | _dict[testID].append(([str(cfgnode._id)], str(cfgnode.properties['functionId']), pointer_name)) 70 | else: 71 | _dict[testID] = [([str(cfgnode._id)], str(cfgnode.properties['functionId']), pointer_name)] 72 | 73 | return _dict 74 | 75 | 76 | def get_all_array(db): 77 | _dict = {} 78 | list_arrays_node = get_arrays_node(db) 79 | for cfgnode in list_arrays_node: 80 | file_path = getFuncFile(db, 
int(cfgnode.properties['functionId'])) 81 | testID = file_path.split('/')[-2] 82 | array_defnode = get_def_node(db, cfgnode._id) 83 | array_name = [] 84 | for node in array_defnode: 85 | name = node.properties['code'].replace('[', '').replace(']', '').replace('*', '').strip() 86 | if name not in array_name: 87 | array_name.append(name) 88 | 89 | if testID in _dict.keys(): 90 | _dict[testID].append(([str(cfgnode._id)], str(cfgnode.properties['functionId']), array_name)) 91 | else: 92 | _dict[testID] = [([str(cfgnode._id)], str(cfgnode.properties['functionId']), array_name)] 93 | 94 | return _dict 95 | 96 | 97 | def get_all_pointer_use(db): 98 | _dict = {} 99 | list_pointers_node = get_pointers_node(db) 100 | for cfgnode in list_pointers_node: 101 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 102 | testID = file_path.split('/')[-2] 103 | pointer_defnode = get_def_node(db, cfgnode._id) 104 | 105 | _temp_list = [] 106 | for node in pointer_defnode: 107 | name = node.properties['code'].strip() 108 | list_usenodes = get_all_use_bydefnode(db, node._id) 109 | list_defnodes = get_all_def_bydefnode(db, node._id) 110 | 111 | i = 0 112 | while i < len(list_defnodes): 113 | if list_defnodes[i]._id == cfgnode._id: 114 | del list_defnodes[i] 115 | else: 116 | i += 1 117 | 118 | list_usenodes += list_defnodes 119 | 120 | print len(list_usenodes) 121 | for i in list_usenodes: 122 | if str(i).find(",location")==-1: 123 | list_usenodes.remove(i) 124 | loc_list=[] 125 | final_list=[] 126 | for i in list_usenodes: 127 | #print(i) 128 | if ',location' in str(i): 129 | print(str(i)) 130 | location=str(i).split(",type:")[0].split(",location:")[1][1:-1].split(":") 131 | count=int(location[0]) 132 | loc_list.append(count) 133 | print loc_list 134 | if len(loc_list)!=0: 135 | a=loc_list.index(max(loc_list)) 136 | final_list.append(list_usenodes[a]) 137 | for use_node in final_list: 138 | if use_node._id in _temp_list: 139 | continue 140 | else: 141 | 
_temp_list.append(use_node._id) 142 | 143 | if testID in _dict.keys(): 144 | _dict[testID].append(([str(use_node._id)], str(use_node.properties['functionId']), name)) 145 | else: 146 | _dict[testID] = [([str(use_node._id)], str(use_node.properties['functionId']), name)] 147 | 148 | return _dict 149 | 150 | 151 | def get_all_array_use(db): 152 | _dict = {} 153 | list_arrays_node = get_arrays_node(db) 154 | for cfgnode in list_arrays_node: 155 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 156 | testID = file_path.split('/')[-2] 157 | array_defnode = get_def_node(db, cfgnode._id) 158 | _temp_list = [] 159 | for node in array_defnode: 160 | name = node.properties['code'].strip() 161 | list_usenodes = get_all_use_bydefnode(db, node._id) 162 | list_defnodes = get_all_def_bydefnode(db, node._id) 163 | 164 | i = 0 165 | while i < len(list_defnodes): 166 | if list_defnodes[i]._id == cfgnode._id: 167 | del list_defnodes[i] 168 | else: 169 | i += 1 170 | 171 | list_usenodes += list_defnodes 172 | 173 | for use_node in list_usenodes: 174 | if use_node._id in _temp_list: 175 | continue 176 | else: 177 | _temp_list.append(use_node._id) 178 | 179 | if testID in _dict.keys(): 180 | _dict[testID].append(([str(use_node._id)], str(use_node.properties['functionId']), name)) 181 | else: 182 | _dict[testID] = [([str(use_node._id)], str(use_node.properties['functionId']), name)] 183 | 184 | return _dict 185 | 186 | 187 | def get_all_integeroverflow_point(db): 188 | _dict = {} 189 | list_exprstmt_node = get_exprstmt_node(db) 190 | for cfgnode in list_exprstmt_node: 191 | if cfgnode.properties['code'].find(' = ') > -1: 192 | code = cfgnode.properties['code'].split(' = ')[-1] 193 | pattern = re.compile("((?:_|[A-Za-z])\w*(?:\s(?:\+|\-|\*|\/)\s(?:_|[A-Za-z])\w*)+)") 194 | result = re.search(pattern, code) 195 | 196 | if result == None: 197 | continue 198 | else: 199 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 200 | testID = 
file_path.split('/')[-2] 201 | name = cfgnode.properties['code'].strip() 202 | 203 | if testID in _dict.keys(): 204 | _dict[testID].append(([str(cfgnode._id)], str(cfgnode.properties['functionId']), name)) 205 | else: 206 | _dict[testID] = [([str(cfgnode._id)], str(cfgnode.properties['functionId']), name)] 207 | 208 | else: 209 | code = cfgnode.properties['code'] 210 | pattern = re.compile("(?:\s\/\s(?:_|[A-Za-z])\w*\s)") 211 | result = re.search(pattern, code) 212 | if result == None: 213 | continue 214 | 215 | else: 216 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 217 | testID = file_path.split('/')[-2] 218 | name = cfgnode.properties['code'].strip() 219 | 220 | if testID in _dict.keys(): 221 | _dict[testID].append(([str(cfgnode._id)], str(cfgnode.properties['functionId']), name)) 222 | else: 223 | _dict[testID] = [([str(cfgnode._id)], str(cfgnode.properties['functionId']), name)] 224 | 225 | return _dict 226 | 227 | 228 | if __name__ == '__main__': 229 | j = JoernSteps() 230 | j.connectToDatabase() 231 | 232 | # _dict = get_all_sensitiveAPI(j) 233 | # f = open("sensifunc_slice_points.pkl", 'wb') 234 | # pickle.dump(_dict, f, True) 235 | # f.close() 236 | # print _dict 237 | 238 | # _dict = get_all_pointer_use(j) 239 | # f = open("pointuse_slice_points.pkl", 'wb') 240 | # pickle.dump(_dict, f, True) 241 | # f.close() 242 | # print _dict 243 | 244 | # _dict = get_all_array_use(j) 245 | # f = open("arrayuse_slice_points.pkl", 'wb') 246 | # pickle.dump(_dict, f, True) 247 | # f.close() 248 | # print _dict 249 | 250 | _dict = get_all_integeroverflow_point(j) 251 | f = open("integeroverflow_slice_points_new.pkl", 'wb') 252 | pickle.dump(_dict, f, True) 253 | f.close() 254 | 255 | 256 | -------------------------------------------------------------------------------- /slice_op.py: -------------------------------------------------------------------------------- 1 | from general_op import * 2 | 3 | 4 | def sub_slice_backwards(startnode, list_node, 
not_scan_list): 5 | if startnode['name'] in not_scan_list: 6 | return list_node, not_scan_list 7 | 8 | else: 9 | list_node.append(startnode) 10 | not_scan_list.append(startnode['name']) 11 | 12 | predecessors = startnode.predecessors() 13 | 14 | if predecessors != []: 15 | for p_node in predecessors: 16 | list_node, not_scan_list = sub_slice_backwards(p_node, list_node, not_scan_list) 17 | 18 | return list_node, not_scan_list 19 | 20 | 21 | def program_slice_backwards(pdg, list_startNode):#startNode is a list 22 | list_all_node = [] 23 | not_scan_list = [] 24 | for startNode in list_startNode: 25 | list_node = [startNode] 26 | not_scan_list.append(startNode['name']) 27 | predecessors = startNode.predecessors() 28 | if predecessors != []: 29 | for p_node in predecessors: 30 | list_node, not_scan_list = sub_slice_backwards(p_node, list_node, not_scan_list) 31 | 32 | list_all_node += list_node 33 | 34 | #Add function define line 35 | if startNode['functionId'] in not_scan_list: 36 | continue 37 | for node in pdg.vs: 38 | if node['name'] == startNode['functionId']: 39 | list_all_node.append(node) 40 | not_scan_list.append(node['name']) 41 | break 42 | 43 | print("list_all_node:", list_all_node) 44 | list_ordered_node = sortedNodesByLoc(list_all_node) 45 | 46 | _list_re = [] 47 | a = 0 48 | while a < len(list_ordered_node): 49 | if list_ordered_node[a]['name'] not in _list_re: 50 | _list_re.append(list_ordered_node[a]['name']) 51 | a += 1 52 | else: 53 | del list_ordered_node[a] 54 | return list_ordered_node 55 | 56 | 57 | def sub_slice_forward(startnode, list_node, not_scan_list): 58 | if startnode['name'] in not_scan_list: 59 | return list_node, not_scan_list 60 | 61 | else: 62 | list_node.append(startnode) 63 | not_scan_list.append(startnode['name']) 64 | 65 | successors = startnode.successors() 66 | if successors != []: 67 | for p_node in successors: 68 | list_node, not_scan_list = sub_slice_forward(p_node, list_node, not_scan_list) 69 | 70 | return list_node, 
not_scan_list 71 | 72 | 73 | def program_slice_forward(pdg, list_startNode):#startNode is a list of parameters, only consider data dependency 74 | pdg = del_ctrl_edge(pdg) 75 | 76 | list_all_node = [] 77 | not_scan_list = [] 78 | for startNode in list_startNode: 79 | list_node = [startNode] 80 | not_scan_list.append(startNode['name']) 81 | successors = startNode.successors() 82 | 83 | if successors != []: 84 | for p_node in successors: 85 | list_node, not_scan_list = sub_slice_forward(p_node, list_node, not_scan_list) 86 | 87 | list_all_node += list_node 88 | 89 | list_ordered_node = sortedNodesByLoc(list_all_node) 90 | 91 | a = 0 92 | _list_re = [] 93 | while a < len(list_ordered_node): 94 | if list_ordered_node[a]['name'] not in _list_re: 95 | _list_re.append(list_ordered_node[a]['name']) 96 | a += 1 97 | else: 98 | del list_ordered_node[a] 99 | 100 | return list_ordered_node 101 | 102 | 103 | def process_cross_func(to_scan_list, testID, slicetype, list_result_node, not_scan_func_list): 104 | if to_scan_list == []: 105 | return list_result_node, not_scan_func_list 106 | 107 | for node in to_scan_list: 108 | if node['name'] in not_scan_func_list: 109 | continue 110 | 111 | ret = isNewOrDelOp(node, testID) 112 | if ret: 113 | funcname = ret 114 | pdg = getFuncPDGByNameAndtestID(funcname, testID) 115 | 116 | 117 | if pdg == False: 118 | not_scan_func_list.append(node['name']) 119 | continue 120 | 121 | else: 122 | result_list = sortedNodesByLoc(pdg.vs) 123 | 124 | not_scan_func_list.append(node['name']) 125 | 126 | index = 0 127 | for result_node in list_result_node: 128 | if result_node['name'] == node['name']: 129 | break 130 | else: 131 | index += 1 132 | 133 | list_result_node = list_result_node[:index+1] + result_list + list_result_node[index+1:] 134 | 135 | list_result_node, not_scan_func_list = process_cross_func(result_list, testID, slicetype, list_result_node, not_scan_func_list) 136 | 137 | 138 | else: 139 | ret = isFuncCall(node)#if funccall ,if so 
,return funcnamelist 140 | if ret: 141 | 142 | for funcname in ret: 143 | if funcname.find('->') != -1: 144 | real_funcname = funcname.split('->')[-1].strip() 145 | objectname = funcname.split('->')[0].strip() 146 | 147 | funcID = node['functionId'] 148 | src_pdg = getFuncPDGByfuncIDAndtestID(funcID, testID) 149 | if src_pdg == False: 150 | continue 151 | 152 | classname = False 153 | for src_pnode in src_pdg.vs: 154 | if src_pnode['code'].find(objectname) != -1 and src_pnode['code'].find(' new ') != -1: 155 | tempvalue = src_pnode['code'].split(' new ')[1].replace('*', '').strip() 156 | if tempvalue.split(' ')[0] != 'const': 157 | classname = tempvalue.split(' ')[0] 158 | else: 159 | classname = tempvalue.split(' ')[1] 160 | 161 | break 162 | 163 | if classname == False: 164 | continue 165 | 166 | funcname = classname + ' :: ' + real_funcname 167 | pdg = getFuncPDGByNameAndtestID_noctrl(funcname, testID) 168 | 169 | 170 | elif funcname.find('.') != -1: 171 | real_funcname = funcname.split('.')[-1].strip() 172 | objectname = funcname.split('.')[0].strip() 173 | 174 | funcID = node['functionId'] 175 | src_pdg = getFuncPDGByNameAndtestID_noctrl(funcID, testID) 176 | if src_pdg == False: 177 | continue 178 | classname = False 179 | for src_pnode in src_pdg.vs: 180 | if src_pnode['code'].find(objectname) != -1 and src_pnode['code'].find(' new ') != -1: 181 | tempvalue = src_pnode['code'].split(' new ')[1].replace('*', '').strip() 182 | if tempvalue.split(' ')[0] != 'const': 183 | classname = tempvalue.split(' ')[0] 184 | else: 185 | classname = tempvalue.split(' ')[1] 186 | 187 | break 188 | 189 | if classname == False: 190 | continue 191 | 192 | funcname = classname + ' :: ' + real_funcname 193 | pdg = getFuncPDGByNameAndtestID(funcname, testID) 194 | 195 | elif funcname.find('.') != -1: 196 | real_funcname = funcname.split('.')[-1].strip() 197 | objectname = funcname.split('.')[0].strip() 198 | 199 | funcID = node['functionId'] 200 | src_pdg = 
getFuncPDGByfuncIDAndtestID(funcID, testID) 201 | classname = False 202 | for src_pnode in src_pdg.vs: 203 | if src_pnode['code'].find(objectname) != -1 and src_pnode['code'].find(' new ') != -1: 204 | tempvalue = src_pnode['code'].split(' new ')[1].replace('*', '').strip() 205 | if tempvalue.split(' ')[0] != 'const': 206 | classname = tempvalue.split(' ')[0] 207 | else: 208 | classname = tempvalue.split(' ')[1] 209 | 210 | break 211 | 212 | if classname == False: 213 | continue 214 | 215 | funcname = classname + ' :: ' + real_funcname 216 | pdg = getFuncPDGByNameAndtestID(funcname, testID) 217 | 218 | else: 219 | pdg = getFuncPDGByNameAndtestID(funcname, testID) 220 | 221 | if pdg == False: 222 | not_scan_func_list.append(node['name']) 223 | continue 224 | 225 | else: 226 | if slicetype == 0: 227 | ret_node = [] 228 | for vertex in pdg.vs: 229 | if vertex['type'] == 'ReturnStatement': 230 | ret_node.append(vertex) 231 | 232 | result_list = program_slice_backwards(pdg, ret_node) 233 | not_scan_func_list.append(node['name']) 234 | 235 | index = 0 236 | for result_node in list_result_node: 237 | if result_node['name'] == node['name']: 238 | break 239 | else: 240 | index += 1 241 | 242 | list_result_node = list_result_node[:index+1] + result_list + list_result_node[index+1:] 243 | 244 | list_result_node, not_scan_func_list = process_cross_func(result_list, testID, slicetype, list_result_node, not_scan_func_list) 245 | 246 | elif slicetype == 1: 247 | param_node = [] 248 | FuncEntryNode = False 249 | for vertex in pdg.vs: 250 | if vertex['type'] == 'Parameter': 251 | param_node.append(vertex) 252 | elif vertex['type'] == 'Function': 253 | FuncEntryNode = vertex 254 | 255 | if param_node != []: 256 | result_list = program_slice_forward(pdg, param_node) 257 | else: 258 | result_list = sortedNodesByLoc(pdg.vs) 259 | 260 | not_scan_func_list.append(node['name']) 261 | index = 0 262 | 263 | for result_node in list_result_node: 264 | if result_node['name'] == node['name']: 
265 | break 266 | else: 267 | index += 1 268 | 269 | if FuncEntryNode != False: 270 | result_list.insert(0, FuncEntryNode) 271 | 272 | list_result_node = list_result_node[:index+1] + result_list + list_result_node[index+1:] 273 | 274 | list_result_node, not_scan_func_list = process_cross_func(result_list, testID, slicetype, list_result_node, not_scan_func_list) 275 | 276 | 277 | return list_result_node, not_scan_func_list 278 | 279 | 280 | def process_crossfuncs_back_byfirstnode(list_tuple_results_back, testID, i, not_scan_func_list): 281 | #is not a good way in time, list_tuple_results_back=[(results_back, itertimes)] 282 | while i < len(list_tuple_results_back): 283 | iter_time = list_tuple_results_back[i][1] 284 | if iter_time == 3 or iter_time == -1:#allow cross 3 funcs: 285 | i += 1 286 | continue 287 | 288 | else: 289 | list_node = list_tuple_results_back[i][0] 290 | 291 | if len(list_node) == 1: 292 | i += 1 293 | continue 294 | 295 | if list_node[1]['type'] == 'Parameter': 296 | func_name = list_node[0]['name'] 297 | path = os.path.join('dict_call2cfgNodeID_funcID', testID, 'dict.pkl') 298 | 299 | if not os.path.exists(path): 300 | i += 1 301 | continue 302 | 303 | fin = open(path, 'rb') 304 | _dict = pickle.load(fin) 305 | fin.close() 306 | 307 | if func_name not in _dict.keys(): 308 | list_tuple_results_back[i][1] = -1 309 | i += 1 310 | continue 311 | 312 | else: 313 | list_cfgNodeID = _dict[func_name] 314 | dict_func_pdg = getFuncPDGBynodeIDAndtestID(list_cfgNodeID, testID) 315 | iter_time += 1 316 | _new_list = [] 317 | for item in dict_func_pdg.items(): 318 | targetPDG = item[1] 319 | startnode = [] 320 | for n in targetPDG.vs: 321 | if n['name'] == item[0]:#is id 322 | startnode = [n] 323 | break 324 | 325 | if startnode == []: 326 | continue 327 | ret_list = program_slice_backwards(targetPDG, startnode) 328 | not_scan_func_list.append(startnode[0]['name']) 329 | 330 | ret_list = ret_list + list_node 331 | _new_list.append([ret_list, iter_time]) 332 
| 333 | if _new_list != []: 334 | del list_tuple_results_back[i] 335 | list_tuple_results_back = list_tuple_results_back + _new_list 336 | list_tuple_results_back, not_scan_func_list = process_crossfuncs_back_byfirstnode(list_tuple_results_back, testID, i, not_scan_func_list) 337 | else: 338 | list_tuple_results_back[i][1] = -1 339 | i += 1 340 | continue 341 | 342 | 343 | else: 344 | funcname = list_node[0]['code'] 345 | if funcname.find("::") > -1: 346 | 347 | 348 | path = os.path.join('dict_call2cfgNodeID_funcID', testID, 'dict.pkl')#get funname and it call place 349 | 350 | #Modified by kingnop 351 | if not os.path.exists(path): 352 | i += 1 353 | continue 354 | #################### 355 | 356 | fin = open(path, 'rb') 357 | _dict = pickle.load(fin) 358 | fin.close() 359 | 360 | 361 | 362 | func_name = list_node[0]['name'] 363 | if func_name not in _dict.keys(): 364 | list_tuple_results_back[i][1] = -1 365 | i += 1 366 | continue 367 | 368 | else: 369 | list_cfgNodeID = _dict[func_name] 370 | dict_func_pdg = getFuncPDGBynodeIDAndtestID(list_cfgNodeID, testID) 371 | 372 | iter_time += 1 373 | _new_list = [] 374 | for item in dict_func_pdg.items(): 375 | targetPDG = item[1] 376 | startnode = [] 377 | for n in targetPDG.vs: 378 | if n['name'] == item[0]:#is id 379 | startnode = [n] 380 | break 381 | if startnode == []: 382 | continue 383 | ret_list = program_slice_backwards(targetPDG, startnode) 384 | not_scan_func_list.append(startnode[0]['name']) 385 | 386 | 387 | ret_list = ret_list + list_node 388 | _new_list.append([ret_list, iter_time]) 389 | 390 | if _new_list != []: 391 | del list_tuple_results_back[i] 392 | list_tuple_results_back = list_tuple_results_back + _new_list 393 | list_tuple_results_back, not_scan_func_list = process_crossfuncs_back_byfirstnode(list_tuple_results_back, testID, i, not_scan_func_list) 394 | 395 | else: 396 | list_tuple_results_back[i][1] = -1 397 | i += 1 398 | continue 399 | 400 | else: 401 | i += 1 402 | continue 403 | 404 | 
return list_tuple_results_back, not_scan_func_list 405 | --------------------------------------------------------------------------------