├── .gitattributes ├── README.md ├── SySeVR_dependences.zip ├── access_db_operate.py ├── all.py ├── complete_PDG.py ├── data_preprocess.py ├── extract_df.py ├── general_op.py ├── get_cfg_relation.py ├── make_label.py ├── points_get.py └── slice_op.py /.gitattributes: -------------------------------------------------------------------------------- 1 | SySeVR_dependences.zip filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Environments 2 | 3 | 4 | ### System 5 | 6 | Ubuntu 18.04 (Tested) 7 | 8 | ### Step 1 9 | 10 | - Joern 0.3.1 11 | - JDK 1.7 12 | - Neo4J 2.1.8 Community Edition 13 | - Gremlin for Neo4J 2.X 14 | - Apache Ant build tool 15 | - Python 2.7 16 | 17 | ### Step 2 18 | 19 | - Python 3.6 20 | - Tensorflow 1.6 21 | - Gensim 3.4 22 | 23 | ## Installation 24 | 25 | ### Joern 0.3.1 26 | 27 | - #### **JDK 1.7** 28 | 29 | 1. extract the tarball 30 | 31 | ```bash 32 | tar -xvf jdk-7u80-linux-x64.tar.gz -C /usr/loacl/java 33 | ``` 34 | 35 | 2. 
set environment variable 36 | 37 | > **/etc/profile** 38 | 39 | ```bash 40 | # Add These Content at the End of the File 41 | ####################################### 42 | JAVA_HOME=/usr/local/java/jdk1.7.0_80 43 | JRE_HOME=/usr/local/java/jdk1.7.0_80 44 | PATH=$PATH:$JRE_HOME/bin:$JAVA_HOME/bin 45 | 46 | export JAVA_HOME 47 | export JRE_HOME 48 | export PATH 49 | ####################################### 50 | 51 | update-alternatives --install "/usr/bin/java" "java" "/usr/local/java/jdk1.7.0_80/bin/java" 1 52 | 53 | update-alternatives --install "/usr/bin/javac" "javac" "/usr/local/java/jdk1.7.0_80/bin/javac" 1 54 | 55 | update-alternatives --install "/usr/bin/javaws" "javaws" "/usr/local/java/jdk1.7.0_80/bin/javaws" 1 56 | 57 | update-alternatives --set java /usr/local/java/jdk1.7.0_80/bin/java 58 | 59 | update-alternatives --set javac /usr/local/java/jdk1.7.0_80/bin/javac 60 | 61 | update-alternatives --set javaws /usr/local/java/jdk1.7.0_80/bin/javaws 62 | 63 | source /etc/profile 64 | ``` 65 | 66 | 3. verify 67 | 68 | ```bash 69 | java -version 70 | ``` 71 | **You should receive a message which displays** 72 | 73 | ```bash 74 | java version "1.7.0_80" 75 | Java(TM) SE Runtime Environment (build 1.7.0_80-b15) 76 | Java HotSpot(TM) 64-Bit Server VM (build 24.80-b11, mixed mode) 77 | ``` 78 | 79 | - #### **Neo4j 2.1.8 Community Edition** 80 | 81 | 1. extract the tarball 82 | 83 | ```bash 84 | unzip neo4j-community-2.1.8.zip 85 | mkdir -d /usr/local/neo4j 86 | mv /usr/local/neo4j ./Neo4j/neo4j-community-2.1.8/* 87 | ``` 88 | 89 | 2. modify configure files 90 | 91 | > **configure files are located in /usr/local/neo4j/conf** 92 | 93 | **neo4j-server.properties** 94 | 95 | ```bash 96 | # location of the database directory 97 | org.neo4j.server.database.location=/home/joern-0.3.1/.joernIndex/ 98 | # Let the webserver only listen on the specified IP. Default is localhost (only 99 | # accept local connections). Uncomment to allow any connection. 
Please see the 100 | # security section in the neo4j manual before modifying this. 101 | org.neo4j.server.webserver.address=0.0.0.0 102 | ``` 103 | 104 | **neo4j-wrapper.conf** 105 | 106 | ```bash 107 | # Java Heap Size: by default the Java heap size is dynamically 108 | # calculated based on available system resources. 109 | # Uncomment these lines to set specific initial and maximum 110 | # heap size in MB. 111 | wrapper.java.initmemory=512 112 | wrapper.java.maxmemory=10240 #as large as you can 113 | ``` 114 | 115 | 3. set environment variable 116 | 117 | > **/etc/profile** 118 | 119 | ```bash 120 | # Add These Content at the End of the File 121 | ####################################### 122 | NEO4J_HOME=/usr/local/neo4j 123 | PATH=$PATH:$NEO4J_HOME/bin 124 | 125 | export NEO4J_HOME 126 | export PATH 127 | ####################################### 128 | source /etc/profile 129 | ``` 130 | 131 | 4. start && verify 132 | 133 | ```bash 134 | /usr/local/neo4j/bin/neo4j console 135 | ``` 136 | 137 | - #### **Gremlin for Neo4J 2.X** 138 | 139 | > https://github.com/neo4j-contrib/gremlin-plugin 140 | 141 | ```bash 142 | unzip neo4j-gremlin-plugin-tp2-2.1.5-server-plugin.zip -d $NEO4J_HOME/plugins/gremlin-plugin 143 | ``` 144 | 145 | - #### **Apache Ant build tool** 146 | 147 | 1. extract the tarball 148 | 149 | ```bash 150 | mkdir /usr/local/ant 151 | unzip -d /usr/local/ant apache-ant-1.8.4-bin.zip 152 | ``` 153 | 154 | 2. set environment variable 155 | 156 | > **/etc/profile** 157 | 158 | ```bash 159 | # Add These Content at the End of the File 160 | ####################################### 161 | ANT_HOME=/usr/local/ant 162 | PATH=$PATH:$ANT_HOME/bin 163 | 164 | export ANT_HOME 165 | export PATH 166 | ####################################### 167 | 168 | source /etc/profile 169 | ``` 170 | 171 | 3. 
verify 172 | 173 | ```bash 174 | ant -version 175 | ``` 176 | 177 | - #### **Joern 0.3.1** 178 | 179 | > https://joern.readthedocs.io/en/latest/installation.html#system-requirements-and-dependencies 180 | 181 | 1. extract the tarball 182 | ```bash 183 | tar -xvf 0.3.1.tar.gz 184 | ``` 185 | 186 | 2. extract the tarball of build dependencies 187 | 188 | ```bash 189 | cd joern-0.3.1 190 | tar -xvf lib.tar.gz 191 | ``` 192 | 193 | 3. build the project 194 | 195 | ```bash 196 | cd joern-0.3.1 197 | ant 198 | ``` 199 | 200 | **The executable JAR file is located in joern-0.3.1/bin/joern.jar** 201 | 202 | 4. set environment variable (optional) 203 | 204 | > **/etc/profile** 205 | 206 | ```bash 207 | # Add These Content at the End of the File 208 | ####################################### 209 | JOERN_HOME=/home/joern-0.3.1/ 210 | 211 | export JOERN_HOME 212 | ####################################### 213 | ``` 214 | 215 | > **~/.bashrc** 216 | 217 | ```bash 218 | # Add These Content at the End of the File 219 | ####################################### 220 | alias joern='java -jar $JOERN/bin/joern.jar' 221 | ####################################### 222 | ``` 223 | 224 | ```bash 225 | source /etc/profile 226 | source ~/.bashrc 227 | ``` 228 | 229 | 5. build additional tools (optional) 230 | 231 | ``` 232 | cd joern-0.3.1 233 | ant tools 234 | ``` 235 | 236 | 6. install python-joern 237 | 238 | ```bash 239 | apt install python-setuptools python-dev python-pip 240 | ``` 241 | 242 | ```bash 243 | pip2 install py2neo==2.0 -i https://pypi.tuna.tsinghua.edu.cn/simple 244 | ``` 245 | 246 | ```bash 247 | pip2 install py2neo-gremlin -i https://pypi.tuna.tsinghua.edu.cn/simple 248 | ``` 249 | 250 | ```bash 251 | tar -xvf python-joern-0.3.1.tar.gz 252 | cd python-joern-0.3.1 253 | python2 setup.py install 254 | ``` 255 | 256 | 7. 
install joern-tools 257 | 258 | ```bash 259 | pip2 install chardet -i https://pypi.tuna.tsinghua.edu.cn/simple 260 | pip2 install pygraphviz -i https://pypi.tuna.tsinghua.edu.cn/simple 261 | ``` 262 | 263 | ```bash 264 | git clone https://github.com/fabsx00/joern-tools 265 | cd joern-tools 266 | python2 setup.py install 267 | ``` 268 | 269 | 8. verify 270 | 271 | ```bash 272 | joern-lookup 273 | ``` 274 | 275 | ## Using 276 | 277 | ### Step1 Generating Slices 278 | 279 | > Work Dir: /home/SySeVR-master/Implementation/source2slice 280 | > 281 | > Code Dir: /home/code 282 | > 283 | > Recommended Memory Size: >=16GB (according to your code size) 284 | > 285 | > If you want to slice the NVD and SARD, you may divide them into parts. 286 | 287 | 1. install dependences 288 | 289 | ```bash 290 | apt install python-igraph 291 | ``` 292 | 293 | 2. parse the source code 294 | 295 | >input: source codes 296 | > 297 | >output: .joernIndex 298 | 299 | ```bash 300 | rm -rf ./.joernIndex 301 | ``` 302 | 303 | ```bash 304 | java -Xmx16g -jar $JOERN_HOME/bin/joern.jar /home/code 305 | ``` 306 | 307 | This will create a neo4j database directory .joernIndex in this directory. 308 | 309 | 3. generate CFG 310 | 311 | >input: .joernIndex 312 | > 313 | >output: cfg_db 314 | 315 | ```bash 316 | # start neo4j at other terminal 317 | /usr/local/neo4j/bin/neo4j console 318 | ``` 319 | 320 | ```bash 321 | mkdir cfg_db 322 | python2 get_cfg_relation.py 323 | ``` 324 | 325 | 4. generate PDG 326 | 327 | >input: cfg_db .joernIndex 328 | > 329 | >output: pdg_db 330 | 331 | ```python 332 | # modify access_db_operate.py 333 | http.socket_timeout = 999999999 # a big number 334 | ``` 335 | 336 | ```bash 337 | mkdir pdg_db 338 | python2 complete_PDG.py 339 | ``` 340 | 341 | 5. 
generate call graph of functions 342 | 343 | >input: pdg_db .joernIndex 344 | > 345 | >output: dict_call2cfgNodeID_funcID 346 | 347 | ```bash 348 | mkdir dict_call2cfgNodeID_funcID 349 | python2 access_db_operate.py 350 | ``` 351 | 352 | 6. generate four kinds of SyCVs 353 | 354 | >input: dict_call2cfgNodeID_funcID 355 | > 356 | >outout: arrayuse_slice_points.pkl, integeroverflow_slice_points_new.pkl, pointuse_slice_points.pkl, sensifun_slice_points.pkl 357 | 358 | ```python 359 | # modify points_get.py near the 128 rows 360 | # change "location" to ",location" 361 | for i in list_usenodes: 362 | if str(i).find(",location")==-1: 363 | list_usenodes.remove(i) 364 | loc_list=[] 365 | final_list=[] 366 | for i in list_usenodes: 367 | #print(i) 368 | if ',location' in str(i): 369 | print(str(i)) 370 | location=str(i).split(",type:")[0].split(",location:")[1][1:-1].split(":") 371 | count=int(location[0]) 372 | loc_list.append(count) 373 | 374 | ``` 375 | 376 | ```bash 377 | python2 points_get.py 378 | ``` 379 | 380 | 7. extract slices 381 | 382 | >input: dict_call2cfgNodeID_funcID, arrayuse_slice_points.pkl, integeroverflow_slice_points_new.pkl, pointuse_slice_points.pkl, sensifun_slice_points.pkl 383 | > 384 | >output: api_slices.txt, arrayuse_slice.txt, integeroverflow_slices.txt, pointeruse_slice.txt 385 | 386 | ```python 387 | # modify save-file-path in extract_df.py 388 | store_filepath = "integeroverflow_slices.txt" 389 | store_filepath = "arraysuse_slices.txt" 390 | store_filepath = "pointersuse_slices.txt" 391 | store_filepath = "api_slices.txt" 392 | ``` 393 | 394 | ```python 395 | # add slice_op.py at 348 rows 396 | if not os.path.exists(path): 397 | i += 1 398 | continue 399 | ``` 400 | 401 | **Due to the limit of memory, you may execute four functions in extract_df.py one by one.** 402 | 403 | ```bash 404 | python2 extract_df.py 405 | ``` 406 | 407 | 8. 
get labels of slices 408 | 409 | >input: api_slices.txt, arrayuse_slice.txt, integeroverflow_slices.txt, pointeruse_slice.txt 410 | > 411 | >output: apt_slices_label.pkl, api_slices-vulline.pkl, array_slice_label.pkl, expr_slice_label.pkl, pointer_slice_label.pkl 412 | 413 | ```python 414 | # modify make_label.py at 70 rows 415 | # wrong format 416 | _dict_cwe2line = {} 417 | for _dict in dict: 418 | for key in _dict.keys(): 419 | if _dict[key] not in _dict_cwe2line_target.keys(): 420 | ``` 421 | 422 | ```bash 423 | python2 make_label.py 424 | ``` 425 | 426 | 9. combine labels with slices 427 | 428 | >input: apt_slices_label.pkl, api_slices-vulline.pkl, array_slice_label.pkl, expr_slice_label.pkl, pointer_slice_label.pkl, api_slices.txt, arrayuse_slice.txt, integeroverflow_slices.txt, pointeruse_slice.txt 429 | > 430 | >output: api_slices, arrayuse_slices.txt, integeroverflow_slices.txt, pointersuse_slices.txt 431 | 432 | ```bash 433 | mkdir slices label_source slice_label 434 | cp api_slices.txt arrayuse_slice.txt integeroverflow_slices.txt pointeruse_slice.txt ./slices 435 | cp apt_slices_label.pkl api_slices-vulline.pkl array_slice_label.pkl expr_slice_label.pkl pointer_slice_label.pkl ./label_source 436 | cd label_source 437 | mv expr_slice_label.pkl integeroverflow_slices.pkl 438 | mv apt_slices_label.pkl api_slices.pkl 439 | mv array_slice_label.pkl arrayuse_slice.pkl 440 | mv pointer_slice_label.pkl pointeruse_slice.pkl 441 | ``` 442 | 443 | ```bash 444 | python2 data_preprocess.py 445 | ``` 446 | 447 | 448 | 449 | ### Step2 Data Perprocess 450 | 451 | pass 452 | 453 | ### Step3 Deep Learning 454 | 455 | pass -------------------------------------------------------------------------------- /SySeVR_dependences.zip: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1f2f3d797cec9b1e3162d26496acc23969622353de327679c9258292fba93f18 3 | size 236608606 4 | 
-------------------------------------------------------------------------------- /access_db_operate.py: -------------------------------------------------------------------------------- 1 | ## -*- coding: utf-8 -*- 2 | from joern.all import JoernSteps 3 | from igraph import * 4 | from general_op import * 5 | import pickle 6 | from py2neo.packages.httpstream import http 7 | http.socket_timeout = 999999999 8 | 9 | def get_all_use_bydefnode(db, node_id): 10 | query_str = "g.v(%d).in('USE')" % node_id 11 | results = db.runGremlinQuery(query_str) 12 | list_re = [] 13 | for re in results: 14 | if re.properties['type'] == 'Statement': 15 | continue 16 | else: 17 | list_re.append(re) 18 | 19 | return list_re 20 | 21 | 22 | def get_all_def_bydefnode(db, node_id): 23 | query_str = "g.v(%d).in('DEF')" % node_id 24 | results = db.runGremlinQuery(query_str) 25 | list_re = [] 26 | for re in results: 27 | if re.properties['type'] == 'Statement': 28 | continue 29 | else: 30 | list_re.append(re) 31 | 32 | return list_re 33 | 34 | 35 | def get_exprstmt_node(db): 36 | query_expr_str = "queryNodeIndex('type:ExpressionStatement')" 37 | #results = db.runGremlinQuery(query_expr_str) 38 | results_1 = db.runGremlinQuery(query_expr_str) 39 | 40 | query_iddecl_str = 'queryNodeIndex("type:IdentifierDeclStatement")' 41 | results_2 = db.runGremlinQuery(query_iddecl_str) 42 | 43 | results = results_1 + results_2 44 | 45 | return results 46 | 47 | 48 | def get_pointers_node(db): 49 | list_pointers_node = [] 50 | query_iddecl_str = 'queryNodeIndex("type:IdentifierDeclStatement")' 51 | 52 | results = db.runGremlinQuery(query_iddecl_str) 53 | 54 | if results != []: 55 | for re in results: 56 | code = re.properties['code'] 57 | if code.find(' = ') != -1: 58 | code = code.split(' = ')[0] 59 | 60 | if code.find('*') != -1: 61 | list_pointers_node.append(re) 62 | 63 | query_param_str = 'queryNodeIndex("type:Parameter")' 64 | results = db.runGremlinQuery(query_param_str) 65 | if results != []: 66 | for re 
in results: 67 | code = re.properties['code'] 68 | if code.find(' = ') != -1: 69 | code = code.split(' = ')[0] 70 | 71 | if code.find('*') != -1: 72 | list_pointers_node.append(re) 73 | 74 | return list_pointers_node 75 | 76 | 77 | def get_arrays_node(db): 78 | list_arrays_node = [] 79 | query_iddecl_str = "queryNodeIndex('type:IdentifierDeclStatement')" 80 | results = db.runGremlinQuery(query_iddecl_str) 81 | if results != []: 82 | for re in results: 83 | code = re.properties['code'] 84 | if code.find(' = ') != -1: 85 | code = code.split(' = ')[0] 86 | 87 | if code.find(' [ ') != -1: 88 | list_arrays_node.append(re) 89 | 90 | query_param_str = "queryNodeIndex('type:Parameter')" 91 | results = db.runGremlinQuery(query_param_str) 92 | if results != []: 93 | for re in results: 94 | code = re.properties['code'] 95 | if code.find(' = ') != -1: 96 | code = code.split(' = ')[0] 97 | 98 | if code.find(' [ ') != -1: 99 | list_arrays_node.append(re) 100 | 101 | return list_arrays_node 102 | 103 | 104 | def get_def_node(db, cfg_node_id): 105 | query_str = "g.v(%d).out('DEF')" % cfg_node_id 106 | results = db.runGremlinQuery(query_str) 107 | return results 108 | 109 | 110 | def getFunctionNodeByName(db, funcname): 111 | query_str = "queryNodeIndex('type:Function AND name:%s')" % funcname 112 | results = db.runGremlinQuery(query_str) 113 | return results 114 | 115 | 116 | def get_parameter_by_funcid(db, func_id): 117 | query_str = "g.v(%d).out('IS_FUNCTION_OF_CFG').out('CONTROLS').filter{ it.type == 'Parameter' }.id" % func_id 118 | results = db.runGremlinQuery(query_str) 119 | return results 120 | 121 | 122 | def isNodeExist(g, nodeName): 123 | if not g.vs: 124 | return False 125 | else: 126 | return nodeName in g.vs['name'] 127 | 128 | 129 | def getALLFuncNode(db): 130 | query_str = "queryNodeIndex('type:Function')" 131 | results = db.runGremlinQuery(query_str) 132 | return results 133 | 134 | 135 | def getFuncNode(db, func_name): 136 | query_str = 'getFunctionsByName("' + 
func_name + '")' 137 | func_node = db.runGremlinQuery(query_str) 138 | return func_node 139 | 140 | 141 | def getFuncFile(db, func_id): 142 | query_str = "g.v(%d).in('IS_FILE_OF').filepath" % func_id 143 | ret = db.runGremlinQuery(query_str) 144 | print ret 145 | return ret[0] 146 | 147 | 148 | def getCFGNodes(db, func_id): 149 | query_str = 'queryNodeIndex("functionId:%s AND isCFGNode:True")' % func_id 150 | cfgNodes = db.runGremlinQuery(query_str) 151 | 152 | return cfgNodes 153 | 154 | 155 | def getDDGEdges(db, func_id): 156 | query_str = """queryNodeIndex('functionId:%s AND isCFGNode:True').outE('REACHES')""" % (func_id) 157 | ddgEdges = db.runGremlinQuery(query_str) 158 | return ddgEdges 159 | 160 | 161 | def getCDGEdges(db, func_id): 162 | query_str = """queryNodeIndex('functionId:%s AND isCFGNode:True').outE('CONTROLS')""" % (func_id) 163 | cdgEdges = db.runGremlinQuery(query_str) 164 | return cdgEdges 165 | 166 | 167 | def getCFGEdges(db, func_id): 168 | query_str = """queryNodeIndex('functionId:%s AND isCFGNode:True').outE('FLOWS_TO')""" % (func_id) 169 | cfgEdges = db.runGremlinQuery(query_str) 170 | return cfgEdges 171 | 172 | 173 | def drawGraph(db, edges, func_entry_node, graph_type): 174 | g = Graph(directed=True) 175 | func_id = func_entry_node._id 176 | filepath = getFuncFile(db, func_id) 177 | 178 | for edge in edges: 179 | if edge.start_node.properties['code'] == 'ENTRY': 180 | startNode = str(edge.start_node.properties['functionId']) 181 | else: 182 | startNode = str(edge.start_node._id) 183 | 184 | if edge.start_node.properties['code'] == 'ERROR': 185 | continue 186 | 187 | if isNodeExist(g, startNode) == False: 188 | if edge.start_node.properties['code'] == 'ENTRY': 189 | node_prop = {'code': func_entry_node.properties['name'], 'type': func_entry_node.properties['type'], 190 | 'location': func_entry_node.properties['location'], 'filepath':filepath, 'functionId':str(edge.start_node.properties['functionId'])} 191 | else: 192 | node_prop = 
{'code': edge.start_node.properties['code'], 'type': edge.start_node.properties['type'], 193 | 'location': edge.start_node.properties['location'], 'filepath':filepath, 'functionId':str(edge.start_node.properties['functionId'])} 194 | g.add_vertex(startNode, **node_prop)#id is 'name' 195 | 196 | endNode = str(edge.end_node._id) 197 | if isNodeExist(g, endNode) == False: 198 | if graph_type == 'pdg' and edge.end_node.properties['code'] == 'EXIT': 199 | continue 200 | 201 | if edge.end_node.properties['code'] == 'ERROR': 202 | continue 203 | 204 | node_prop = {'code': edge.end_node.properties['code'], 'type': edge.end_node.properties['type'], 205 | 'location': edge.end_node.properties['location'], 'filepath':filepath, 'functionId':str(edge.end_node.properties['functionId'])} 206 | g.add_vertex(endNode, **node_prop) 207 | 208 | if graph_type == 'pdg': 209 | edge_prop = {'var': edge.properties['var']} 210 | else: 211 | edge_prop = {'var': edge.properties['flowLabel']} 212 | g.add_edge(startNode, endNode, **edge_prop) 213 | 214 | return g 215 | 216 | 217 | def translatePDGByNode(db, func_node): 218 | func_id = func_node._id 219 | ddgEdges = getDDGEdges(db, func_id) 220 | cdgEdges = getCDGEdges(db, func_id) 221 | Edges = ddgEdges + cdgEdges 222 | graph_type = 'pdg' 223 | g = drawGraph(db, Edges, func_node, graph_type) 224 | 225 | return g 226 | 227 | 228 | def translateCFGByNode(db, func_node): 229 | func_id = func_node._id 230 | Edges = getCFGEdges(db, func_id) 231 | graph_type = 'cfg' 232 | g = drawGraph(db, Edges, func_node, graph_type) 233 | 234 | return g 235 | 236 | 237 | def getUSENodesVar(db, func_id): 238 | query = "g.v(%s).out('USE').code" % func_id 239 | ret = db.runGremlinQuery(query) 240 | if ret == []: 241 | return False 242 | else: 243 | return ret 244 | 245 | 246 | def getDEFNodesVar(db, func_id): 247 | query = "g.v(%s).out('DEF').code" % func_id 248 | ret = db.runGremlinQuery(query) 249 | if ret == []: 250 | return False 251 | else: 252 | return ret 253 | 
254 | 255 | def getUseDefVarByPDG(db, pdg): 256 | dict_cfg2use = {} 257 | dict_cfg2def = {} 258 | #print pdg 259 | #need_to_addedge_node = [] 260 | for node in pdg.vs: 261 | if node['type'] == 'Function': 262 | continue 263 | 264 | func_id = node['name'] 265 | use_node = getUSENodesVar(db, func_id) 266 | def_node = getDEFNodesVar(db, func_id) 267 | 268 | if node['type'] == 'Statement': 269 | if def_node == False: 270 | code = node['code'].replace('\n', ' ') 271 | if code.find(" = ") != -1: 272 | value = code.split(" = ")[0].strip().split(' ') 273 | if value[-1] == ']': 274 | newvalue = code.split(" [ ")[0].strip().split(' ') 275 | if '->' in newvalue: 276 | a_index = newvalue.index('->') 277 | n_value = ' '.join([newvalue[a_index-1], '->', newvalue[a_index+1]]) 278 | newvalue[a_index-1] = n_value 279 | del newvalue[a_index] 280 | del newvalue[a_index] 281 | 282 | def_node = newvalue 283 | 284 | else: 285 | if '->' in value: 286 | a_index = value.index('->') 287 | n_value = ' '.join([value[a_index-1], '->', value[a_index+1]]) 288 | ob_value = value[a_index-1] 289 | value[a_index-1] = n_value 290 | del value[a_index] 291 | del value[a_index] 292 | value.append(ob_value.replace('*', '')) 293 | 294 | def_node = value 295 | 296 | #need_to_addedge_node.append(node['name']) 297 | 298 | if use_node == False: 299 | if code.find(" = ") != -1: 300 | value = code.split(" = ")[1].strip().split(' ') 301 | newvalue = [] 302 | for v in value: 303 | if v == '*' or v == '+' or v == '-' or v == '->' or v == '(' or v == ')' or v == '[' or v == ']' or v == '&' or v == '.' or v == '::' or v == ';' or v == ',': 304 | continue 305 | else: 306 | newvalue.append(v.strip()) 307 | 308 | else: 309 | value = code.split(' ') 310 | newvalue = [] 311 | for v in value: 312 | if v == '*' or v == '+' or v == '-' or v == '->' or v == '(' or v == ')' or v == '[' or v == ']' or v == '&' or v == '.' 
or v == '::' or v == ';' or v == ',': 313 | continue 314 | else: 315 | newvalue.append(v.strip()) 316 | 317 | use_node = newvalue 318 | 319 | 320 | if use_node: 321 | use_node = [code.replace('*', '').replace('&', '').strip() for code in use_node] 322 | 323 | if def_node: 324 | def_node = [code.replace('*', '').replace('&', '').strip() for code in def_node] 325 | 326 | else:#add define node 327 | new_def_node = getReturnVarOfAPI(node['code'])#get modify value of api_func 328 | if node['name'] == '2078': 329 | print "new_def_node", new_def_node 330 | 331 | if new_def_node: 332 | def_node = [] 333 | for code in new_def_node: 334 | new_code = code.replace('*', '').replace('&', '').strip() 335 | def_node.append(new_code) 336 | 337 | if new_code not in use_node: 338 | use_node.append(new_code) 339 | 340 | if use_node: 341 | dict_cfg2use[node['name']] = use_node 342 | 343 | if def_node: 344 | dict_cfg2def[node['name']] = def_node 345 | 346 | return dict_cfg2use, dict_cfg2def 347 | 348 | 349 | def getFuncNodeByFile(db, filenodeID): 350 | query_str = 'g.v(%d).out("IS_FILE_OF")' % filenodeID 351 | results = db.runGremlinQuery(query_str) 352 | _list = [] 353 | for re in results: 354 | if re.properties['type'] == 'Function': 355 | _list.append(re) 356 | else: 357 | continue 358 | 359 | return _list 360 | 361 | 362 | def getAllFuncfileByTestID(db, testID): 363 | testID = '*/'+ testID + '/*' 364 | query_str = "queryNodeIndex('type:File AND filepath:%s').id" % testID 365 | results = db.runGremlinQuery(query_str) 366 | return results 367 | 368 | 369 | def get_calls_id(db, func_name): 370 | query_str = 'getCallsTo("%s").id' % func_name 371 | results = db.runGremlinQuery(query_str) 372 | return results 373 | 374 | 375 | def getCFGNodeByCallee(db, node_ast_id): 376 | #print "start" 377 | query_str = "g.v(%s).in('IS_AST_PARENT')" % node_ast_id 378 | results = db.runGremlinQuery(query_str) 379 | #print "end" 380 | if results == []: 381 | return None 382 | 383 | for node in results: 
384 | if 'isCFGNode' in node.properties and node.properties['isCFGNode'] == 'True': 385 | return node 386 | else: 387 | node = getCFGNodeByCallee(db, node._id) 388 | 389 | return node 390 | 391 | 392 | def getCalleeNode(db, func_id): 393 | query_str = "queryNodeIndex('type:Callee AND functionId:%d')" % func_id 394 | results = db.runGremlinQuery(query_str) 395 | return results 396 | 397 | 398 | def get_all_calls_node(db, testID): 399 | list_all_funcID = [node._id for node in getFuncNodeInTestID(db, testID)] 400 | print "list_all_funcID", list_all_funcID 401 | print "lenth", len(list_all_funcID) 402 | if len(list_all_funcID)>130: 403 | print ">100" 404 | return False 405 | list_all_callee_node = [] 406 | for func_id in list_all_funcID:#allfile in a testID 407 | list_all_callee_node += getCalleeNode(db, func_id) 408 | 409 | if list_all_callee_node == []: 410 | return False 411 | else: 412 | return [(str(node._id), node.properties['code'], str(node.properties['functionId'])) for node in list_all_callee_node] 413 | 414 | 415 | def getFuncNodeInTestID(db, testID): 416 | list_all_file_id = getAllFuncfileByTestID(db, testID) 417 | if list_all_file_id == []: 418 | return False 419 | 420 | list_all_func_node = [] 421 | 422 | for file_id in list_all_file_id: 423 | list_func_node = getFuncNodeByFile(db, file_id) 424 | list_all_func_node += list_func_node 425 | 426 | return list_all_func_node 427 | 428 | 429 | def getClassByObjectAndFuncID(db, objectname, func_id): 430 | #print objectname, func_id 431 | all_cfg_node = getCFGNodes(db, func_id) 432 | for cfg_node in all_cfg_node: 433 | if cfg_node.properties['code'] == objectname and cfg_node.properties['type'] == 'Statement': 434 | print objectname, func_id, cfg_node.properties['code'], cfg_node._id 435 | query_str_1 = "queryNodeIndex('type:Statement AND code:%s AND functionId:%s')" % (objectname, func_id) 436 | results_1 = db.runGremlinQuery(query_str_1) 437 | if results_1 == []: 438 | return False 439 | else: 440 | ob_cfgNode 
= results_1[0] 441 | 442 | location_row = ob_cfgNode.properties['location'].split(':')[0] 443 | 444 | query_str_2 = "queryNodeIndex('type:ExpressionStatement AND functionId:%s')" % func_id 445 | results_2 = db.runGremlinQuery(query_str_2) 446 | if results_2 == []: 447 | return False 448 | 449 | classname = False 450 | for node in results_2: 451 | print node.properties['location'].split(':')[0], location_row 452 | if node.properties['location'].split(':')[0] == location_row: 453 | classname = node.properties['code'] 454 | break 455 | 456 | else: 457 | continue 458 | 459 | return classname 460 | 461 | elif cfg_node.properties['code'].find(' '+objectname+' = new') != -1: 462 | temp_value = cfg_node.properties['code'].split(' '+objectname+' = new')[1].replace('*', '').strip() 463 | if temp_value.split(' ')[0] != 'const': 464 | classname = temp_value.split(' ')[0] 465 | else: 466 | classname = temp_value.split(' ')[1] 467 | 468 | return classname 469 | 470 | 471 | def getDeleteNode(db, func_id): 472 | query_str = "queryNodeIndex('code:delete AND functionId:%d')" % func_id 473 | results = db.runGremlinQuery(query_str) 474 | return results 475 | 476 | 477 | def get_all_delete_node(db, testID): 478 | list_all_funcID = [node._id for node in getFuncNodeInTestID(db, testID)] 479 | print "list_all_funcID", list_all_funcID 480 | 481 | list_all_delete_node = [] 482 | for func_id in list_all_funcID:#allfile in a testID 483 | list_all_delete_node += getDeleteNode(db, func_id) 484 | 485 | if list_all_delete_node == []: 486 | return False 487 | else: 488 | return list_all_delete_node 489 | 490 | 491 | def getDeclNode(db, func_id): 492 | query_str = "queryNodeIndex('type:IdentifierDeclStatement AND functionId:%d')" % func_id 493 | results = db.runGremlinQuery(query_str) 494 | return results 495 | 496 | 497 | def get_all_iddecl_node(db, testID): 498 | list_all_funcID = [node._id for node in getFuncNodeInTestID(db, testID)] 499 | print "list_all_funcID", list_all_funcID 500 | 501 | 
list_all_decl_node = [] 502 | for func_id in list_all_funcID:#allfile in a testID 503 | list_all_decl_node += getDeclNode(db, func_id) 504 | 505 | if list_all_decl_node == []: 506 | return False 507 | else: 508 | return list_all_decl_node 509 | 510 | 511 | def getCallGraph(db, testID): 512 | list_all_func_node = getFuncNodeInTestID(db, testID) 513 | #print "list_all_func_node", list_all_func_node 514 | if list_all_func_node == []: 515 | return False 516 | 517 | call_g = Graph(directed=True) 518 | 519 | for func_node in list_all_func_node: 520 | #print(func_node) 521 | prop = {'funcname':func_node.properties['name'], 'type': func_node.properties['type'], 'filepath': func_node.properties['filepath']} 522 | call_g.add_vertex(str(func_node._id), **prop) 523 | 524 | 525 | list_all_callee = get_all_calls_node(db, testID)#we must limit result in testID, it already get callee node 526 | #print '3 ', list_all_callee 527 | if list_all_callee == False: 528 | return False 529 | 530 | for func_node in list_all_func_node: 531 | function_name = func_node.properties['name'] 532 | #print "function_name", function_name 533 | tag = False 534 | if function_name.find('::') != -1:#if is a function in class, have two problems 535 | func_name = function_name.split('::')[-1].strip() 536 | classname = function_name.split('::')[0].strip() 537 | 538 | if func_name == classname:#is a class::class, is a statementnode or a iddeclnode 539 | print 1 540 | list_callee_id = [] 541 | list_delete_node = get_all_delete_node(db, testID) 542 | if list_delete_node == False: 543 | continue 544 | 545 | for node in list_delete_node: 546 | functionID = node.properties["functionId"] 547 | all_cfg_node = getCFGNodes(db, functionID) 548 | delete_loc = node.properties['location'].split(':')[0] 549 | 550 | for cfg_node in all_cfg_node: 551 | if cfg_node.properties['location'] != None and cfg_node.properties['location'].split(':')[0] == delete_loc and cfg_node.properties['code'] != 'delete' and 
cfg_node.properties['code'] != '[' and cfg_node.properties['code'] != '[': 552 | objectname = cfg_node.properties['code'] 553 | ob_classname = getClassByObjectAndFuncID(db, objectname, functionID) 554 | pdg = getFuncPDGByfuncIDAndtestID(functionID, testID) 555 | if pdg == False: 556 | continue 557 | 558 | if ob_classname == classname: 559 | for p_n in pdg.vs: 560 | #print p_n['name'], str(node._id), str(cfg_node._id) 561 | if p_n['name'] == str(node._id): 562 | 563 | list_s = p_n.predecessors() 564 | for edge in pdg.es: 565 | if pdg.vs[edge.tuple[0]] in list_s and pdg.vs[edge.tuple[1]] == p_n and edge['var'] == objectname: 566 | #print (functionID, str(pdg.vs[edge.tuple[0]]['name'])) 567 | list_callee_id.append((str(functionID), str(pdg.vs[edge.tuple[0]]['name']))) 568 | else: 569 | continue 570 | 571 | elif p_n['name'] == str(cfg_node._id): 572 | list_s = p_n.predecessors() 573 | for edge in pdg.es: 574 | if pdg.vs[edge.tuple[0]] in list_s and pdg.vs[edge.tuple[1]] == p_n and edge['var'] == objectname: 575 | list_callee_id.append((functionID, str(pdg.vs[edge.tuple[0]]['name']))) 576 | else: 577 | continue 578 | 579 | else: 580 | continue 581 | 582 | 583 | else: 584 | continue 585 | 586 | elif func_name.replace('~', '') == classname:#is a class::~class 587 | list_callee_id = [] 588 | list_delete_node = get_all_delete_node(db, testID) 589 | if list_delete_node == False: 590 | continue 591 | 592 | for node in list_delete_node: 593 | functionID = node.properties["functionId"] 594 | all_cfg_node = getCFGNodes(db, functionID) 595 | delete_loc = node.properties['location'].split(':')[0] 596 | 597 | for cfg_node in all_cfg_node: 598 | if cfg_node.properties['location'] != None and cfg_node.properties['location'].split(':')[0] == delete_loc and cfg_node.properties['code'] != 'delete' and cfg_node.properties['code'] != '[' and cfg_node.properties['code'] != '[': 599 | objectname = cfg_node.properties['code'] 600 | #print objectname 601 | 602 | ob_classname = 
getClassByObjectAndFuncID(db, objectname, functionID) 603 | 604 | if ob_classname == classname: 605 | pdg = getFuncPDGByfuncIDAndtestID(functionID, testID) 606 | if pdg == False: 607 | continue 608 | 609 | for p_n in pdg.vs: 610 | if p_n['name'] == str(node._id): 611 | list_callee_id.append((functionID, str(node._id))) 612 | 613 | elif p_n['name'] == str(cfg_node._id): 614 | list_callee_id.append((functionID, str(cfg_node._id))) #delete and its object node 615 | 616 | else: 617 | continue 618 | 619 | 620 | else: 621 | continue 622 | 623 | else: 624 | print 3 625 | tag = 'func' 626 | list_callee_id = [] 627 | for _t in list_all_callee:#_t is a tuple, _t[0] is nodeid, 1 is funcname, 2 is func_id 628 | if _t[1].find('-> '+ func_name) != -1:#maybe is a class->funcname() 629 | objectname = _t[1].split(' -> '+ func_name)[0].strip() 630 | ob_classname = getClassByObjectAndFuncID(db, objectname, _t[2]) 631 | 632 | if ob_classname == classname: 633 | list_callee_id.append(_t[0]) 634 | 635 | else: 636 | continue 637 | 638 | else: 639 | continue 640 | 641 | 642 | else: 643 | tag = 'func' 644 | list_callee_id = [] 645 | for _t in list_all_callee: 646 | if _t[1] == function_name: 647 | list_callee_id.append(_t[0]) 648 | 649 | #print 4, list_callee_id 650 | if list_callee_id == []: 651 | continue 652 | 653 | else: 654 | #change ast node to cfgnode 655 | list_callee_CFGNode = [] 656 | if tag == 'func': 657 | #print 'z' 658 | for node_id in list_callee_id: 659 | #print 1 660 | callee_cfgnode = getCFGNodeByCallee(db, node_id) 661 | #print callee_cfgnode 662 | #print 2 663 | 664 | if callee_cfgnode == None: 665 | 666 | print 'ERROR', callee_cfgnode 667 | continue 668 | else: 669 | list_callee_CFGNode.append(callee_cfgnode) 670 | 671 | #print 'x' 672 | for node in list_callee_CFGNode: 673 | startNode = str(node.properties['functionId']) 674 | endNode = str(func_node._id) 675 | var = str(node._id) 676 | call_g = addDataEdge(call_g, startNode, endNode, var)#var is callee node id 677 | 
else: 678 | #print 'y' 679 | for node in list_callee_id: 680 | startNode = str(node[0]) 681 | endNode = str(func_node._id) 682 | var = str(node[1]) 683 | call_g = addDataEdge(call_g, startNode, endNode, var)#var is callee node id 684 | 685 | 686 | return call_g 687 | 688 | 689 | if __name__ == '__main__': 690 | j = JoernSteps() 691 | j.connectToDatabase() 692 | 693 | pdg_db_path = "pdg_db" 694 | list_testID = os.listdir(pdg_db_path) 695 | print list_testID 696 | for testID in list_testID: 697 | #if testID != '69055': 698 | # continue 699 | 700 | if os.path.exists(os.path.join("dict_call2cfgNodeID_funcID", str(testID))): 701 | continue 702 | 703 | call_g = getCallGraph(j, testID) 704 | if call_g == False: 705 | continue 706 | 707 | _dict = {} 708 | for edge in call_g.es: 709 | endnode = call_g.vs[edge.tuple[1]] 710 | 711 | if endnode['name'] not in _dict: 712 | _dict[endnode['name']] = [(edge['var'], call_g.vs[edge.tuple[0]]['name'])] 713 | 714 | else: 715 | _dict[endnode['name']].append((edge['var'], call_g.vs[edge.tuple[0]]['name'])) 716 | 717 | if not os.path.exists(os.path.join("dict_call2cfgNodeID_funcID", str(testID))): 718 | os.mkdir(os.path.join("dict_call2cfgNodeID_funcID", str(testID))) 719 | 720 | filepath = os.path.join("dict_call2cfgNodeID_funcID", str(testID), "dict.pkl") 721 | 722 | print _dict 723 | f = open(filepath, 'wb') 724 | pickle.dump(_dict, f, True) 725 | f.close() 726 | -------------------------------------------------------------------------------- /all.py: -------------------------------------------------------------------------------- 1 | from py2neo import Graph 2 | from py2neo.ext.gremlin import Gremlin 3 | import os 4 | 5 | DEFAULT_GRAPHDB_URL = "http://localhost:7474/db/data/" 6 | DEFAULT_STEP_DIR = os.path.dirname(__file__) + '/joernsteps/' 7 | 8 | class JoernSteps: 9 | 10 | def __init__(self): 11 | self._initJoernSteps() 12 | self.initCommandSent = False 13 | 14 | def setGraphDbURL(self, url): 15 | """ Sets the graph database URL. 
By default, 16 | http://localhost:7474/db/data/ is used.""" 17 | self.graphDbURL = url 18 | 19 | def addStepsDir(self, stepsDir): 20 | """Add an additional directory containing steps to be injected 21 | into the server""" 22 | self.stepsDirs.append(stepsDir) 23 | 24 | def connectToDatabase(self): 25 | """ Connects to the database server.""" 26 | self.graphDb = Graph(self.graphDbURL) 27 | self.gremlin = Gremlin(self.graphDb) 28 | 29 | def runGremlinQuery(self, query): 30 | 31 | """ Runs the specified gremlin query on the database. It is 32 | assumed that a connection to the database has been 33 | established. To allow the user-defined steps located in the 34 | joernsteps directory to be used in the query, these step 35 | definitions are prepended to the query.""" 36 | 37 | if not self.initCommandSent: 38 | self.initCommand = self._createInitCommand() 39 | self.initCommandSent = True 40 | finalQuery = self.initCommand 41 | else: 42 | finalQuery = "" 43 | finalQuery += query 44 | return self.gremlin.execute(finalQuery) 45 | 46 | def runCypherQuery(self, cmd): 47 | """ Runs the specified cypher query on the graph database.""" 48 | return cypher.execute(self.graphDb, cmd) 49 | 50 | def getGraphDbURL(self): 51 | return self.graphDbURL 52 | 53 | """ 54 | Create chunks from a list of ids. 55 | This method is useful when you want to execute many independent 56 | traversals on a large set of start nodes. In that case, you 57 | can retrieve the set of start node ids first, then use 'chunks' 58 | to obtain disjoint subsets that can be passed to idListToNodes. 
59 | """ 60 | def chunks(self, idList, chunkSize): 61 | for i in xrange(0, len(idList), chunkSize): 62 | yield idList[i:i+chunkSize] 63 | 64 | def _initJoernSteps(self): 65 | self.graphDbURL = DEFAULT_GRAPHDB_URL 66 | self.stepsDirs = [DEFAULT_STEP_DIR] 67 | 68 | def _createInitCommand(self): 69 | 70 | initCommand = "" 71 | 72 | for stepsDir in self.stepsDirs: 73 | for (root, dirs, files) in os.walk(stepsDir, followlinks=True): 74 | files.sort() 75 | for f in files: 76 | filename = os.path.join(root, f) 77 | if not filename.endswith('.groovy'): continue 78 | initCommand += file(filename).read() + "\n" 79 | return initCommand 80 | -------------------------------------------------------------------------------- /complete_PDG.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | from access_db_operate import * 3 | import copy 4 | from general_op import * 5 | from py2neo.packages.httpstream import http 6 | http.socket_timeout = 9999 7 | 8 | def modifyDataEdgeVal(pdg): 9 | for edge in pdg.es: 10 | if edge['var'] == None: 11 | continue 12 | 13 | new_val = '' 14 | for c in edge['var']: 15 | if c == '*': 16 | continue 17 | else: 18 | new_val += c 19 | 20 | edge['var'] = new_val 21 | 22 | return pdg 23 | 24 | 25 | def modifyStmtNode(pdg): 26 | compare_row = 0 27 | dict_row2nodestmt = {} 28 | dict_row2nodeid = {} 29 | #only process statement node 30 | dict_static = {} 31 | 32 | i = 0 33 | while i < pdg.vcount(): 34 | if pdg.vs[i]['type'] == 'Statement' and pdg.vs[i]['code'] == 'static': 35 | raw = int(pdg.vs[i]['location'].split(':')[0]) 36 | col = int(pdg.vs[i]['location'].split(':')[1]) 37 | dict_static[raw] = (raw, col) 38 | pdg.delete_vertices(i) 39 | else: 40 | i += 1 41 | 42 | i = 0 43 | while i < pdg.vcount(): 44 | if pdg.vs[i]['type'] == 'Statement': 45 | row = int(pdg.vs[i]['location'].split(':')[0]) 46 | col = int(pdg.vs[i]['location'].split(':')[1]) 47 | _tuple = (pdg.vs[i]['code'], row, col, 
pdg.vs[i]['location']) 48 | 49 | if row not in dict_row2nodestmt.keys(): 50 | dict_row2nodestmt[row] = [_tuple] 51 | dict_row2nodeid[row] = pdg.vs[i]['name'] #to confirm delete order 52 | i += 1 53 | 54 | else: 55 | dict_row2nodestmt[row].append(_tuple) 56 | pdg.delete_vertices(i) 57 | 58 | 59 | else: 60 | i += 1 61 | 62 | #process single node but not statement node 63 | j = 0 64 | list_nodeindex_to_delete = [] 65 | while j < pdg.vcount(): 66 | if pdg.vs[j]['location'] != None: 67 | row = int(pdg.vs[j]['location'].split(':')[0]) 68 | col = int(pdg.vs[j]['location'].split(':')[1]) 69 | else: 70 | j += 1 71 | continue 72 | 73 | if row in dict_row2nodestmt.keys() and pdg.vs[j]['type'] != 'Statement': 74 | _tuple = (pdg.vs[j]['code'], row, col, pdg.vs[j]['location']) 75 | dict_row2nodestmt[row].append(_tuple) 76 | list_nodeindex_to_delete.append(pdg.vs[j]['name']) 77 | j += 1 78 | 79 | else: 80 | j += 1 81 | 82 | 83 | for key in dict_row2nodestmt.keys(): 84 | dict_row2nodestmt[key].sort(key=lambda e:e[2]) 85 | #print dict_row2nodestmt[key] 86 | nodename = dict_row2nodeid[key] 87 | nodeIndex = 0 88 | for node in pdg.vs: 89 | if node['name'] == nodename: 90 | break 91 | else: 92 | nodeIndex += 1 93 | 94 | location = dict_row2nodestmt[key][0][3] 95 | 96 | new_code = ' '.join([_t[0] for _t in dict_row2nodestmt[key]]).strip() 97 | 98 | #not consider ';' appear too much times 99 | pdg.vs[nodeIndex]['code'] = new_code 100 | pdg.vs[nodeIndex]['location'] = location 101 | pdg.vs[nodeIndex]['type'] = 'Statement' 102 | 103 | for d_name in list_nodeindex_to_delete: 104 | i = 0 105 | while i < pdg.vcount(): 106 | if pdg.vs[i]['name'] == d_name: 107 | pdg.delete_vertices(i) 108 | else: 109 | i += 1 110 | 111 | 112 | n = 0 113 | while n < pdg.vcount(): 114 | if pdg.vs[n]['location'] == None: 115 | n += 1 116 | continue 117 | 118 | raw = int(pdg.vs[n]['location'].split(':')[0]) 119 | col = int(pdg.vs[n]['location'].split(':')[1]) 120 | if raw in dict_static.keys() and col > 
dict_static[raw][1]: 121 | pdg.vs[n]['code'] = 'static ' + pdg.vs[n]['code'] 122 | 123 | n += 1 124 | 125 | list_node_index = [] 126 | for node in pdg.vs: 127 | if node['type'] == 'Statement': 128 | raw = int(node['location'].split(':')[0]) 129 | list_node_index.append((raw, node)) 130 | 131 | list_node_index.sort(key=lambda x:(x[0], x[1])) 132 | 133 | i = 1 134 | list_del_name = [] 135 | while i < len(list_node_index): 136 | if list_node_index[i][0]-list_node_index[i-1][0] == 1: 137 | list_node_index[i][1]['code'] = list_node_index[i-1][1]['code'] + '\n' + list_node_index[i][1]['code'] 138 | list_del_name.append(list_node_index[i-1][1]['name']) 139 | del list_node_index[i-1] 140 | else: 141 | i += 1 142 | 143 | _dict = {} 144 | for n in list_node_index: 145 | _dict[n[1]['name']] = n[1]['code'] 146 | 147 | j = 0 148 | while j < pdg.vcount(): 149 | if pdg.vs[j]['name'] in list_del_name: 150 | pdg.delete_vertices(j) 151 | elif pdg.vs[j]['name'] in _dict.keys(): 152 | pdg.vs[j]['code'] = _dict[pdg.vs[j]['name']] 153 | j += 1 154 | else: 155 | j += 1 156 | 157 | #for v in pdg.vs: 158 | # print v['code'], v['type'], v['name'] 159 | #exit() 160 | 161 | return pdg 162 | 163 | 164 | def getInitNodeOfDecl(pdg, list_sorted_pdgnode, node, var, dict_use, dict_def): 165 | index = list_sorted_pdgnode.index(node) 166 | list_init_node = [] 167 | for i in range(index+1, len(list_sorted_pdgnode)): 168 | if list_sorted_pdgnode[i]['type'] != 'IdentifierDeclStatement' and list_sorted_pdgnode[i]['name'] in dict_def.keys(): 169 | if var in dict_def[list_sorted_pdgnode[i]['name']]: 170 | if isEdgeExists(pdg, node['name'], list_sorted_pdgnode[i]['name'], var): 171 | continue 172 | else: 173 | list_init_node.append((list_sorted_pdgnode[i], i))#is init node and dataedge not exists 174 | 175 | elif list_sorted_pdgnode[i]['type'] != 'IdentifierDeclStatement' and list_sorted_pdgnode[i]['name'] not in dict_def.keys(): 176 | print list_sorted_pdgnode[i]['name'] 177 | if 
list_sorted_pdgnode[i]['name'] in dict_use.keys() and var in dict_use[list_sorted_pdgnode[i]['name']]: 178 | #print '2' 179 | if isEdgeExists(pdg, node['name'], list_sorted_pdgnode[i]['name'], var): 180 | continue 181 | else: 182 | list_init_node.append((list_sorted_pdgnode[i], i)) 183 | 184 | else: 185 | continue 186 | 187 | return list_init_node 188 | 189 | 190 | def completeDeclStmtOfPDG(pdg, dict_use, dict_def, dict_if2cfgnode, dict_cfgnode2if): 191 | list_sorted_pdgnode = sortedNodesByLoc(pdg.vs) 192 | dict_declnode2val = {} 193 | for node in pdg.vs: 194 | if (node['type'] == 'IdentifierDeclStatement' or node['type'] == 'Parameter' or node['type'] == 'Statement') and node['code'].find(' = ') == -1:#find not init node 195 | if node['type'] == 'IdentifierDeclStatement' or node['type'] == 'Parameter': 196 | list_var = dict_def[node['name']] 197 | else: 198 | list_var = getVarOfNode(node['code']) 199 | 200 | if list_var == False: 201 | continue 202 | 203 | else: 204 | for var in list_var: 205 | results = getInitNodeOfDecl(pdg, list_sorted_pdgnode, node, var, dict_use, dict_def) 206 | if results != []: 207 | for result in results: 208 | if node['name'] not in dict_cfgnode2if.keys():#startnode not belong to if 209 | startnode = node['name'] 210 | endnode = result[0]['name'] 211 | pdg = addDataEdge(pdg, startnode, endnode, var) 212 | 213 | else: 214 | list_if = dict_cfgnode2if[node['name']] 215 | list_not_scan = [] 216 | 217 | for ifstmt_n in list_if: 218 | tuple_statements = dict_if2cfgnode[ifstmt_n] 219 | 220 | if node['name'] in tuple_statements[0]: 221 | list_not_scan += tuple_statements[1] 222 | 223 | elif node['name'] in tuple_statements[1]: 224 | list_not_scan += tuple_statements[0] 225 | 226 | if result[0]['name'] not in list_not_scan: 227 | startnode = node['name'] 228 | endnode = result[0]['name'] 229 | pdg = addDataEdge(pdg, startnode, endnode, var) 230 | 231 | return pdg 232 | 233 | 234 | def get_nodes_before_exit(pdg, dict_if2cfgnode, dict_cfgnode2if): 
235 | _dict = {} 236 | for key in dict_cfgnode2if.keys(): 237 | results = pdg.vs.select(name=key) 238 | if len(results) != 0 and (results[0]['type'] == 'BreakStatement' or results[0]['type'] == 'ReturnStatement' or results[0]['code'].find('exit ') != -1 or results[0]['type'] == 'GotoStatement'):# if stms have return 239 | if_name = '' 240 | if len(dict_cfgnode2if[key]) == 1: 241 | if_name = dict_cfgnode2if[key][0] 242 | else: 243 | if_name = get_ifname(key, dict_if2cfgnode, dict_cfgnode2if) 244 | 245 | print "key", key, if_name, dict_cfgnode2if[key] 246 | 247 | _list_name_0 = dict_if2cfgnode[if_name][0] 248 | _list_name_1 = dict_if2cfgnode[if_name][1] 249 | 250 | if key in _list_name_0: 251 | ret_index = _list_name_0.index(key) 252 | del _list_name_0[ret_index] #_list_name are set of nodes which under the same if with return node or exit or goto statement 253 | 254 | for name in _list_name_0: 255 | _dict[name] = key 256 | 257 | if key in _list_name_1: 258 | ret_index = _list_name_1.index(key) 259 | del _list_name_1[ret_index] #_list_name are set of nodes which under the same if with return node or exit or goto statement 260 | 261 | for name in _list_name_1: 262 | _dict[name] = key 263 | 264 | else: 265 | continue 266 | 267 | return _dict 268 | 269 | 270 | def completeDataEdgeOfPDG(pdg, dict_use, dict_def, dict_if2cfgnode, dict_cfgnode2if): 271 | #if a var in define list but there is not a edge between a node which use it and node which define it,not include id_decl 272 | list_sorted_pdgnode = sortedNodesByLoc(pdg.vs) 273 | exit2stmt_dict = get_nodes_before_exit(pdg, dict_if2cfgnode, dict_cfgnode2if) 274 | dict_declnode2val = {} 275 | 276 | for i in range(0, len(list_sorted_pdgnode)): 277 | if list_sorted_pdgnode[i]['type'] == 'IdentifierDeclStatement': 278 | continue 279 | 280 | if list_sorted_pdgnode[i]['name'] in dict_def.keys(): 281 | #print "list_sorted_pdgnode[i]['name']", list_sorted_pdgnode[i]['name'] 282 | list_def_var = 
dict_def[list_sorted_pdgnode[i]['name']] 283 | 284 | for def_var in list_def_var: 285 | for j in range(i+1, len(list_sorted_pdgnode)): 286 | if list_sorted_pdgnode[i]['name'] in exit2stmt_dict.keys(): 287 | exit_name = exit2stmt_dict[list_sorted_pdgnode[i]['name']] 288 | 289 | if list_sorted_pdgnode[j]['name'] == exit_name: 290 | break 291 | 292 | elif list_sorted_pdgnode[j]['name'] in dict_use.keys() and def_var in dict_use[list_sorted_pdgnode[j]['name']]: 293 | if list_sorted_pdgnode[i]['name'] not in dict_cfgnode2if.keys(): 294 | #must add 295 | startnode = list_sorted_pdgnode[i]['name'] 296 | endnode = list_sorted_pdgnode[j]['name'] 297 | addDataEdge(pdg, startnode, endnode, def_var) 298 | 299 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 300 | break 301 | 302 | elif list_sorted_pdgnode[i]['name'] in dict_cfgnode2if.keys() and list_sorted_pdgnode[j]['name'] not in dict_cfgnode2if.keys(): 303 | startnode = list_sorted_pdgnode[i]['name'] 304 | endnode = list_sorted_pdgnode[j]['name'] 305 | addDataEdge(pdg, startnode, endnode, def_var) 306 | 307 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 308 | break 309 | 310 | elif list_sorted_pdgnode[i]['name'] in dict_cfgnode2if.keys() and list_sorted_pdgnode[j]['name'] in dict_cfgnode2if.keys(): 311 | if_list = dict_cfgnode2if[list_sorted_pdgnode[i]['name']] 312 | _not_scan = [] 313 | for if_stmt in if_list: 314 | _tuple = dict_if2cfgnode[if_stmt] 315 | if list_sorted_pdgnode[i]['name'] in _tuple[0]: 316 | _not_scan += _tuple[1] 317 | else: 318 | _not_scan += _tuple[0] 319 | 320 | if list_sorted_pdgnode[j]['name'] not in _not_scan: 321 | startnode = list_sorted_pdgnode[i]['name'] 322 | endnode = list_sorted_pdgnode[j]['name'] 323 | addDataEdge(pdg, startnode, endnode, def_var) 324 | 325 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in 
dict_def[list_sorted_pdgnode[j]['name']]: 326 | break 327 | 328 | else: 329 | if list_sorted_pdgnode[j]['name'] in dict_use.keys() and def_var in dict_use[list_sorted_pdgnode[j]['name']]: 330 | if list_sorted_pdgnode[i]['name'] not in dict_cfgnode2if.keys(): 331 | #must add 332 | startnode = list_sorted_pdgnode[i]['name'] 333 | endnode = list_sorted_pdgnode[j]['name'] 334 | addDataEdge(pdg, startnode, endnode, def_var) 335 | 336 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 337 | break 338 | 339 | elif list_sorted_pdgnode[i]['name'] in dict_cfgnode2if.keys() and list_sorted_pdgnode[j]['name'] not in dict_cfgnode2if.keys(): 340 | startnode = list_sorted_pdgnode[i]['name'] 341 | endnode = list_sorted_pdgnode[j]['name'] 342 | addDataEdge(pdg, startnode, endnode, def_var) 343 | 344 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 345 | break 346 | 347 | elif list_sorted_pdgnode[i]['name'] in dict_cfgnode2if.keys() and list_sorted_pdgnode[j]['name'] in dict_cfgnode2if.keys(): 348 | if_list = dict_cfgnode2if[list_sorted_pdgnode[i]['name']] 349 | _not_scan = [] 350 | for if_stmt in if_list: 351 | _tuple = dict_if2cfgnode[if_stmt] 352 | if list_sorted_pdgnode[i]['name'] in _tuple[0]: 353 | _not_scan += _tuple[1] 354 | else: 355 | _not_scan += _tuple[0] 356 | 357 | if list_sorted_pdgnode[j]['name'] not in _not_scan: 358 | startnode = list_sorted_pdgnode[i]['name'] 359 | endnode = list_sorted_pdgnode[j]['name'] 360 | addDataEdge(pdg, startnode, endnode, def_var) 361 | 362 | if list_sorted_pdgnode[j]['name'] in dict_def.keys() and def_var in dict_def[list_sorted_pdgnode[j]['name']]: 363 | break 364 | 365 | 366 | else: 367 | continue 368 | 369 | return pdg 370 | 371 | 372 | def addDataEdgeOfObject(pdg, dict_if2cfgnode, dict_cfgnode2if): 373 | for node in pdg.vs: 374 | if node['code'].find(' = new ') != -1: 375 | objectname = node['code'].split(' = 
new ')[0].split(' ')[-1].strip() 376 | cur_name = node['name'] 377 | 378 | for pnode in pdg.vs: 379 | #print pnode['code'] 380 | if pnode['name'] == cur_name: 381 | continue 382 | 383 | if node['name'] not in dict_cfgnode2if.keys(): 384 | if pnode['code'].find(objectname + ' -> ') != -1: 385 | if pnode['code'].split(objectname + ' -> ')[0] == '': 386 | startnode = node['name'] 387 | endnode = pnode['name'] 388 | def_var = objectname 389 | addDataEdge(pdg, startnode, endnode, def_var) 390 | elif pnode['code'].split(objectname + ' -> ')[0][-1] == ' ': 391 | startnode = node['name'] 392 | endnode = pnode['name'] 393 | def_var = objectname 394 | addDataEdge(pdg, startnode, endnode, def_var) 395 | 396 | elif pnode['code'].find('delete ') != -1: 397 | startnode = node['name'] 398 | endnode = pnode['name'] 399 | def_var = objectname 400 | addDataEdge(pdg, startnode, endnode, def_var) 401 | 402 | else: 403 | continue 404 | 405 | else: 406 | list_if = dict_cfgnode2if[node['name']] 407 | list_not_scan = [] 408 | 409 | for ifstmt_n in list_if: 410 | tuple_statements = dict_if2cfgnode[ifstmt_n] 411 | 412 | if node['name'] in tuple_statements[0]: 413 | list_not_scan += tuple_statements[1] 414 | 415 | elif node['name'] in tuple_statements[1]: 416 | list_not_scan += tuple_statements[0] 417 | 418 | if pnode['code'].find(objectname + ' -> ') != -1 and pnode['name'] not in list_not_scan: 419 | if pnode['code'].split(objectname + ' -> ')[0] == '': 420 | startnode = node['name'] 421 | endnode = pnode['name'] 422 | def_var = objectname 423 | addDataEdge(pdg, startnode, endnode, def_var) 424 | elif pnode['code'].split(objectname + ' -> ')[0][-1] == ' ' : 425 | startnode = node['name'] 426 | endnode = pnode['name'] 427 | def_var = objectname 428 | addDataEdge(pdg, startnode, endnode, def_var) 429 | 430 | elif pnode['code'].find('delete ') != -1 and pnode['name'] not in list_not_scan: 431 | startnode = node['name'] 432 | endnode = pnode['name'] 433 | def_var = objectname 434 | 
addDataEdge(pdg, startnode, endnode, def_var) 435 | 436 | else: 437 | continue 438 | 439 | else: 440 | continue 441 | 442 | return pdg 443 | 444 | def deleteCDG(pdg): 445 | edge=pdg.es 446 | a=len(edge) 447 | list_d=[] 448 | print("delete cdg") 449 | for j in range(0,a): 450 | #print edge[j] 451 | if edge[j]['var']==None: 452 | list_d.append(j) 453 | a=list(reversed(list_d)) 454 | for i in a: 455 | pdg.delete_edges(edge[i]) 456 | return pdg 457 | 458 | def main(): 459 | j = JoernSteps() 460 | j.connectToDatabase() 461 | all_func_node = getALLFuncNode(j) 462 | for node in all_func_node: 463 | testID = getFuncFile(j, node._id).split('/')[-2] 464 | path = os.path.join("pdg_db", testID) 465 | 466 | store_file_name = node.properties['name'] + '_' + str(node._id) 467 | 468 | store_path = os.path.join(path, store_file_name) 469 | if os.path.exists(store_path): 470 | continue 471 | 472 | initpdg = translatePDGByNode(j, node)#get init PDG 473 | opt_pdg_1 = modifyStmtNode(initpdg)#merge every statement node 474 | 475 | cfg_path = os.path.join("cfg_db", testID, store_file_name) 476 | for _file in os.listdir(cfg_path): 477 | if _file == 'dict_if2cfgnode': 478 | fin = open(os.path.join(cfg_path, _file)) 479 | dict_if2cfgnode = pickle.load(fin) 480 | fin.close() 481 | 482 | elif _file == 'dict_cfgnode2if': 483 | fin = open(os.path.join(cfg_path, _file)) 484 | dict_cfgnode2if = pickle.load(fin) 485 | fin.close() 486 | 487 | else: 488 | print cfg_path 489 | fin = open(os.path.join(cfg_path, _file)) 490 | cfg = pickle.load(fin) 491 | fin.close() 492 | 493 | i = 0 494 | while i < opt_pdg_1.vcount(): 495 | if opt_pdg_1.vs[i]['type'] == 'Statement' and opt_pdg_1.vs[i]['name'] not in cfg.vs['name']: 496 | for n in cfg.vs: 497 | if opt_pdg_1.vs[i]['code'] == n['code'] and int(opt_pdg_1.vs[i]['location'].split(':')[0]) == int(n['location'].split(':')[0]): 498 | opt_pdg_1.vs[i]['name'] = n['name'] 499 | opt_pdg_1.vs[i]['location'] = n['location'] 500 | break 501 | else: 502 | continue 503 
| 504 | i += 1 505 | 506 | d_use, d_def = getUseDefVarByPDG(j, opt_pdg_1)#get use and def nodedict of every cfgnode 507 | opt_pdg_2 = modifyDataEdgeVal(opt_pdg_1)#not distinguish pointer and buffer it points 508 | 509 | opt_pdg_3 = completeDeclStmtOfPDG(opt_pdg_2, d_use, d_def, dict_if2cfgnode, dict_cfgnode2if) 510 | 511 | opt_pdg_4 = completeDataEdgeOfPDG(opt_pdg_3, d_use, d_def, dict_if2cfgnode, dict_cfgnode2if)#add data edge to get more info 512 | 513 | opted_pdg_5 = addDataEdgeOfObject(opt_pdg_4, dict_if2cfgnode, dict_cfgnode2if) 514 | 515 | #opted_pdg=deleteCDG(opted_pdg_5) 516 | 517 | 518 | if not os.path.exists(path): 519 | os.mkdir(path) 520 | print store_path, path 521 | f = open(store_path, 'wb') 522 | pickle.dump(opted_pdg_5, f, True) 523 | f.close() 524 | 525 | 526 | if __name__ == '__main__': 527 | main() 528 | 529 | 530 | 531 | -------------------------------------------------------------------------------- /data_preprocess.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | 3 | import pickle 4 | import os 5 | 6 | slice_path = './slices/' 7 | label_path = './label_source/' 8 | folder_path = './slice_label/' 9 | for filename in os.listdir(slice_path): 10 | if filename.endswith('.txt') is False: 11 | continue 12 | print(filename) 13 | filepath = os.path.join(slice_path,filename) 14 | f = open(filepath,'r') 15 | slicelists = f.read().split('------------------------------') 16 | f.close() 17 | labelpath = os.path.join(label_path,filename[:-4]+'.pkl') 18 | f = open(labelpath,'rb') 19 | labellists = pickle.load(f) 20 | if isinstance(labellists,tuple): 21 | labellists = labellists[0] 22 | f.close() 23 | 24 | if slicelists[0] == '': 25 | del slicelists[0] 26 | if slicelists[-1] == '' or slicelists[-1] == '\n' or slicelists[-1] == '\r\n': 27 | del slicelists[-1] 28 | 29 | file_path = os.path.join(folder_path,filename) 30 | f = open(file_path,'a+') 31 | index = -1 32 | for slicelist in slicelists: 33 | 
index += 1 34 | sentences = slicelist.split('\n') 35 | if sentences[0] == '\r' or sentences[0] == '': 36 | del sentences[0] 37 | if sentences == []: 38 | continue 39 | if sentences[-1] == '': 40 | del sentences[-1] 41 | if sentences[-1] == '\r': 42 | del sentences[-1] 43 | labellist = labellists[index] 44 | for labels in labellist: 45 | if labels: 46 | label = 1 47 | else: 48 | label = 0 49 | for sentence in sentences: 50 | f.write(str(sentence)+'\n') 51 | f.write(str(label)+'\n') 52 | f.write('------------------------------'+'\n') 53 | f.close() 54 | print('\success!') 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /extract_df.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | from joern.all import JoernSteps 3 | from igraph import * 4 | from access_db_operate import * 5 | from slice_op import * 6 | from py2neo.packages.httpstream import http 7 | http.socket_timeout = 9999 8 | 9 | 10 | def get_slice_file_sequence(store_filepath, list_result, count, func_name, startline, filepath_all): 11 | list_for_line = [] 12 | statement_line = 0 13 | vulnline_row = 0 14 | list_write2file = [] 15 | 16 | for node in list_result: 17 | if node['type'] == 'Function': 18 | f2 = open(node['filepath'], 'r') 19 | content = f2.readlines() 20 | f2.close() 21 | raw = int(node['location'].split(':')[0])-1 22 | code = content[raw].strip() 23 | 24 | new_code = "" 25 | if code.find("#define") != -1: 26 | list_write2file.append(code + ' ' + str(raw+1) + '\n') 27 | continue 28 | 29 | while (len(code) >= 1 and code[-1] != ')' and code[-1] != '{'): 30 | if code.find('{') != -1: 31 | index = code.index('{') 32 | new_code += code[:index].strip() 33 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 34 | break 35 | 36 | else: 37 | new_code += code + '\n' 38 | raw += 1 39 | code = content[raw].strip() 40 | #print "raw", raw, code 41 | 42 | else: 43 | new_code += code 44 | 
new_code = new_code.strip() 45 | if new_code[-1] == '{': 46 | new_code = new_code[:-1].strip() 47 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 48 | #list_line.append(str(raw+1)) 49 | else: 50 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 51 | #list_line.append(str(raw+1)) 52 | 53 | elif node['type'] == 'Condition': 54 | raw = int(node['location'].split(':')[0])-1 55 | if raw in list_for_line: 56 | continue 57 | else: 58 | #print node['type'], node['code'], node['name'] 59 | f2 = open(node['filepath'], 'r') 60 | content = f2.readlines() 61 | f2.close() 62 | code = content[raw].strip() 63 | pattern = re.compile("(?:if|while|for|switch)") 64 | #print code 65 | res = re.search(pattern, code) 66 | if res == None: 67 | raw = raw - 1 68 | code = content[raw].strip() 69 | new_code = "" 70 | 71 | while (code[-1] != ')' and code[-1] != '{'): 72 | if code.find('{') != -1: 73 | index = code.index('{') 74 | new_code += code[:index].strip() 75 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 76 | #list_line.append(str(raw+1)) 77 | list_for_line.append(raw) 78 | break 79 | 80 | else: 81 | new_code += code + '\n' 82 | list_for_line.append(raw) 83 | raw += 1 84 | code = content[raw].strip() 85 | 86 | else: 87 | new_code += code 88 | new_code = new_code.strip() 89 | if new_code[-1] == '{': 90 | new_code = new_code[:-1].strip() 91 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 92 | #list_line.append(str(raw+1)) 93 | list_for_line.append(raw) 94 | 95 | else: 96 | list_for_line.append(raw) 97 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 98 | #list_line.append(str(raw+1)) 99 | 100 | else: 101 | res = res.group() 102 | if res == '': 103 | print filepath_all + ' ' + func_name + " error!" 
104 | exit() 105 | 106 | elif res != 'for': 107 | new_code = res + ' ( ' + node['code'] + ' ) ' 108 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 109 | #list_line.append(str(raw+1)) 110 | 111 | else: 112 | new_code = "" 113 | if code.find(' for ') != -1: 114 | code = 'for ' + code.split(' for ')[1] 115 | 116 | while code != '' and code[-1] != ')' and code[-1] != '{': 117 | if code.find('{') != -1: 118 | index = code.index('{') 119 | new_code += code[:index].strip() 120 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 121 | #list_line.append(str(raw+1)) 122 | list_for_line.append(raw) 123 | break 124 | 125 | elif code[-1] == ';' and code[:-1].count(';') >= 2: 126 | new_code += code 127 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 128 | #list_line.append(str(raw+1)) 129 | list_for_line.append(raw) 130 | break 131 | 132 | else: 133 | new_code += code + '\n' 134 | list_for_line.append(raw) 135 | raw += 1 136 | code = content[raw].strip() 137 | 138 | else: 139 | new_code += code 140 | new_code = new_code.strip() 141 | if new_code[-1] == '{': 142 | new_code = new_code[:-1].strip() 143 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 144 | #list_line.append(str(raw+1)) 145 | list_for_line.append(raw) 146 | 147 | else: 148 | list_for_line.append(raw) 149 | list_write2file.append(new_code + ' ' + str(raw+1) + '\n') 150 | #list_line.append(str(raw+1)) 151 | 152 | elif node['type'] == 'Label': 153 | f2 = open(node['filepath'], 'r') 154 | content = f2.readlines() 155 | f2.close() 156 | raw = int(node['location'].split(':')[0])-1 157 | code = content[raw].strip() 158 | list_write2file.append(code + ' ' + str(raw+1) + '\n') 159 | #list_line.append(str(raw+1)) 160 | 161 | elif node['type'] == 'ForInit': 162 | continue 163 | 164 | elif node['type'] == 'Parameter': 165 | if list_result[0]['type'] != 'Function': 166 | row = node['location'].split(':')[0] 167 | list_write2file.append(node['code'] + ' ' + str(row) + '\n') 168 | 
#list_line.append(row) 169 | else: 170 | continue 171 | 172 | elif node['type'] == 'IdentifierDeclStatement': 173 | if node['code'].strip().split(' ')[0] == "undef": 174 | f2 = open(node['filepath'], 'r') 175 | content = f2.readlines() 176 | f2.close() 177 | raw = int(node['location'].split(':')[0])-1 178 | code1 = content[raw].strip() 179 | list_code2 = node['code'].strip().split(' ') 180 | i = 0 181 | while i < len(list_code2): 182 | if code1.find(list_code2[i]) != -1: 183 | del list_code2[i] 184 | else: 185 | break 186 | code2 = ' '.join(list_code2) 187 | 188 | list_write2file.append(code1 + ' ' + str(raw+1) + '\n' + code2 + ' ' + str(raw+2) + '\n') 189 | 190 | else: 191 | list_write2file.append(node['code'] + ' ' + node['location'].split(':')[0] + '\n') 192 | 193 | elif node['type'] == 'ExpressionStatement': 194 | row = int(node['location'].split(':')[0])-1 195 | if row in list_for_line: 196 | continue 197 | 198 | if node['code'] in ['\n', '\t', ' ', '']: 199 | list_write2file.append(node['code'] + ' ' + str(row+1) + '\n') 200 | #list_line.append(row+1) 201 | elif node['code'].strip()[-1] != ';': 202 | list_write2file.append(node['code'] + '; ' + str(row+1) + '\n') 203 | #list_line.append(row+1) 204 | else: 205 | list_write2file.append(node['code'] + ' ' + str(row+1) + '\n') 206 | #list_line.append(row+1) 207 | 208 | elif node['type'] == "Statement": 209 | row = node['location'].split(':')[0] 210 | list_write2file.append(node['code'] + ' ' + str(row) + '\n') 211 | #list_line.append(row+1) 212 | 213 | else: 214 | #print node['name'], node['code'], node['type'], node['filepath'] 215 | if node['location'] == None: 216 | continue 217 | f2 = open(node['filepath'], 'r') 218 | content = f2.readlines() 219 | f2.close() 220 | row = int(node['location'].split(':')[0])-1 221 | code = content[row].strip() 222 | if row in list_for_line: 223 | continue 224 | 225 | else: 226 | list_write2file.append(node['code'] + ' ' + str(row+1) + '\n') 227 | #list_line.append(str(row+1)) 
228 | 229 | f = open(store_filepath, 'a') 230 | f.write(str(count) + ' ' + filepath_all + ' ' + func_name + ' ' + startline + '\n') 231 | for wb in list_write2file: 232 | f.write(wb) 233 | f.write('------------------------------' + '\n') 234 | f.close() 235 | 236 | 237 | def program_slice(pdg, startnodesID, slicetype, testID):#process startnodes as a list, because main func has many different arguments 238 | list_startnodes = [] 239 | if pdg == False or pdg == None: 240 | return [], [], [] 241 | 242 | for node in pdg.vs: 243 | #print node['functionId'] 244 | if node['name'] in startnodesID: 245 | list_startnodes.append(node) 246 | 247 | if list_startnodes == []: 248 | return [], [], [] 249 | 250 | if slicetype == 0:#backwords 251 | start_line = list_startnodes[0]['location'].split(':')[0] 252 | start_name = list_startnodes[0]['name'] 253 | startline_path = list_startnodes[0]['filepath'] 254 | results_back = program_slice_backwards(pdg, list_startnodes) 255 | 256 | not_scan_func_list = [] 257 | results_back, temp = process_cross_func(results_back, testID, 1, results_back, not_scan_func_list) 258 | 259 | 260 | return [results_back], start_line, startline_path 261 | 262 | elif slicetype == 1:#forwords 263 | print "start extract forword dataflow!" 264 | print list_startnodes, startnodesID 265 | start_line = list_startnodes[0]['location'].split(':')[0] 266 | start_name = list_startnodes[0]['name'] 267 | startline_path = list_startnodes[0]['filepath'] 268 | results_for = program_slice_forward(pdg, list_startnodes) 269 | 270 | not_scan_func_list = [] 271 | results_for, temp = process_cross_func(results_for, testID, 1, results_for, not_scan_func_list) 272 | 273 | return [results_for], start_line, startline_path 274 | 275 | else:#bi_direction 276 | print "start extract backwords dataflow!" 
277 | 278 | start_line = list_startnodes[0]['location'].split(':')[0] 279 | start_name = list_startnodes[0]['name'] 280 | startline_path = list_startnodes[0]['filepath'] 281 | results_back = program_slice_backwards(pdg, list_startnodes)#results_back is a list of nodes 282 | 283 | results_for = program_slice_forward(pdg, list_startnodes) 284 | 285 | 286 | _list_name = [] 287 | for node_back in results_back: 288 | _list_name.append(node_back['name']) 289 | 290 | for node_for in results_for: 291 | if node_for['name'] in _list_name: 292 | continue 293 | else: 294 | results_back.append(node_for) 295 | 296 | results_back = sortedNodesByLoc(results_back) 297 | 298 | iter_times = 0 299 | start_list = [[results_back, iter_times]] 300 | i = 0 301 | not_scan_func_list = [] 302 | list_cross_func_back, not_scan_func_list = process_crossfuncs_back_byfirstnode(start_list, testID, i, not_scan_func_list) 303 | list_results_back = [l[0] for l in list_cross_func_back] 304 | 305 | all_result = [] 306 | for results_back in list_results_back: 307 | index = 1 308 | for a_node in results_back: 309 | if a_node['name'] == start_name: 310 | break 311 | else: 312 | index += 1 313 | 314 | list_to_crossfunc_back = results_back[:index] 315 | list_to_crossfunc_for = results_back[index:] 316 | 317 | list_to_crossfunc_back, temp = process_cross_func(list_to_crossfunc_back, testID, 0, list_to_crossfunc_back, not_scan_func_list) 318 | 319 | list_to_crossfunc_for, temp = process_cross_func(list_to_crossfunc_for, testID, 1, list_to_crossfunc_for, not_scan_func_list) 320 | 321 | all_result.append(list_to_crossfunc_back + list_to_crossfunc_for) 322 | 323 | 324 | return all_result, start_line, startline_path 325 | 326 | 327 | def api_slice(): 328 | count = 1 329 | store_filepath = "api_slices.txt" 330 | f = open("sensifunc_slice_points.pkl", 'rb') 331 | dict_unsliced_sensifunc = pickle.load(f) 332 | f.close() 333 | for key in dict_unsliced_sensifunc.keys():#key is testID 334 | 335 | for _t in 
dict_unsliced_sensifunc[key]: 336 | list_sensitive_funcid = _t[0] 337 | pdg_funcid = _t[1] 338 | sensitive_funcname = _t[2] 339 | 340 | if sensitive_funcname.find("main") != -1: 341 | continue #todo 342 | else: 343 | slice_dir = 2 344 | pdg = getFuncPDGById(key, pdg_funcid) 345 | if pdg == False: 346 | print 'error' 347 | exit() 348 | 349 | list_code, startline, startline_path = program_slice(pdg, list_sensitive_funcid, slice_dir, key) 350 | #print len(list_code) 351 | 352 | if list_code == []: 353 | fout = open("error.txt", 'a') 354 | fout.write(sensitive_funcname + ' ' + str(list_sensitive_funcid) + ' found nothing! \n') 355 | fout.close() 356 | else: 357 | for _list in list_code: 358 | get_slice_file_sequence(store_filepath, _list, count, sensitive_funcname, startline, startline_path) 359 | count += 1 360 | 361 | def pointers_slice(): 362 | count = 1 363 | store_filepath = "pointersuse_slices.txt" 364 | f = open("pointuse_slice_points.pkl", 'rb') 365 | dict_unsliced_pointers = pickle.load(f) 366 | print dict_unsliced_pointers 367 | f.close() 368 | 369 | #l = ['CVE-2013-4921', 'CVE-2013-4514', 'CVE-2015-1158', 'CVE-2015-1159', 'CVE-2005-3806', 'CVE-2012-6647', 'CVE-2012-2123', 'CVE-2015-0239', 'CVE-2013-2596', 'CVE-2008-5713', 'CVE-2015-2192', 'CVE-2015-2191', 'CVE-2006-5751', 'CVE-2014-1690', 'CVE-2012-5354', 'CVE-2008-3527', 'CVE-2004-1151', 'CVE-2011-0059', 'CVE-2008-3833', 'CVE-2010-4258', 'CVE-2014-2241', 'CVE-2011-2689', 'CVE-2011-2723', 'CVE-2014-4655', 'CVE-2014-4654', 'CVE-2010-0727', 'CVE-2014-4656', 'CVE-2014-4652', 'CVE-2009-2909', 'CVE-2008-1514', 'CVE-2014-3534', 'CVE-2014-3537', 'CVE-2012-1947', 'CVE-2012-5670', 'CVE-2011-1759', 'CVE-2011-1750', 'CVE-2007-0006', 'CVE-2010-4805', 'CVE-2013-2015', 'CVE-2014-3122', 'CVE-2011-0085', 'CVE-2011-0084', 'CVE-2011-0083', 'CVE-2007-6151', 'CVE-2009-3547', 'CVE-2012-0044', 'CVE-2014-8133', 'CVE-2009-3238', 'CVE-2012-0041', 'CVE-2009-3234', 'CVE-2013-4220', 'CVE-2014-0203', 'CVE-2011-1138', 'CVE-2005-3807', 
'CVE-2014-3523', 'CVE-2013-0854', 'CVE-2010-3877', 'CVE-2013-0913', 'CVE-2013-1732', 'CVE-2014-8884', 'CVE-2013-1735', 'CVE-2013-1736', 'CVE-2013-0914', 'CVE-2010-2960', 'CVE-2010-2962', 'CVE-2010-2240', 'CVE-2009-0946', 'CVE-2012-3984', 'CVE-2010-1224', 'CVE-2014-1498', 'CVE-2012-6617', 'CVE-2012-6616', 'CVE-2010-0437', 'CVE-2010-1188', 'CVE-2012-2652', 'CVE-2006-4790', 'CVE-2013-0867', 'CVE-2013-0866', 'CVE-2014-1522', 'CVE-2013-0864', 'CVE-2013-0863', 'CVE-2010-3880', 'CVE-2013-0861', 'CVE-2013-0860', 'CVE-2014-3511', 'CVE-2013-0869', 'CVE-2013-0868', 'CVE-2008-5029', 'CVE-2006-4813', 'CVE-2011-0716', 'CVE-2013-1848', 'CVE-2008-5025', 'CVE-2011-0711', 'CVE-2011-0710', 'CVE-2013-0764', 'CVE-2005-2261', 'CVE-2010-2500', 'CVE-2013-0761', 'CVE-2012-1090', 'CVE-2014-0155', 'CVE-2012-1097', 'CVE-2009-3640', 'CVE-2011-3363', 'CVE-2011-3362', 'CVE-2015-2922', 'CVE-2012-0464', 'CVE-2014-2099', 'CVE-2014-9661', 'CVE-2014-9665', 'CVE-2014-8712', 'CVE-2014-8713', 'CVE-2014-8714', 'CVE-2014-7841', 'CVE-2014-7842', 'CVE-2012-3377', 'CVE-2014-1552', 'CVE-2012-0855', 'CVE-2009-0675', 'CVE-2012-5237', 'CVE-2010-4346', 'CVE-2014-1950', 'CVE-2012-5238', 'CVE-2009-1961', 'CVE-2014-9584', 'CVE-2010-2226', 'CVE-2015-0562', 'CVE-2013-0166', 'CVE-2014-5271', 'CVE-2014-5272', 'CVE-2014-3470', 'CVE-2015-0204', 'CVE-2008-1390', 'CVE-2011-1080', 'CVE-2012-1146', 'CVE-2011-3944', 'CVE-2011-2896', 'CVE-2012-3430', 'CVE-2008-3276', 'CVE-2008-3275', 'CVE-2008-3272', 'CVE-2012-2776', 'CVE-2013-4933', 'CVE-2013-4587', 'CVE-2009-0935', 'CVE-2011-1712', 'CVE-2013-0796', 'CVE-2010-4656', 'CVE-2010-2478', 'CVE-2015-0228', 'CVE-2009-0269', 'CVE-2013-1573', 'CVE-2013-4929', 'CVE-2013-6339', 'CVE-2012-3979', 'CVE-2010-4163', 'CVE-2012-3976', 'CVE-2012-2802', 'CVE-2010-4649', 'CVE-2012-3972', 'CVE-2010-4165', 'CVE-2009-0859', 'CVE-2009-3722', 'CVE-2012-4186', 'CVE-2012-4184', 'CVE-2009-3726', 'CVE-2012-2313', 'CVE-2011-2535', 'CVE-2011-2534', 'CVE-2011-2536', 'CVE-2010-3080', 'CVE-2012-0957', 
'CVE-2011-3936', 'CVE-2012-1952', 'CVE-2011-3934', 'CVE-2012-1956', 'CVE-2012-1955', 'CVE-2010-3855', 'CVE-2010-3858', 'CVE-2012-1958', 'CVE-2013-4162', 'CVE-2012-3975', 'CVE-2009-3290', 'CVE-2012-4204', 'CVE-2012-0451', 'CVE-2012-4207', 'CVE-2014-1737', 'CVE-2013-0755', 'CVE-2014-1738', 'CVE-2012-3962', 'CVE-2013-0756', 'CVE-2013-0750', 'CVE-2010-4073', 'CVE-2005-2555', 'CVE-2010-2495', 'CVE-2012-2136', 'CVE-2012-2137', 'CVE-2010-2499', 'CVE-2015-3814', 'CVE-2015-3811', 'CVE-2005-2492', 'CVE-2015-3813', 'CVE-2015-3812', 'CVE-2013-0849', 'CVE-2014-3633', 'CVE-2014-3631', 'CVE-2012-0457', 'CVE-2012-0456', 'CVE-2005-4635', 'CVE-2013-7100', 'CVE-2011-2999', 'CVE-2011-2998', 'CVE-2010-2521', 'CVE-2011-2988', 'CVE-2006-5619', 'CVE-2009-3080', 'CVE-2010-4668', 'CVE-2013-2232', 'CVE-2013-2237', 'CVE-2014-7933', 'CVE-2011-1173', 'CVE-2013-4163', 'CVE-2013-3562', 'CVE-2013-3560', 'CVE-2010-3066', 'CVE-2015-5949', 'CVE-2005-3848', 'CVE-2006-2935', 'CVE-2006-2934', 'CVE-2010-1488', 'CVE-2005-3847', 'CVE-2009-4410', 'CVE-2013-4265', 'CVE-2013-4264', 'CVE-2009-3621', 'CVE-2013-0799', 'CVE-2013-1709', 'CVE-2011-3660', 'CVE-2011-3661', 'CVE-2015-2666', 'CVE-2013-0792', 'CVE-2013-4348', 'CVE-2015-0292', 'CVE-2013-2548', 'CVE-2012-1976', 'CVE-2013-6367', 'CVE-2006-1525', 'CVE-2010-0006', 'CVE-2010-1148', 'CVE-2014-8109', 'CVE-2010-0007', 'CVE-2013-1796', 'CVE-2013-0753', 'CVE-2011-4611', 'CVE-2013-1798', 'CVE-2008-2826', 'CVE-2011-4348', 'CVE-2013-6449', 'CVE-2014-1874', 'CVE-2010-0003', 'CVE-2011-1479', 'CVE-2013-1708', 'CVE-2013-7113', 'CVE-2013-1700', 'CVE-2013-1705', 'CVE-2013-1704', 'CVE-2013-1707', 'CVE-2010-1173', 'CVE-2010-2068', 'CVE-2006-1530', 'CVE-2012-2390', 'CVE-2009-1439', 'CVE-2012-2393', 'CVE-2011-3648', 'CVE-2012-6062', 'CVE-2015-4652', 'CVE-2011-1598', 'CVE-2013-4081', 'CVE-2007-4997', 'CVE-2013-4083', 'CVE-2013-4082', 'CVE-2011-1592', 'CVE-2012-6060', 'CVE-2009-1338', 'CVE-2006-4997', 'CVE-2013-7264', 'CVE-2012-5669', 'CVE-2006-6333', 'CVE-2013-1581', 
'CVE-2013-6673', 'CVE-2012-0458', 'CVE-2013-0845', 'CVE-2010-3861', 'CVE-2012-4293', 'CVE-2012-4292', 'CVE-2012-4565', 'CVE-2009-4021', 'CVE-2014-6431', 'CVE-2014-6430', 'CVE-2014-4943', 'CVE-2012-4298', 'CVE-2011-1927', 'CVE-2011-1023', 'CVE-2007-1592', 'CVE-2009-0747', 'CVE-2009-0746', 'CVE-2011-1147', 'CVE-2012-5240', 'CVE-2014-1642', 'CVE-2012-2787', 'CVE-2012-2786', 'CVE-2012-0045', 'CVE-2012-2783', 'CVE-2013-4300', 'CVE-2012-2788', 'CVE-2006-2445', 'CVE-2011-0521', 'CVE-2006-2446', 'CVE-2011-2984', 'CVE-2015-0253', 'CVE-2014-8369', 'CVE-2014-0206', 'CVE-2006-2448', 'CVE-2008-3792', 'CVE-2011-2909', 'CVE-2010-2798', 'CVE-2009-1046', 'CVE-2014-2907', 'CVE-2014-3186', 'CVE-2013-3231', 'CVE-2013-3230', 'CVE-2011-2906', 'CVE-2013-3234', 'CVE-2007-1217', 'CVE-2014-1497', 'CVE-2011-2588', 'CVE-2013-1696', 'CVE-2011-2587', 'CVE-2013-1693', 'CVE-2012-2669', 'CVE-2011-2378', 'CVE-2011-2373', 'CVE-2008-4989', 'CVE-2011-2371', 'CVE-2010-4347', 'CVE-2014-4048', 'CVE-2011-3619', 'CVE-2010-4343', 'CVE-2010-4342', 'CVE-2010-4263', 'CVE-2013-2128', 'CVE-2013-5717', 'CVE-2014-9319', 'CVE-2014-9318', 'CVE-2013-2234', 'CVE-2013-7339', 'CVE-2014-9316', 'CVE-2013-5719', 'CVE-2013-1572', 'CVE-2013-1576', 'CVE-2011-4579', 'CVE-2010-1748', 'CVE-2013-1578', 'CVE-2012-0477', 'CVE-2014-3181', 'CVE-2014-3182', 'CVE-2014-3183', 'CVE-2014-3184', 'CVE-2014-3185', 'CVE-2006-5158', 'CVE-2013-0872', 'CVE-2013-0873', 'CVE-2013-0874', 'CVE-2013-3302', 'CVE-2013-0877', 'CVE-2013-0878', 'CVE-2011-3973', 'CVE-2009-3888', 'CVE-2013-4534', 'CVE-2015-6243', 'CVE-2015-6242', 'CVE-2013-4533', 'CVE-2013-4125', 'CVE-2014-8412', 'CVE-2013-4129', 'CVE-2015-6249', 'CVE-2011-1146', 'CVE-2011-1079', 'CVE-2015-6241', 'CVE-2010-1636', 'CVE-2014-0160', 'CVE-2013-0865', 'CVE-2012-6638', 'CVE-2010-3298', 'CVE-2012-6539', 'CVE-2010-1088', 'CVE-2014-9679', 'CVE-2010-1083', 'CVE-2014-9676', 'CVE-2012-6061', 'CVE-2010-1087', 'CVE-2010-1086', 'CVE-2010-1085', 'CVE-2009-3612', 'CVE-2015-3395', 'CVE-2013-7022', 
'CVE-2013-7023', 'CVE-2013-7021', 'CVE-2013-7026', 'CVE-2013-7027', 'CVE-2013-7024', 'CVE-2013-5634', 'CVE-2012-3364', 'CVE-2012-0042', 'CVE-2008-0420', 'CVE-2011-1776', 'CVE-2010-3772', 'CVE-2005-4886', 'CVE-2014-2894', 'CVE-2011-1770', 'CVE-2010-3774', 'CVE-2005-3359', 'CVE-2013-1954', 'CVE-2014-5206', 'CVE-2012-2100', 'CVE-2014-4608', 'CVE-2009-2407', 'CVE-2005-3356', 'CVE-2011-1171', 'CVE-2010-2806', 'CVE-2013-7015', 'CVE-2010-2803', 'CVE-2014-3640', 'CVE-2009-2768', 'CVE-2010-2808', 'CVE-2009-0065', 'CVE-2013-4511', 'CVE-2008-3915', 'CVE-2010-2519', 'CVE-2012-4530', 'CVE-2014-2309', 'CVE-2014-7145', 'CVE-2010-3078', 'CVE-2007-6206', 'CVE-2007-4571', 'CVE-2010-2071', 'CVE-2013-1792', 'CVE-2011-2707', 'CVE-2011-3000', 'CVE-2011-2700', 'CVE-2011-3658', 'CVE-2013-4270', 'CVE-2011-3654', 'CVE-2011-3653', 'CVE-2014-9683', 'CVE-2005-3857', 'CVE-2014-1445', 'CVE-2013-5618', 'CVE-2013-1958', 'CVE-2009-2287', 'CVE-2013-0782', 'CVE-2011-1180', 'CVE-2011-1182', 'CVE-2013-6671', 'CVE-2013-3076', 'CVE-2013-5613', 'CVE-2013-5599', 'CVE-2009-0787', 'CVE-2011-1573', 'CVE-2010-2937', 'CVE-2007-1000', 'CVE-2013-2276', 'CVE-2013-5593', 'CVE-2013-4079', 'CVE-2011-1477', 'CVE-2013-5597', 'CVE-2009-0028', 'CVE-2014-1488', 'CVE-2008-4210', 'CVE-2014-1481', 'CVE-2014-1487', 'CVE-2010-2066', 'CVE-2013-5601', 'CVE-2015-3808', 'CVE-2015-3809', 'CVE-2013-7281', 'CVE-2014-3601', 'CVE-2011-0073', 'CVE-2013-4470', 'CVE-2013-0859', 'CVE-2012-4288', 'CVE-2012-4289', 'CVE-2012-0444', 'CVE-2011-2987', 'CVE-2013-6450', 'CVE-2012-4285', 'CVE-2012-4287', 'CVE-2014-9419', 'CVE-2013-6457', 'CVE-2013-2058', 'CVE-2010-4256', 'CVE-2010-4251', 'CVE-2014-2739', 'CVE-2014-6424', 'CVE-2011-0055', 'CVE-2011-0051', 'CVE-2014-2097', 'CVE-2012-3969', 'CVE-2012-1183', 'CVE-2012-1184', 'CVE-2010-4078', 'CVE-2013-0848', 'CVE-2010-4074', 'CVE-2008-5134', 'CVE-2010-4076', 'CVE-2012-3964', 'CVE-2012-3966', 'CVE-2010-4072', 'CVE-2009-3638', 'CVE-2013-2930', 'CVE-2014-9672', 'CVE-2012-6537', 'CVE-2012-4190', 
'CVE-2014-1446', 'CVE-2014-2523', 'CVE-2014-1509', 'CVE-2014-6423', 'CVE-2014-1502', 'CVE-2012-1945', 'CVE-2012-1946', 'CVE-2010-3864', 'CVE-2012-1940', 'CVE-2013-0844', 'CVE-2012-1942', 'CVE-2014-0195', 'CVE-2010-3904', 'CVE-2013-7112', 'CVE-2010-3907', 'CVE-2009-1360', 'CVE-2014-7825', 'CVE-2006-1864', 'CVE-2013-4153', 'CVE-2013-4151', 'CVE-2013-4150', 'CVE-2013-1828', 'CVE-2013-4401', 'CVE-2014-6426', 'CVE-2011-1044', 'CVE-2009-1630', 'CVE-2012-0023', 'CVE-2011-4031', 'CVE-2012-2800', 'CVE-2012-2801', 'CVE-2010-2062', 'CVE-2012-2803', 'CVE-2013-7014', 'CVE-2008-2931', 'CVE-2012-6540', 'CVE-2013-4247', 'CVE-2011-3649', 'CVE-2015-3008', 'CVE-2012-0068', 'CVE-2012-6542', 'CVE-2011-3191', 'CVE-2014-9743', 'CVE-2013-1929', 'CVE-2009-2406', 'CVE-2006-0039', 'CVE-2009-1337', 'CVE-2014-1510', 'CVE-2011-3484', 'CVE-2006-0035', 'CVE-2013-5600', 'CVE-2012-2319', 'CVE-2013-1920', 'CVE-2013-7265', 'CVE-2010-3850', 'CVE-2013-7267', 'CVE-2014-2286', 'CVE-2011-4102', 'CVE-2012-5668', 'CVE-2011-4100', 'CVE-2010-3859', 'CVE-2015-4490', 'CVE-2010-4079', 'CVE-2014-1739', 'CVE-2012-0056', 'CVE-2011-1078', 'CVE-2011-4086', 'CVE-2014-0196', 'CVE-2013-3235', 'CVE-2013-6167', 'CVE-2014-8546', 'CVE-2015-3417', 'CVE-2011-3623', 'CVE-2014-0205', 'CVE-2014-6426', 'CVE-2014-6427', 'CVE-2014-6428', 'CVE-2014-6429', 'CVE-2005-2800', 'CVE-2014-7826', 'CVE-2014-1438', 'CVE-2012-6618', 'CVE-2012-6541', 'CVE-2015-0820', 'CVE-2015-0823', 'CVE-2011-1833', 'CVE-2009-1897', 'CVE-2012-2790', 'CVE-2012-2791', 'CVE-2012-2793', 'CVE-2012-2794', 'CVE-2009-3002', 'CVE-2009-3001', 'CVE-2012-2797', 'CVE-2011-2182', 'CVE-2011-2183', 'CVE-2011-2184', 'CVE-2013-5651', 'CVE-2009-2844', 'CVE-2009-2846', 'CVE-2014-3510', 'CVE-2014-8541', 'CVE-2013-3225', 'CVE-2013-3226', 'CVE-2014-8542', 'CVE-2014-2851', 'CVE-2014-8544', 'CVE-2013-3222', 'CVE-2010-3084', 'CVE-2014-8549', 'CVE-2013-4936', 'CVE-2013-3228', 'CVE-2013-3229', 'CVE-2011-3192', 'CVE-2013-0268', 'CVE-2013-1763', 'CVE-2011-1019', 'CVE-2013-1767', 
'CVE-2012-2796', 'CVE-2013-1680', 'CVE-2012-0066', 'CVE-2011-1160', 'CVE-2011-0069', 'CVE-2013-2140', 'CVE-2011-2364', 'CVE-2011-2367', 'CVE-2014-2299', 'CVE-2011-1493', 'CVE-2011-2368', 'CVE-2011-1495', 'CVE-2014-0221', 'CVE-2013-4205', 'CVE-2013-2486', 'CVE-2013-2094', 'CVE-2013-7266', 'CVE-2015-6246', 'CVE-2013-4928', 'CVE-2011-0014', 'CVE-2013-2481', 'CVE-2011-1012', 'CVE-2011-1010', 'CVE-2008-5079', 'CVE-2010-4527', 'CVE-2010-4526', 'CVE-2015-6654', 'CVE-2013-4592', 'CVE-2013-4591', 'CVE-2010-1437', 'CVE-2011-2484', 'CVE-2011-2482', 'CVE-2014-8643', 'CVE-2006-0557', 'CVE-2011-3946', 'CVE-2011-3945', 'CVE-2014-8160', 'CVE-2014-9428', 'CVE-2011-3941', 'CVE-2013-1860', 'CVE-2014-9420', 'CVE-2011-3949', 'CVE-2013-4296', 'CVE-2013-4297', 'CVE-2013-2495', 'CVE-2012-2779', 'CVE-2013-4931', 'CVE-2013-4930', 'CVE-2013-6399', 'CVE-2013-4932', 'CVE-2013-4934', 'CVE-2014-1549', 'CVE-2009-2847', 'CVE-2013-0311', 'CVE-2013-0310', 'CVE-2013-0313', 'CVE-2011-1771', 'CVE-2010-1641', 'CVE-2014-0077', 'CVE-2012-6057', 'CVE-2012-6056', 'CVE-2012-6055', 'CVE-2012-6054', 'CVE-2012-6053', 'CVE-2012-3957', 'CVE-2014-5471', 'CVE-2014-9603', 'CVE-2009-3624', 'CVE-2014-9604', 'CVE-2012-6059', 'CVE-2012-6058', 'CVE-2013-7017', 'CVE-2013-2230', 'CVE-2014-2673', 'CVE-2014-2672', 'CVE-2013-7013', 'CVE-2013-7012', 'CVE-2013-7011', 'CVE-2011-0006', 'CVE-2013-0791', 'CVE-2013-0790', 'CVE-2013-0793', 'CVE-2012-5532', 'CVE-2013-0795', 'CVE-2014-8543', 'CVE-2013-7019', 'CVE-2013-7018', 'CVE-2009-2484', 'CVE-2009-4307', 'CVE-2013-1819', 'CVE-2012-1973', 'CVE-2014-8545', 'CVE-2009-4308', 'CVE-2012-2745', 'CVE-2011-3950', 'CVE-2011-1747', 'CVE-2014-8547', 'CVE-2011-1745', 'CVE-2011-2928', 'CVE-2014-2889', 'CVE-2010-0741', 'CVE-2011-3002', 'CVE-2011-3003', 'CVE-2012-6657', 'CVE-2009-1389', 'CVE-2009-4895', 'CVE-2008-5700', 'CVE-2009-1385', 'CVE-2006-5462', 'CVE-2006-5749', 'CVE-2013-7010', 'CVE-2008-3686', 'CVE-2014-1684', 'CVE-2012-3553', 'CVE-2009-1192', 'CVE-2015-3331', 'CVE-2008-2750', 
'CVE-2009-4005', 'CVE-2015-3339', 'CVE-2010-4648', 'CVE-2009-2691', 'CVE-2011-2605', 'CVE-2014-7283', 'CVE-2011-4101', 'CVE-2013-7268', 'CVE-2013-7269', 'CVE-2013-2206', 'CVE-2011-0726', 'CVE-2010-3429', 'CVE-2014-2038', 'CVE-2009-1527', 'CVE-2014-1508', 'CVE-2004-0535', 'CVE-2011-2216', 'CVE-2012-0452', 'CVE-2011-4326', 'CVE-2011-4324', 'CVE-2011-2213', 'CVE-2011-4081', 'CVE-2011-4087', 'CVE-2012-0058', 'CVE-2008-1294', 'CVE-2010-4080', 'CVE-2010-4081', 'CVE-2009-4138', 'CVE-2010-4083', 'CVE-2011-2174', 'CVE-2014-4027', 'CVE-2011-3637', 'CVE-2009-3228', 'CVE-2009-0031', 'CVE-2013-1727', 'CVE-2013-1726', 'CVE-2011-1684', 'CVE-2010-4242', 'CVE-2014-6432', 'CVE-2007-3642', 'CVE-2013-4399', 'CVE-2010-4248', 'CVE-2014-3687', 'CVE-2012-3991', 'CVE-2007-4521', 'CVE-2014-0038', 'CVE-2010-3432', 'CVE-2013-6336', 'CVE-2013-2634', 'CVE-2013-2635', 'CVE-2013-2636', 'CVE-2009-1298', 'CVE-2012-0207', 'CVE-2009-2651', 'CVE-2006-2778', 'CVE-2012-2375', 'CVE-2013-0852', 'CVE-2006-1856', 'CVE-2006-1855', 'CVE-2013-0851', 'CVE-2013-0856', 'CVE-2010-3875', 'CVE-2010-3876', 'CVE-2013-0855', 'CVE-2013-1583', 'CVE-2013-1582', 'CVE-2013-1580', 'CVE-2013-1587', 'CVE-2013-1586', 'CVE-2013-1584', 'CVE-2013-6436', 'CVE-2013-1588', 'CVE-2013-4149', 'CVE-2010-1162', 'CVE-2010-4243', 'CVE-2010-2537', 'CVE-2013-0778', 'CVE-2010-2248', 'CVE-2013-0772', 'CVE-2013-0771', 'CVE-2010-0623', 'CVE-2013-0777', 'CVE-2010-2538', 'CVE-2013-0774', 'CVE-2011-0021', 'CVE-2011-3353', 'CVE-2012-0478', 'CVE-2014-3610', 'CVE-2014-3611', 'CVE-2012-0475', 'CVE-2012-0474', 'CVE-2012-0471', 'CVE-2012-0470', 'CVE-2014-9656', 'CVE-2014-9657', 'CVE-2005-4618', 'CVE-2006-3741', 'CVE-2014-8709', 'CVE-2014-9658', 'CVE-2013-0290', 'CVE-2013-3227', 'CVE-2012-4461', 'CVE-2009-1336', 'CVE-2014-7937', 'CVE-2012-1595', 'CVE-2012-1594', 'CVE-2015-0834', 'CVE-2015-0833', 'CVE-2013-4563', 'CVE-2012-0067', 'CVE-2006-6106', 'CVE-2011-2175', 'CVE-2011-2365', 'CVE-2014-4667', 'CVE-2005-2617', 'CVE-2010-0307', 'CVE-2014-4174', 
'CVE-2013-5641', 'CVE-2013-5642', 'CVE-2011-1093', 'CVE-2013-6891', 'CVE-2014-3509', 'CVE-2013-1722', 'CVE-2010-2431', 'CVE-2013-3559', 'CVE-2013-3557', 'CVE-2013-4541', 'CVE-2014-8416', 'CVE-2014-8415', 'CVE-2014-8414', 'CVE-2011-1175', 'CVE-2013-1676', 'CVE-2011-1170', 'CVE-2011-0070', 'CVE-2013-1672', 'CVE-2011-2022', 'CVE-2012-1583', 'CVE-2013-1679', 'CVE-2013-1678', 'CVE-2011-0079', 'CVE-2012-6538', 'CVE-2014-2289', 'CVE-2014-2282', 'CVE-2014-2283', 'CVE-2010-3296', 'CVE-2011-1959', 'CVE-2011-3670', 'CVE-2010-3297', 'CVE-2013-4353', 'CVE-2009-1243', 'CVE-2009-1242', 'CVE-2010-2954', 'CVE-2010-2955', 'CVE-2014-9374', 'CVE-2008-4445', 'CVE-2012-2774', 'CVE-2013-2488', 'CVE-2013-1979', 'CVE-2011-4594', 'CVE-2009-0835', 'CVE-2013-6378', 'CVE-2011-4598', 'CVE-2011-2496', 'CVE-2012-6548', 'CVE-2014-5472', 'CVE-2013-2478', 'CVE-2009-2692', 'CVE-2013-2476', 'CVE-2011-4352', 'CVE-2012-2775', 'CVE-2009-2698', 'CVE-2014-8173', 'CVE-2013-3673', 'CVE-2013-3672', 'CVE-2013-3670', 'CVE-2011-1172', 'CVE-2013-3675', 'CVE-2013-3674', 'CVE-2012-6547', 'CVE-2009-0676', 'CVE-2013-6380', 'CVE-2013-6381', 'CVE-2012-6543', 'CVE-2013-6383', 'CVE-2013-4513', 'CVE-2013-4512', 'CVE-2009-4141', 'CVE-2012-4467', 'CVE-2013-4516', 'CVE-2013-4515', 'CVE-2013-1774', 'CVE-2013-2547', 'CVE-2011-1748', 'CVE-2008-4302', 'CVE-2011-1076', 'CVE-2011-1746', 'CVE-2008-4554', 'CVE-2014-3153', 'CVE-2014-9529', 'CVE-2013-2852', 'CVE-2005-3181', 'CVE-2011-1581', 'CVE-2015-3636', 'CVE-2011-1957', 'CVE-2013-1957', 'CVE-2014-5045', 'CVE-2010-3015', 'CVE-2012-1961', 'CVE-2013-0850', 'CVE-2011-2518', 'CVE-2013-4514', 'CVE-2013-4922', 'CVE-2013-4923', 'CVE-2013-4921', 'CVE-2013-4927', 'CVE-2013-4924', 'CVE-2010-5313', 'CVE-2010-4650', 'CVE-2010-4158', 'CVE-2014-0069', 'CVE-2010-4157', 'CVE-2013-2850', 'CVE-2010-2492'] 370 | #l = ['CVE-2016-5278', 'CVE-2015-5154', 'CVE-2016-9576', 'CVE-2016-2808', 'CVE-2016-1930', 'CVE-2016-2532', 'CVE-2015-4521', 'CVE-2015-4522', 'CVE-2015-7203', 'CVE-2016-5126', 
'CVE-2017-6348', 'CVE-2015-8961', 'CVE-2015-8962', 'CVE-2016-5275', 'CVE-2016-4439', 'CVE-2016-7908', 'CVE-2016-7154', 'CVE-2015-4036', 'CVE-2015-3456', 'CVE-2015-2740', 'CVE-2016-3134', 'CVE-2015-5283', 'CVE-2016-9776', 'CVE-2016-7155', 'CVE-2016-9101', 'CVE-2016-7156', 'CVE-2016-2818', 'CVE-2015-8363', 'CVE-2015-7194', 'CVE-2016-6511', 'CVE-2016-5264', 'CVE-2015-5307', 'CVE-2015-4002', 'CVE-2016-9373', 'CVE-2016-1583', 'CVE-2016-7180', 'CVE-2016-1935', 'CVE-2015-2729', 'CVE-2016-5238', 'CVE-2016-1714', 'CVE-2015-0829', 'CVE-2016-2838', 'CVE-2016-2529', 'CVE-2015-4511', 'CVE-2016-4082', 'CVE-2015-4513', 'CVE-2015-4512', 'CVE-2015-2739', 'CVE-2015-8953', 'CVE-2013-4542', 'CVE-2016-9923', 'CVE-2016-2327', 'CVE-2016-2329', 'CVE-2016-2328', 'CVE-2016-1970', 'CVE-2015-8817', 'CVE-2016-1974', 'CVE-2016-5829', 'CVE-2016-2847', 'CVE-2016-7161', 'CVE-2017-6474', 'CVE-2017-6470', 'CVE-2016-2530', 'CVE-2016-6507', 'CVE-2016-6506', 'CVE-2015-4517', 'CVE-2015-0830', 'CVE-2016-6508', 'CVE-2016-4002', 'CVE-2015-8785', 'CVE-2015-4500', 'CVE-2015-4501', 'CVE-2016-4006', 'CVE-2015-4504', 'CVE-2015-4487', 'CVE-2015-2724', 'CVE-2015-2725', 'CVE-2016-8909', 'CVE-2016-2330', 'CVE-2016-4805', 'CVE-2015-7178', 'CVE-2015-7179', 'CVE-2015-7176', 'CVE-2015-7177', 'CVE-2015-7174', 'CVE-2015-7175', 'CVE-2016-9104', 'CVE-2015-3906', 'CVE-2016-5280', 'CVE-2016-6513', 'CVE-2016-2814', 'CVE-2016-2819', 'CVE-2015-8365', 'CVE-2016-4952', 'CVE-2016-8910', 'CVE-2016-7910', 'CVE-2016-7913', 'CVE-2016-7912', 'CVE-2017-6214', 'CVE-2016-10154', 'CVE-2016-6351', 'CVE-2016-9685', 'CVE-2016-1957', 'CVE-2016-1956', 'CVE-2016-6213', 'CVE-2016-1953', 'CVE-2016-1952', 'CVE-2016-7425', 'CVE-2015-6252', 'CVE-2015-1872', 'CVE-2015-8663', 'CVE-2015-8662', 'CVE-2015-8661', 'CVE-2014-5388', 'CVE-2016-4080', 'CVE-2015-4482', 'CVE-2015-1339', 'CVE-2016-5728', 'CVE-2015-4484', 'CVE-2015-1333', 'CVE-2016-4998', 'CVE-2016-2550', 'CVE-2016-9103', 'CVE-2016-3156','CVE-2016-4952', 'CVE-2016-9923', 'CVE-2016-9685', 
'CVE-2016-2329', 'CVE-2015-0829', 'CVE-2016-2838', 'CVE-2016-2529', 'CVE-2015-4511', 'CVE-2015-4513', 'CVE-2015-4512', 'CVE-2015-4517', 'CVE-2016-4082', 'CVE-2015-8953', 'CVE-2015-4511', 'CVE-2015-0829', 'CVE-2016-2838', 'CVE-2016-2529', 'CVE-2015-0824', 'CVE-2016-2837', 'CVE-2016-2836', 'CVE-2016-2523', 'CVE-2016-2522', 'CVE-2016-7179', 'CVE-2016-7177', 'CVE-2016-7176', 'CVE-2016-8658', 'CVE-2015-3209', 'CVE-2016-7170', 'CVE-2016-2824', 'CVE-2016-5106', 'CVE-2016-5107', 'CVE-2016-6833', 'CVE-2015-5366', 'CVE-2016-6835', 'CVE-2016-4454', 'CVE-2015-6526', 'CVE-2015-7220', 'CVE-2015-7221', 'CVE-2016-5276', 'CVE-2016-5277', 'CVE-2016-5274', 'CVE-2015-4651', 'CVE-2017-5547', 'CVE-2016-9793', 'CVE-2015-7180', 'CVE-2016-3955', 'CVE-2014-0182', 'CVE-2015-3810', 'CVE-2016-4453', 'CVE-2015-4493', 'CVE-2016-6828', 'CVE-2015-2710', 'CVE-2015-7217', 'CVE-2016-7094', 'CVE-2016-4441', 'CVE-2015-5156', 'CVE-2015-4473', 'CVE-2016-4079', 'CVE-2015-5364', 'CVE-2015-6820', 'CVE-2015-3815', 'CVE-2015-7198', 'CVE-2015-7199', 'CVE-2015-5158', 'CVE-2017-6353', 'CVE-2016-9376', 'CVE-2015-2712', 'CVE-2016-4568', 'CVE-2015-2716', 'CVE-2016-5258', 'CVE-2016-5259', 'CVE-2015-7512', 'CVE-2015-8554', 'CVE-2015-7201', 'CVE-2015-7202', 'CVE-2016-5254', 'CVE-2016-5255', 'CVE-2016-5256', 'CVE-2016-5257', 'CVE-2016-2213', 'CVE-2017-5548', 'CVE-2016-0718', 'CVE-2016-7042', 'CVE-2015-0827', 'CVE-2015-0826', 'CVE-2016-4568', 'CVE-2015-2716', 'CVE-2016-7466', 'CVE-2016-8666', 'CVE-2015-8743', 'CVE-2016-3062', 'CVE-2015-3214', 'CVE-2016-5258', 'CVE-2016-5259', 'CVE-2015-7512', 'CVE-2015-8554', 'CVE-2015-7201', 'CVE-2015-7202', 'CVE-2016-5254', 'CVE-2016-5255', 'CVE-2016-5256', 'CVE-2016-5257', 'CVE-2016-2213', 'CVE-2017-5548', 'CVE-2016-0718', 'CVE-2016-1981', 'CVE-2015-2726', 'CVE-2016-5400', 'CVE-2016-5403', 'CVE-2016-2857', 'CVE-2015-1779', 'CVE-2016-7042', 'CVE-2015-0827', 'CVE-2015-0826', 'CVE-2016-4568', 'CVE-2015-2716', 'CVE-2016-7466', 'CVE-2016-8666', 'CVE-2015-8743', 'CVE-2016-1977', 
'CVE-2016-3062', 'CVE-2015-3214', 'CVE-2015-0824', 'CVE-2016-2837', 'CVE-2016-2836', 'CVE-2016-2523', 'CVE-2016-2522', 'CVE-2016-7179', 'CVE-2016-7177', 'CVE-2016-7176', 'CVE-2016-8658', 'CVE-2015-3209', 'CVE-2016-7170'] 371 | #print len(list(set(l))) 372 | #exit() 373 | l = ['CVE-2015-4521', 'CVE-2015-4482', 'CVE-2016-2824', 'CVE-2015-4487', 'CVE-2014-2894', 'CVE-2015-4484', 'CVE-2016-4002', 'CVE-2015-2729', 'CVE-2015-4500', 'CVE-2015-4501', 'CVE-2016-5238', 'CVE-2014-5263', 'CVE-2015-2726', 'CVE-2013-4526', 'CVE-2014-0223', 'CVE-2013-4527', 'CVE-2016-2814', 'CVE-2015-7178', 'CVE-2015-7179', 'CVE-2013-4530', 'CVE-2013-4533', 'CVE-2015-8662', 'CVE-2015-7176', 'CVE-2016-1714', 'CVE-2015-7174', 'CVE-2015-7175', 'CVE-2016-9104', 'CVE-2016-5280', 'CVE-2016-9101', 'CVE-2016-9103', 'CVE-2016-2819', 'CVE-2016-2818', 'CVE-2015-0829', 'CVE-2016-4952', 'CVE-2015-4511', 'CVE-2015-4512', 'CVE-2015-4517', 'CVE-2014-9676', 'CVE-2013-6399', 'CVE-2016-8910', 'CVE-2013-0866', 'CVE-2013-4542', 'CVE-2015-3395', 'CVE-2016-6351', 'CVE-2016-9923', 'CVE-2013-0860', 'CVE-2016-1957', 'CVE-2016-1956', 'CVE-2013-7020', 'CVE-2013-7021', 'CVE-2016-1953', 'CVE-2016-1952', 'CVE-2013-0868', 'CVE-2014-8541', 'CVE-2016-1970', 'CVE-2014-9319', 'CVE-2014-8542', 'CVE-2016-7421', 'CVE-2014-8544', 'CVE-2014-8547', 'CVE-2016-7161', 'CVE-2014-7937', 'CVE-2015-1872', 'CVE-2014-9317', 'CVE-2014-9316', 'CVE-2014-7933', 'CVE-2016-5258', 'CVE-2016-5259', 'CVE-2015-7512', 'CVE-2014-3640', 'CVE-2016-5106', 'CVE-2016-5107', 'CVE-2016-4453', 'CVE-2015-7203', 'CVE-2016-6835', 'CVE-2016-4454', 'CVE-2016-5256', 'CVE-2016-5257', 'CVE-2015-7220', 'CVE-2015-7221', 'CVE-2015-4504', 'CVE-2016-7170', 'CVE-2016-5278', 'CVE-2015-7180', 'CVE-2016-1981', 'CVE-2016-8909', 'CVE-2016-2836', 'CVE-2016-2857', 'CVE-2013-0858', 'CVE-2014-0182', 'CVE-2013-0856', 'CVE-2013-0857', 'CVE-2016-5403', 'CVE-2014-2099', 'CVE-2014-2098', 'CVE-2015-1779', 'CVE-2016-6833', 'CVE-2014-2097', 'CVE-2015-4493', 'CVE-2015-0825', 'CVE-2015-0824', 
# --- slice_op.py (tail) ---
# NOTE(review): pointers_slice() begins above this chunk and is not reproduced
# here; its per-point loop is the same pattern as _slice_for_points() below.

def _slice_for_points(points_pkl, store_filepath, skip_testids):
    """Shared worker for the *_slice() entry points (deduplicates the bodies
    of arrays_slice / integeroverflow_slice, which were copy-pasted).

    points_pkl     -- pickle mapping testID -> list of
                      (list_point_funcids, pdg_funcid, point_name) tuples.
    store_filepath -- text file the generated slices are appended to.
    skip_testids   -- testIDs (CVE ids) to skip entirely.

    Side effects: writes slices via get_slice_file_sequence() and logs points
    that produced no slice to error.txt.
    """
    count = 1
    with open(points_pkl, 'rb') as f:
        dict_unsliced = pickle.load(f)

    for key in dict_unsliced.keys():  # key is testID
        if key in skip_testids:
            continue

        for _t in dict_unsliced[key]:
            list_points_funcid = _t[0]
            pdg_funcid = _t[1]
            print(pdg_funcid)
            point_name = str(_t[2])

            slice_dir = 2  # assumes 2 = bidirectional slice -- TODO confirm in program_slice
            pdg = getFuncPDGById(key, pdg_funcid)
            if pdg == False:
                print('error')
                exit()

            list_code, startline, startline_path = program_slice(pdg, list_points_funcid, slice_dir, key)

            if list_code == []:
                with open("error.txt", 'a') as fout:
                    fout.write(point_name + ' ' + str(list_points_funcid) + ' found nothing! \n')
            else:
                for _list in list_code:
                    get_slice_file_sequence(store_filepath, _list, count, point_name, startline, startline_path)
                    count += 1


def arrays_slice():
    """Generate program slices around array-use points."""
    _slice_for_points("arrayuse_slice_points.pkl", "arraysuse_slices.txt", [])


def integeroverflow_slice():
    """Generate program slices around integer-overflow points.

    The listed CVE testIDs are skipped (already processed / known-bad input).
    """
    skip = ['CVE-2016-5259', 'CVE-2015-7512', 'CVE-2014-3640', 'CVE-2016-5106',
            'CVE-2016-5107', 'CVE-2016-4453', 'CVE-2015-4475', 'CVE-2016-6835',
            'CVE-2016-4454', 'CVE-2016-5257', 'CVE-2015-7220', 'CVE-2015-7221',
            'CVE-2016-5278', 'CVE-2016-1981', 'CVE-2015-2726', 'CVE-2016-2857',
            'CVE-2013-0858', 'CVE-2014-0182', 'CVE-2013-0856', 'CVE-2013-0857',
            'CVE-2016-5403', 'CVE-2014-2099', 'CVE-2014-2098', 'CVE-2015-1779',
            'CVE-2016-6833', 'CVE-2014-2097', 'CVE-2015-7203', 'CVE-2015-7194',
            'CVE-2015-6820', 'CVE-2015-7199', 'CVE-2015-2710', 'CVE-2016-4952',
            'CVE-2015-2712', 'CVE-2013-7022', 'CVE-2013-7023', 'CVE-2013-0845',
            'CVE-2016-7466', 'CVE-2015-7202', 'CVE-2013-4151', 'CVE-2013-4150',
            'CVE-2016-8668']
    _slice_for_points("integeroverflow_slice_points_new.pkl", "integeroverflow_slices.txt", skip)


if __name__ == "__main__":
    # api_slice()
    # pointers_slice()
    # arrays_slice()
    integeroverflow_slice()
    print("All Done.")


# --- general_op.py (head) ---
## coding:utf-8
import os
import pickle
import re

# C APIs whose first argument is the destination of a copy/format operation.
list_destparam_0_cpyapi = ['sprintf', 'gets', 'fgets', '_memccpy', '_mbscpy', '_strncpy', 'wmemset', 'vasprintf', 'asprintf', 'wcsncpy', 'lstrcpy', '_wcsncpy', '_snprintf', 'memcpy', 'memmove', '_tcscpy', '_snwprintf', 'strcpy', 'CopyMemory', 'wcsncat', 'vsprintf', 'lstrcpyn', 'vsnprintf', '_mbsncat', 'wmemmove', 'memset', 'wmemcpy', 'strcat', 'fprintf', '_strncat', '_tcsncpy', '_mbsnbcpy', 'strncpy', 'strncat', 'wcscpy', 'snprintf', 'lstrcat']
# scanf-family input APIs.
list_scanf_api = ['vfscanf', 'fscanf', 'vscanf', 'scanf', 'vsscanf', 'sscanf', 'swscanf']
# Identifiers to exclude from callee extraction (empty by default).
list_key_words = []


def del_ctrl_edge(pdg):
    """Delete every control edge (edge whose 'var' attribute is None) from
    pdg in place and return it; data edges (var != None) are kept."""
    i = 0
    while i < pdg.ecount():
        if pdg.es[i]['var'] == None:
            pdg.delete_edges(i)  # edge indices shift down, so do not advance i
        else:
            i += 1
    return pdg
def get_ifname(node_id, dict_if2cfgnode, dict_cfgnode2if):
    """Return the name of the smallest enclosing if-statement of CFG node
    node_id (the one covering the fewest CFG nodes); '' when the node maps
    to no if-statement.

    dict_if2cfgnode -- if-name -> list of CFG node names it covers.
    dict_cfgnode2if -- CFG node name -> list of candidate if-names.
    """
    candidates = dict_cfgnode2if[node_id]
    if not candidates:
        return ''
    # min() keeps the first minimum, matching the original strict-< scan.
    return min(candidates, key=lambda if_n: len(dict_if2cfgnode[if_n]))


def isFuncCall(node):
    """Return the list of callee names when node's code contains a call,
    False otherwise."""
    result = getCalleeName(node)
    return result if result != [] else False


def getCalleeName(slicenode):
    """Extract the names of functions called in slicenode['code'].

    Function-definition nodes yield []. Names listed in list_key_words are
    filtered out. Returns a (possibly empty) list of names.
    """
    code = slicenode['code']
    if slicenode['type'] == "Function":
        return []

    # Identifier (possibly a member chain like 'a . b' / 'a :: b' / 'a -> b')
    # followed by ' (' -- Joern normalises code with single spaces.
    pattern = r"((?:_|[A-Za-z])\w*(?:\s(?:\.|::|\->|)\s(?:_|[A-Za-z])\w*)*)\s\("
    result = re.findall(pattern, code)
    return [name for name in result if name not in list_key_words]


def getFuncPDGBynodeIDAndtestID(list_cfgNodeID_funcID, testID):
    """Map each CFG node id to the pickled PDG of its function.

    list_cfgNodeID_funcID -- tuples of (cfgNodeID, func_id); the PDG file for
    func_id is looked up under pdg_db/<testID>/ by filename suffix.
    Returns {cfgNodeID: pdg}.
    """
    _dict = {}
    for cfgNodeID, func_id in list_cfgNodeID_funcID:
        path = os.path.join('pdg_db', testID)
        for _file in os.listdir(path):
            if _file.split('_')[-1] == func_id:
                with open(os.path.join(path, _file), 'rb') as fin:
                    _dict[cfgNodeID] = pickle.load(fin)
                break
    return _dict


def getFuncPDGBynodeIDAndtestID_noctrl(list_cfgNodeID_funcID, testID):
    """Like getFuncPDGBynodeIDAndtestID, but searches the control-edge-free
    PDG stores under pdg/<subdir>/<testID>/."""
    _dict = {}
    for cfgNodeID, func_id in list_cfgNodeID_funcID:
        for _dir in os.listdir("pdg/"):
            if testID not in os.listdir(os.path.join("pdg/", _dir)):
                continue
            path = os.path.join("pdg/", _dir, testID)
            for _file in os.listdir(path):
                if _file.split('_')[-1] == func_id:
                    with open(os.path.join(path, _file), 'rb') as fin:
                        _dict[cfgNodeID] = pickle.load(fin)
                    break
    return _dict
def getFuncPDGByfuncIDAndtestID(func_ID, testID):
    """Return the pickled PDG whose filename suffix matches func_ID under
    pdg_db/<testID>/, or False when no such file exists."""
    path = os.path.join('pdg_db', testID)
    for _file in os.listdir(path):
        if _file.split('_')[-1] == str(func_ID):
            with open(os.path.join(path, _file), 'rb') as fin:
                return pickle.load(fin)
    return False


def getFuncPDGByfuncIDAndtestID_noctrl(func_ID, testID, _type):
    """Control-edge-free variant: search pdg/<subdir>/<testID>/.

    _type is unused here but kept for interface compatibility with callers.
    Returns the PDG or False.
    """
    pdg = False
    for _dir in os.listdir("pdg/"):
        if testID not in os.listdir(os.path.join("pdg/", _dir)):
            continue
        path = os.path.join("pdg/", _dir, testID)
        for _file in os.listdir(path):
            if _file.split('_')[-1] == str(func_ID):
                with open(os.path.join(path, _file), 'rb') as fin:
                    pdg = pickle.load(fin)
                break
    return pdg


# Private copies of the API tables from the module header so the helpers
# below are self-contained.
_CPY_DST0_APIS = ['sprintf', 'gets', 'fgets', '_memccpy', '_mbscpy', '_strncpy', 'wmemset', 'vasprintf', 'asprintf', 'wcsncpy', 'lstrcpy', '_wcsncpy', '_snprintf', 'memcpy', 'memmove', '_tcscpy', '_snwprintf', 'strcpy', 'CopyMemory', 'wcsncat', 'vsprintf', 'lstrcpyn', 'vsnprintf', '_mbsncat', 'wmemmove', 'memset', 'wmemcpy', 'strcat', 'fprintf', '_strncat', '_tcsncpy', '_mbsnbcpy', 'strncpy', 'strncat', 'wcscpy', 'snprintf', 'lstrcat']
_SCANF_APIS = ['vfscanf', 'fscanf', 'vscanf', 'scanf', 'vsscanf', 'sscanf', 'swscanf']


def _expand_member_chain(var):
    """Expand a member chain 'a . b . c' (or 'a -> b') into
    [full expression, 'a', 'a . b', ...]: the expression plus every prefix.
    A plain variable yields [var]. '.' is tried before '->', as originally."""
    for sep in (' . ', ' -> '):
        if var.find(sep) != -1:
            parts = [var]
            prefix = []
            while var.find(sep) != -1:
                prefix.append(var.split(sep)[0])
                parts.append(sep.join(prefix))
                var = sep.join(var.split(sep)[1:])
            return parts
    return [var]


def _normalize_arg(var):
    """Reduce one argument expression to its base variable(s).

    Strips a leading address-of ('x & y' keeps y), truncates pointer
    arithmetic ('p + n' / 'p - n' keep p), unwraps '( cast * ) x', then
    expands member chains. Replaces ~8 copy-pasted inline versions of this
    logic in the original getReturnVarOfAPI.
    """
    if var.find(' & ') > -1:
        var = var.split(' & ')[1]
    if var.find(' + ') != -1:
        var = var.split(' + ')[0]
    elif var.find(' - ') != -1:
        var = var.split(' - ')[0]
    elif var.find(' * ') != -1:
        temp = var.split(' * ')[1]
        if temp[0] == ')':
            var = temp[1:].strip()  # '( type * ) x' cast: keep x
        else:
            var = var.split(' * ')[0]
    return _expand_member_chain(var)


def getReturnVarOfAPI(code):
    """Return the variable(s) written by a known copy/format or scanf-family
    call in `code` (Joern-normalised, space-separated), or False when the
    code contains no such call.

    Copy APIs: the first argument (destination) and its member-chain
    prefixes. scanf-family: every output argument (after the stream/format
    arguments) expanded the same way.

    Bug fixes vs original: the scanf/vscanf branch used find(api) without the
    trailing space but split(api + ' '), which raised IndexError for code
    merely containing the substring 'scanf'; several scanf-family
    sub-branches also lacked ' -> ' chain handling present in the fscanf
    family -- both now behave uniformly.
    """
    for api in _CPY_DST0_APIS:
        if code.find(api + ' ') == -1:
            continue
        _list = code.split(api + ' ')
        if _list[0] == '' and _list[1][0] == '(':
            var = _list[1].split(',')[0].replace('(', '').strip()
            return _normalize_arg(var)

    for scanfapi in _SCANF_APIS:
        if code.find(scanfapi + ' ') == -1:
            continue
        _list = code.split(scanfapi + ' ')
        if _list[0] == '' and _list[1][0] == '(':
            # fscanf-likes take (stream/buffer, format, out...); scanf-likes
            # take (format, out...).
            skip = 2 if scanfapi in ['fscanf', 'sscanf', 'swscanf', 'vfscanf', 'vsscanf'] else 1
            list_var = [v.replace('(', '').strip() for v in _list[1].split(',')[skip:]]
            new_list_var = []
            for var in list_var:
                new_list_var += _normalize_arg(var)
            return new_list_var

    return False


def isEdgeExists(pdg, startnode, endnode, var):
    """True when pdg already has a startnode -> endnode edge labelled var.

    pdg.vs[edge.tuple]['name'] yields the [source, target] name pair of an
    igraph edge.
    """
    for edge in pdg.es:
        names = pdg.vs[edge.tuple]['name']
        if names[0] == startnode and names[1] == endnode and edge['var'] == var:
            return True
    return False


def addDataEdge(pdg, startnode, endnode, var):
    """Add a data edge labelled var unless an identical edge already exists;
    return the (possibly modified) pdg."""
    if isEdgeExists(pdg, startnode, endnode, var):
        return pdg
    pdg.add_edge(startnode, endnode, **{'var': var})
    return pdg


def _vars_from_decl(code):
    """Collect declared variable names from a (semicolon-stripped)
    declaration statement; False when it contains '(' (call / prototype).

    Bug fix vs original: each comma-separated declarator is now split on its
    own tokens (the original re-split the whole statement, so 'int a , b ;'
    yielded the last name for every declarator).
    """
    if '(' in code:
        return False
    list_var = []
    for decl in code.split(','):
        tokens = decl.strip().split(' ')
        if '[' in tokens:
            # array declarator: the name precedes '['
            list_var.append(tokens[tokens.index('[') - 1])
        else:
            list_var.append(tokens[-1])
    return list_var


def getVarOfNode(code):
    """Return the variable names a statement defines.

    Assignments: the tokens of the left-hand side outside any '[...]'
    subscript (an unsubscripted LHS yields [], matching the original).
    Declarations (with or without trailing ';'): one name per declarator,
    or False when the code looks like a call/prototype.
    """
    if code.find(' = ') != -1:
        list_var = []
        tokens = code.split(' = ')[0].split(' ')
        if ']' in tokens:
            index_right = tokens.index(']')
            index_left = tokens.index('[')
            i = 0
            while i < len(tokens):
                if i < index_left or i > index_right:
                    list_var.append(tokens[i])
                i += 1
        return list_var
    elif code[-1] == ';':
        return _vars_from_decl(code[:-1].strip())
    else:
        return _vars_from_decl(code)
def sortedNodesByLoc(list_node):
    """Return list_node sorted by source position (row, col) taken from the
    'location' attribute ('row:col:...'); nodes without a location sort last.

    Bug fix: the missing-location sentinel was the *string* 'inf', which only
    sorted correctly through CPython 2's arbitrary cross-type ordering and
    raises TypeError on Python 3 -- it is now float('inf').
    """
    _list = []
    for node in list_node:
        if node['location'] == None:
            row = float('inf')
            col = float('inf')
        else:
            loc = node['location'].split(':')
            row, col = int(loc[0]), int(loc[1])
        _list.append((row, col, node))

    # key on (row, col) only: node objects themselves are never compared
    _list.sort(key=lambda x: (x[0], x[1]))
    return [_tuple[2] for _tuple in _list]


def getFuncPDGById(testID, pdg_funcid):
    """Return the pickled PDG whose filename suffix equals pdg_funcid under
    pdg_db/<testID>/.

    Consistency fix: returns False (not implicit None) when no file matches,
    like the other getFuncPDG* helpers, so callers' `pdg == False` guard
    actually fires.
    """
    file_dir = os.path.join("pdg_db", testID)
    for _file in os.listdir(file_dir):
        if _file.split('_')[-1] == pdg_funcid:
            with open(os.path.join(file_dir, _file), 'rb') as f:
                return pickle.load(f)
    return False


def getFuncPDGById_noctrl(testID, pdg_funcid):
    """Control-edge-free variant of getFuncPDGById: search
    pdg/<subdir>/<testID>/. Returns the PDG or False."""
    for _dir in os.listdir("pdg/"):
        if testID not in os.listdir(os.path.join("pdg/", _dir)):
            continue
        file_dir = os.path.join("pdg/", _dir, testID)
        for _file in os.listdir(file_dir):
            if _file.split('_')[-1] == pdg_funcid:
                with open(os.path.join(file_dir, _file), 'rb') as f:
                    return pickle.load(f)
    return False


def getFuncPDGByNameAndtestID(func_name, testID):
    """Return the PDG whose filename minus the trailing '_funcid' suffix
    equals func_name, searched under pdg_db/<testID>/; False when absent."""
    path = os.path.join('pdg_db', testID)
    pdg = False
    for _file in os.listdir(path):
        if '_'.join(_file.split('_')[:-1]) == func_name:
            with open(os.path.join(path, _file), 'rb') as fin:
                pdg = pickle.load(fin)
            break
    return pdg


def getFuncPDGByNameAndtestID_noctrl(func_name, testID):
    """Control-edge-free variant of getFuncPDGByNameAndtestID.

    Bug fix: the directory scan listed 'pdg_db/' while opening files under
    'pdg/'; both now use 'pdg/', matching every other *_noctrl helper.
    """
    pdg = False
    for _dir in os.listdir("pdg/"):
        if testID not in os.listdir(os.path.join("pdg/", _dir)):
            continue
        path = os.path.join('pdg', _dir, testID)
        for _file in os.listdir(path):
            if '_'.join(_file.split('_')[:-1]) == func_name:
                with open(os.path.join(path, _file), 'rb') as fin:
                    pdg = pickle.load(fin)
                break
    return pdg


def _ctor_classname(new_rhs):
    """Extract the class name from the text following ' = new ' (skipping a
    leading 'const' and any '*'). Shared by both isNewOrDelOp variants."""
    tempvalue = new_rhs.replace('*', '')
    if tempvalue.split(' ')[0] != 'const':
        return tempvalue.split(' ')[0].strip()
    return tempvalue.split(' ')[1].strip()


def isNewOrDelOp(node, testID):
    """For a 'new' node return 'Class :: Class' (its constructor); for a
    'delete' node trace the deleted object back over data edges to its
    ' = new ' source and return 'Class :: ~Class'; otherwise False."""
    if node['code'].find(' = new ') != -1:
        classname = _ctor_classname(node['code'].split(' = new ')[1])
        return classname + ' :: ' + classname

    elif node['code'].find('delete ') != -1:
        objectname = node['code'].split('delete ')[1].replace(';', '').strip()
        pdg = getFuncPDGByfuncIDAndtestID(node['functionId'], testID)
        # NOTE(review): reconstructed nesting -- the edge scan runs for the
        # node matching the 'delete' statement, as the n-references imply.
        for n in pdg.vs:
            if n['name'] == node['name']:
                list_s = n.predecessors()
                for edge in pdg.es:
                    if pdg.vs[edge.tuple[0]] in list_s and pdg.vs[edge.tuple[1]] == n and edge['var'] == objectname:
                        start_n = pdg.vs[edge.tuple[0]]
                        if start_n['code'].find(' = new ') != -1:
                            classname = _ctor_classname(start_n['code'].split(' = new ')[1])
                            return classname + ' :: ~' + classname

    return False


def isNewOrDelOp_noctrl(node, testID, _type):
    """noctrl variant of isNewOrDelOp; _type selects which PDG store is
    consulted for the 'delete' back-trace."""
    if node['code'].find(' = new ') != -1:
        classname = _ctor_classname(node['code'].split(' = new ')[1])
        return classname + ' :: ' + classname

    elif node['code'].find('delete ') != -1:
        objectname = node['code'].split('delete ')[1].replace(';', '').strip()
        if _type:
            pdg = getFuncPDGByfuncIDAndtestID_noctrl(node['functionId'], testID, _type)
        else:
            pdg = getFuncPDGByfuncIDAndtestID(node['functionId'], testID)

        for n in pdg.vs:
            if n['name'] == node['name']:
                list_s = n.predecessors()
                for edge in pdg.es:
                    if pdg.vs[edge.tuple[0]] in list_s and pdg.vs[edge.tuple[1]] == n and edge['var'] == objectname:
                        start_n = pdg.vs[edge.tuple[0]]
                        if start_n['code'].find(' = new ') != -1:
                            classname = _ctor_classname(start_n['code'].split(' = new ')[1])
                            return classname + ' :: ~' + classname

    return False
# --- get_cfg_relation.py ---
# -- coding: utf-8 --
from access_db_operate import *
from complete_PDG import *
import re
from py2neo.packages.httpstream import http

# py2neo's default HTTP timeout is too small for large graph traversals
http.socket_timeout = 9999


def getSubCFGGraph(startNode, list_node, not_scan_list):
    """Depth-first collect every CFG node reachable from startNode.

    list_node accumulates visited nodes; not_scan_list accumulates their
    names and doubles as the visited set. Traversal stops at already-seen
    nodes and at the EXIT node. Returns (list_node, not_scan_list).
    """
    if startNode['name'] in not_scan_list or startNode['code'] == 'EXIT':
        return list_node, not_scan_list

    list_node.append(startNode)
    not_scan_list.append(startNode['name'])

    for p_node in startNode.successors():
        list_node, not_scan_list = getSubCFGGraph(p_node, list_node, not_scan_list)

    return list_node, not_scan_list


def _stmts_until_closing_brace(list_stmt):
    """Return how many lines of list_stmt precede the one whose '}' balances
    the first '{'. Extracted from two identical inline scans.

    NOTE(review): the balance check is taken to sit inside the '}' case, as
    a top-level check would trip immediately at depth 0 -- confirm against
    the original if behaviour looks off.
    """
    left_brace = 0
    index = 0
    for stmt in list_stmt:
        closed = False
        for c in stmt:
            if c == '{':
                left_brace += 1
            elif c == '}':
                left_brace -= 1
                if left_brace == 0:
                    closed = True
                    break
        if closed:
            break
        index += 1
    return index


def getCtrlRealtionOfCFG(cfg):
    """For every 'if' condition node in cfg, determine which CFG nodes lie in
    its true branch and which in its false branch.

    Returns {if_node_name: ([true_branch_names], [false_branch_names])}.
    Conditions are classified by re-reading the source line (if / while /
    for / switch); braced and unbraced bodies are delimited by line numbers
    from the source file.
    """
    # 1) keep only Condition nodes whose source line is an 'if'
    list_ifstmt_nodes = []
    for node in cfg.vs:
        if node['type'] != 'Condition':
            continue
        location_row = int(node['location'].split(':')[0])
        with open(node['filepath'], 'r') as fin:
            content = fin.readlines()
        src_code = content[location_row - 1]

        result = re.search("(?:if|while|for|switch)", src_code)
        # no keyword on the line: treated as 'for' (e.g. the condition part
        # of a multi-line for-header), as in the original
        res = 'for' if result is None else result.group()
        if res == '':
            print("error!")
            exit()
        elif res == 'if':
            list_ifstmt_nodes.append(node)

    _dict = {}
    for if_node in list_ifstmt_nodes:
        # 2) collect the subgraphs reachable over the True / False edges
        list_truestmt_nodes = []
        list_falsestmt_nodes = []
        for es in cfg.es:
            if cfg.vs[es.tuple[0]] == if_node and es['var'] == 'True':
                start_node = cfg.vs[es.tuple[1]]
                list_truestmt_nodes, _ = getSubCFGGraph(start_node, list_truestmt_nodes, [if_node['name']])
            elif cfg.vs[es.tuple[0]] == if_node and es['var'] == 'False':
                start_node = cfg.vs[es.tuple[1]]
                list_falsestmt_nodes, _ = getSubCFGGraph(start_node, list_falsestmt_nodes, [if_node['name']])

        # 3) nodes reachable from both branches (past the merge point)
        #    belong to neither branch
        _share_list = [t_node for t_node in list_truestmt_nodes if t_node in list_falsestmt_nodes]

        if _share_list != []:
            list_truestmt_nodes = [n for n in list_truestmt_nodes if n not in _share_list]
            list_falsestmt_nodes = [n for n in list_falsestmt_nodes if n not in _share_list]
            _dict[if_node['name']] = ([t_node['name'] for t_node in list_truestmt_nodes],
                                      [f_node['name'] for f_node in list_falsestmt_nodes])
        else:
            # 4) no merge point: fall back to source-text (line/brace) analysis
            with open(cfg.vs[0]['filepath'], 'r') as fin:
                content = fin.readlines()
            if_line = int(if_node['location'].split(':')[0]) - 1

            if list_truestmt_nodes == []:
                continue
            sorted_list_truestmt_nodes = sortedNodesByLoc(list_truestmt_nodes)
            true_stmt_start = sorted_list_truestmt_nodes[0]
            start_line = int(true_stmt_start['location'].split(':')[0])
            str_if_stmts = '\n'.join(content[if_line:start_line])

            if '{' in str_if_stmts:
                # braced true body: bound it by the matching '}'
                if sorted_list_truestmt_nodes[-1]['location'] == None:
                    end_line = int(sorted_list_truestmt_nodes[-2]['location'].split(':')[0])
                else:
                    end_line = int(sorted_list_truestmt_nodes[-1]['location'].split(':')[0])
                index = _stmts_until_closing_brace(content[if_line:end_line])
                real_end_line = int(if_node['location'].split(':')[0]) + index
                list_real_true_stmt = [n for n in sorted_list_truestmt_nodes
                                       if n['location'] != None
                                       and if_line + 1 <= int(n['location'].split(':')[0]) <= real_end_line]
            else:
                # unbraced body: only the first statement is in the branch
                list_real_true_stmt = [true_stmt_start]

            if list_falsestmt_nodes == []:
                continue
            sorted_list_falsestmt_nodes = sortedNodesByLoc(list_falsestmt_nodes)
            false_stmt_start = sorted_list_falsestmt_nodes[0]
            if sorted_list_truestmt_nodes[-1]['location'] != None:
                start_line = int(sorted_list_truestmt_nodes[-1]['location'].split(':')[0])
            else:
                start_line = int(sorted_list_truestmt_nodes[-2]['location'].split(':')[0])
            end_line = int(false_stmt_start['location'].split(':')[0])
            str_else_stmts = '\n'.join(content[start_line:end_line])

            if 'else' in str_else_stmts:
                else_line = 0
                for line in content[start_line:end_line]:
                    if 'else' in line:
                        break
                    else_line += 1
                real_else_line = start_line + else_line + 1
                str_else_stmts = str_else_stmts.split('else')[1]

                if '{' in str_else_stmts:
                    if sorted_list_falsestmt_nodes[-1]['location'] != None:
                        end_line = int(sorted_list_falsestmt_nodes[-1]['location'].split(':')[0])
                    elif sorted_list_falsestmt_nodes[-2]['location'] != None:
                        end_line = int(sorted_list_falsestmt_nodes[-2]['location'].split(':')[0])
                    else:
                        end_line = int(sorted_list_falsestmt_nodes[-3]['location'].split(':')[0])
                    index = _stmts_until_closing_brace(content[real_else_line - 1:end_line])
                    # NOTE(review): counted from the if line, as in the
                    # original -- looks suspicious for the else branch but is
                    # preserved
                    real_end_line = int(if_node['location'].split(':')[0]) + index
                    list_real_false_stmt = [n for n in sorted_list_falsestmt_nodes
                                            if n['location'] != None
                                            and if_line + 1 <= int(n['location'].split(':')[0]) <= real_end_line]
                else:
                    list_real_false_stmt = [false_stmt_start]
                    print("false_stmt_start " + false_stmt_start['name'])
            else:
                list_real_false_stmt = []

            _dict[if_node['name']] = ([t_node['name'] for t_node in list_real_true_stmt],
                                      [f_node['name'] for f_node in list_real_false_stmt])

    return _dict


def completeDataEdgeOfCFG(cfg):
    """Ensure every Statement node is connected in source order: a node with
    no predecessor gets an edge from the preceding node, one with no
    successor gets an edge to the following node. Returns the patched cfg."""
    list_ordered_list = sortedNodesByLoc(cfg.vs)

    for node in list_ordered_list:
        if node['type'] != 'Statement':
            continue
        list_pre = node.predecessors()
        list_su = node.successors()

        if list_pre == [] or list_pre == None:
            index = list_ordered_list.index(node)
            addDataEdge(cfg, list_ordered_list[index - 1]['name'], node['name'], None)

        if list_su == [] or list_su == None:
            index = list_ordered_list.index(node)
            addDataEdge(cfg, node['name'], list_ordered_list[index + 1]['name'], None)

    return cfg


def main():
    """Build a CFG for every function in the Joern/Neo4j database, derive its
    if-branch relations, and pickle the results under cfg_db/<testID>/."""
    j = JoernSteps()
    j.connectToDatabase()
    for node in getALLFuncNode(j):
        testID = getFuncFile(j, node._id).split('/')[-2]
        path = os.path.join("cfg_db", testID)
        store_file_name = node.properties['name'] + '_' + str(node._id)
        store_path = os.path.join(path, store_file_name)

        initcfg = translateCFGByNode(j, node)  # raw CFG from the graph DB
        opt_cfg_1 = modifyStmtNode(initcfg)
        cfg = completeDataEdgeOfCFG(opt_cfg_1)
        _dict = getCtrlRealtionOfCFG(cfg)

        # invert if-name -> covered-nodes into node -> if-names
        _dict_node2ifstmt = {}
        for key in _dict.keys():
            for v in _dict[key][0] + _dict[key][1]:
                _dict_node2ifstmt.setdefault(v, []).append(key)
        for key in _dict_node2ifstmt.keys():
            _dict_node2ifstmt[key] = list(set(_dict_node2ifstmt[key]))

        if not os.path.exists(path):
            os.mkdir(path)
        if not os.path.exists(store_path):
            os.mkdir(store_path)
        else:
            continue  # this function was already processed

        for filename, obj in [('cfg', cfg),
                              ('dict_if2cfgnode', _dict),
                              ('dict_cfgnode2if', _dict_node2ifstmt)]:
            with open(os.path.join(store_path, filename), 'wb') as fout:
                pickle.dump(obj, fout, True)  # protocol 1, as before

        print(node.properties['name'])
        print(_dict)
        print(_dict_node2ifstmt)
        print('')


if __name__ == '__main__':
    main()
fout, True) 320 | fout.close() 321 | 322 | print node.properties['name'] 323 | print _dict 324 | print _dict_node2ifstmt 325 | print '' 326 | 327 | 328 | if __name__ == '__main__': 329 | main() 330 | -------------------------------------------------------------------------------- /make_label.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | import os 3 | import pickle 4 | 5 | 6 | f = open("dict_cwe2father.pkl", 'rb') 7 | dict_cwe2father = pickle.load(f) 8 | f.close() 9 | 10 | #print dict_cwe2father['CWE-787'] 11 | 12 | f = open("label_vec_type.pkl", 'rb') 13 | label_vec_type = pickle.load(f) 14 | f.close() 15 | 16 | f = open("dict_testcase2code.pkl",'rb') 17 | dict_testcase2code = pickle.load(f) 18 | f.close() 19 | 20 | 21 | def get_label_veclist(list_cwe): 22 | list_label = [0] * len(label_vec_type) 23 | for cweid in list_cwe: 24 | index = label_vec_type.index(cweid) 25 | list_label[index] = 1 26 | 27 | return list_label 28 | 29 | 30 | def get_label_cwe(cweid, label_cwe): 31 | if cweid in label_vec_type: 32 | label_cwe.append(cweid) 33 | return label_cwe 34 | 35 | else: 36 | if cweid == 'CWE-1000': 37 | label_cwe = label_vec_type 38 | else: 39 | fathercweid = dict_cwe2father[cweid] 40 | 41 | for _id in fathercweid: 42 | label_cwe = get_label_cwe(_id, label_cwe) 43 | 44 | return label_cwe 45 | 46 | 47 | def make_label(path, dict_vuln2testcase, _type): 48 | f = open(path, 'r') 49 | context = f.read().split('------------------------------')[:-1] 50 | f.close() 51 | 52 | context[0] = '\n' + context[0] 53 | 54 | list_all_label = [] 55 | list_all_vulline = [] 56 | for _slice in context: 57 | vulline = [] 58 | index_line = _slice.split('\n')[1] 59 | list_codes = _slice.split('\n')[2:-1] 60 | case_name = index_line.split(' ')[1] 61 | key_name = '/'.join(index_line.split(' ')[1].split('/')[-2:]) 62 | print index_line 63 | 64 | if key_name in dict_vuln2testcase.keys(): 65 | list_codeline = [code.split(' ')[-1] for 
code in list_codes] 66 | dict = dict_vuln2testcase[key_name] 67 | 68 | _dict_cwe2line_target = {} 69 | _dict_cwe2line = {} 70 | for _dict in dict: 71 | for key in _dict.keys(): 72 | if _dict[key] not in _dict_cwe2line_target.keys(): 73 | _dict_cwe2line_target[_dict[key]] = [key] 74 | else: 75 | _dict_cwe2line_target[_dict[key]].append(key) 76 | 77 | 78 | for line in list_codeline: 79 | line = line.strip() 80 | if line in _dict.keys(): 81 | if not ' '.join((list_codes[list_codeline.index(line)].strip()).split(' ')[:-1]) == dict_testcase2code[key_name+"/"+line].strip(): 82 | continue 83 | cweid = _dict[line] 84 | vulline.append(list_codeline.index(line)) 85 | 86 | if cweid not in _dict_cwe2line.keys(): 87 | _dict_cwe2line[cweid] = [line] 88 | else: 89 | _dict_cwe2line[cweid].append(line) 90 | 91 | if _type: 92 | list_vuln_cwe = [] 93 | for key in _dict_cwe2line.keys(): 94 | if key == 'Any...': 95 | continue 96 | if len(_dict_cwe2line[key]) == len(_dict_cwe2line_target[key]): 97 | label_cwe = [] 98 | label_cwe = get_label_cwe(key, label_cwe) 99 | list_vuln_cwe += label_cwe 100 | 101 | else: 102 | list_vuln_cwe = [] 103 | for key in _dict_cwe2line.keys(): 104 | if key == 'Any...': 105 | continue 106 | label_cwe = [] 107 | label_cwe = get_label_cwe(key, label_cwe) 108 | list_vuln_cwe += label_cwe 109 | 110 | if list_vuln_cwe == []: 111 | list_label = [0] * len(label_vec_type) 112 | else: 113 | list_vuln_cwe = list(set(list_vuln_cwe)) 114 | list_label = get_label_veclist(list_vuln_cwe) 115 | 116 | else: 117 | list_label = [0] * len(label_vec_type) 118 | 119 | list_all_label.append(list_label) 120 | list_all_vulline.append(vulline) 121 | 122 | return list_all_label, list_all_vulline 123 | 124 | 125 | def main(): 126 | f = open("dict_flawline2filepath.pkl", 'rb') 127 | dict_vuln2testcase = pickle.load(f) 128 | f.close() 129 | _type = False 130 | time = '4' 131 | # lang = 'C/test_data/' + time 132 | lang = '' 133 | 134 | path = os.path.join(lang, 'api_slices.txt') 135 | 
list_all_apilabel, list_all_vulline = make_label(path, dict_vuln2testcase, _type) 136 | dec_path = os.path.join(lang, 'api_slices_label.pkl') 137 | f = open(dec_path, 'wb') 138 | pickle.dump(list_all_apilabel, f, True) 139 | f.close() 140 | dec_path = os.path.join(lang, 'api_slices_vulline.pkl') 141 | f = open(dec_path, 'wb') 142 | pickle.dump(list_all_vulline, f) 143 | f.close() 144 | 145 | path = os.path.join(lang, 'arraysuse_slices.txt') 146 | list_all_arraylabel = make_label(path, dict_vuln2testcase, _type) 147 | dec_path = os.path.join(lang, 'array_slice_label.pkl') 148 | f = open(dec_path, 'wb') 149 | pickle.dump(list_all_arraylabel, f, True) 150 | f.close() 151 | 152 | path = os.path.join(lang, 'pointersuse_slices.txt') 153 | list_all_pointerlabel = make_label(path, dict_vuln2testcase, _type) 154 | dec_path = os.path.join(lang, 'pointer_slice_label.pkl') 155 | f = open(dec_path, 'wb') 156 | pickle.dump(list_all_pointerlabel, f, True) 157 | f.close() 158 | 159 | path = os.path.join(lang, 'integeroverflow_slices.txt') 160 | list_all_exprlabel = make_label(path, dict_vuln2testcase, _type) 161 | dec_path = os.path.join(lang, 'expr_slice_label.pkl') 162 | f = open(dec_path, 'wb') 163 | pickle.dump(list_all_exprlabel, f, True) 164 | f.close() 165 | 166 | 167 | if __name__ == '__main__': 168 | main() 169 | -------------------------------------------------------------------------------- /points_get.py: -------------------------------------------------------------------------------- 1 | ## coding:utf-8 2 | from access_db_operate import * 3 | 4 | 5 | def get_all_sensitiveAPI(db): 6 | fin = open("sensitive_func.pkl", 'rb') 7 | list_sensitive_funcname = pickle.load(fin) 8 | fin.close() 9 | 10 | _dict = {} 11 | for func_name in list_sensitive_funcname: 12 | list_callee_cfgnodeID = [] 13 | if func_name.find('main') != -1: 14 | list_main_func = [] 15 | list_mainfunc_node = getFunctionNodeByName(db, func_name) 16 | 17 | if list_mainfunc_node != []: 18 | file_path = 
getFuncFile(db, list_mainfunc_node[0]._id) 19 | testID = file_path.split('/')[-2] 20 | for mainfunc in list_mainfunc_node: 21 | list_parameters = get_parameter_by_funcid(db, mainfunc._id) 22 | 23 | if list_parameters != []: 24 | list_callee_cfgnodeID.append([testID, ([str(v) for v in list_parameters], str(mainfunc._id), func_name)]) 25 | 26 | else: 27 | continue 28 | 29 | else: 30 | list_callee_id = get_calls_id(db, func_name) 31 | if list_callee_id == []: 32 | continue 33 | 34 | 35 | for _id in list_callee_id: 36 | cfgnode = getCFGNodeByCallee(db, _id) 37 | if cfgnode != None: 38 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 39 | testID = file_path.split('/')[-2] 40 | list_callee_cfgnodeID.append([testID, ([str(cfgnode._id)], str(cfgnode.properties['functionId']), func_name)]) 41 | 42 | if list_callee_cfgnodeID != []: 43 | for _l in list_callee_cfgnodeID: 44 | if _l[0] in _dict.keys(): 45 | _dict[_l[0]].append(_l[1]) 46 | else: 47 | _dict[_l[0]] = [_l[1]] 48 | 49 | else: 50 | continue 51 | 52 | return _dict 53 | 54 | 55 | def get_all_pointer(db): 56 | _dict = {} 57 | list_pointers_node = get_pointers_node(db) 58 | for cfgnode in list_pointers_node: 59 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 60 | testID = file_path.split('/')[-2] 61 | pointer_defnode = get_def_node(db, cfgnode._id) 62 | pointer_name = [] 63 | for node in pointer_defnode: 64 | name = node.properties['code'].replace('*', '').strip() 65 | if name not in pointer_name: 66 | pointer_name.append(name) 67 | 68 | if testID in _dict.keys(): 69 | _dict[testID].append(([str(cfgnode._id)], str(cfgnode.properties['functionId']), pointer_name)) 70 | else: 71 | _dict[testID] = [([str(cfgnode._id)], str(cfgnode.properties['functionId']), pointer_name)] 72 | 73 | return _dict 74 | 75 | 76 | def get_all_array(db): 77 | _dict = {} 78 | list_arrays_node = get_arrays_node(db) 79 | for cfgnode in list_arrays_node: 80 | file_path = getFuncFile(db, 
int(cfgnode.properties['functionId'])) 81 | testID = file_path.split('/')[-2] 82 | array_defnode = get_def_node(db, cfgnode._id) 83 | array_name = [] 84 | for node in array_defnode: 85 | name = node.properties['code'].replace('[', '').replace(']', '').replace('*', '').strip() 86 | if name not in array_name: 87 | array_name.append(name) 88 | 89 | if testID in _dict.keys(): 90 | _dict[testID].append(([str(cfgnode._id)], str(cfgnode.properties['functionId']), array_name)) 91 | else: 92 | _dict[testID] = [([str(cfgnode._id)], str(cfgnode.properties['functionId']), array_name)] 93 | 94 | return _dict 95 | 96 | 97 | def get_all_pointer_use(db): 98 | _dict = {} 99 | list_pointers_node = get_pointers_node(db) 100 | for cfgnode in list_pointers_node: 101 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 102 | testID = file_path.split('/')[-2] 103 | pointer_defnode = get_def_node(db, cfgnode._id) 104 | 105 | _temp_list = [] 106 | for node in pointer_defnode: 107 | name = node.properties['code'].strip() 108 | list_usenodes = get_all_use_bydefnode(db, node._id) 109 | list_defnodes = get_all_def_bydefnode(db, node._id) 110 | 111 | i = 0 112 | while i < len(list_defnodes): 113 | if list_defnodes[i]._id == cfgnode._id: 114 | del list_defnodes[i] 115 | else: 116 | i += 1 117 | 118 | list_usenodes += list_defnodes 119 | 120 | print len(list_usenodes) 121 | for i in list_usenodes: 122 | if str(i).find(",location")==-1: 123 | list_usenodes.remove(i) 124 | loc_list=[] 125 | final_list=[] 126 | for i in list_usenodes: 127 | #print(i) 128 | if ',location' in str(i): 129 | print(str(i)) 130 | location=str(i).split(",type:")[0].split(",location:")[1][1:-1].split(":") 131 | count=int(location[0]) 132 | loc_list.append(count) 133 | print loc_list 134 | if len(loc_list)!=0: 135 | a=loc_list.index(max(loc_list)) 136 | final_list.append(list_usenodes[a]) 137 | for use_node in final_list: 138 | if use_node._id in _temp_list: 139 | continue 140 | else: 141 | 
_temp_list.append(use_node._id) 142 | 143 | if testID in _dict.keys(): 144 | _dict[testID].append(([str(use_node._id)], str(use_node.properties['functionId']), name)) 145 | else: 146 | _dict[testID] = [([str(use_node._id)], str(use_node.properties['functionId']), name)] 147 | 148 | return _dict 149 | 150 | 151 | def get_all_array_use(db): 152 | _dict = {} 153 | list_arrays_node = get_arrays_node(db) 154 | for cfgnode in list_arrays_node: 155 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 156 | testID = file_path.split('/')[-2] 157 | array_defnode = get_def_node(db, cfgnode._id) 158 | _temp_list = [] 159 | for node in array_defnode: 160 | name = node.properties['code'].strip() 161 | list_usenodes = get_all_use_bydefnode(db, node._id) 162 | list_defnodes = get_all_def_bydefnode(db, node._id) 163 | 164 | i = 0 165 | while i < len(list_defnodes): 166 | if list_defnodes[i]._id == cfgnode._id: 167 | del list_defnodes[i] 168 | else: 169 | i += 1 170 | 171 | list_usenodes += list_defnodes 172 | 173 | for use_node in list_usenodes: 174 | if use_node._id in _temp_list: 175 | continue 176 | else: 177 | _temp_list.append(use_node._id) 178 | 179 | if testID in _dict.keys(): 180 | _dict[testID].append(([str(use_node._id)], str(use_node.properties['functionId']), name)) 181 | else: 182 | _dict[testID] = [([str(use_node._id)], str(use_node.properties['functionId']), name)] 183 | 184 | return _dict 185 | 186 | 187 | def get_all_integeroverflow_point(db): 188 | _dict = {} 189 | list_exprstmt_node = get_exprstmt_node(db) 190 | for cfgnode in list_exprstmt_node: 191 | if cfgnode.properties['code'].find(' = ') > -1: 192 | code = cfgnode.properties['code'].split(' = ')[-1] 193 | pattern = re.compile("((?:_|[A-Za-z])\w*(?:\s(?:\+|\-|\*|\/)\s(?:_|[A-Za-z])\w*)+)") 194 | result = re.search(pattern, code) 195 | 196 | if result == None: 197 | continue 198 | else: 199 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 200 | testID = 
file_path.split('/')[-2] 201 | name = cfgnode.properties['code'].strip() 202 | 203 | if testID in _dict.keys(): 204 | _dict[testID].append(([str(cfgnode._id)], str(cfgnode.properties['functionId']), name)) 205 | else: 206 | _dict[testID] = [([str(cfgnode._id)], str(cfgnode.properties['functionId']), name)] 207 | 208 | else: 209 | code = cfgnode.properties['code'] 210 | pattern = re.compile("(?:\s\/\s(?:_|[A-Za-z])\w*\s)") 211 | result = re.search(pattern, code) 212 | if result == None: 213 | continue 214 | 215 | else: 216 | file_path = getFuncFile(db, int(cfgnode.properties['functionId'])) 217 | testID = file_path.split('/')[-2] 218 | name = cfgnode.properties['code'].strip() 219 | 220 | if testID in _dict.keys(): 221 | _dict[testID].append(([str(cfgnode._id)], str(cfgnode.properties['functionId']), name)) 222 | else: 223 | _dict[testID] = [([str(cfgnode._id)], str(cfgnode.properties['functionId']), name)] 224 | 225 | return _dict 226 | 227 | 228 | if __name__ == '__main__': 229 | j = JoernSteps() 230 | j.connectToDatabase() 231 | 232 | # _dict = get_all_sensitiveAPI(j) 233 | # f = open("sensifunc_slice_points.pkl", 'wb') 234 | # pickle.dump(_dict, f, True) 235 | # f.close() 236 | # print _dict 237 | 238 | # _dict = get_all_pointer_use(j) 239 | # f = open("pointuse_slice_points.pkl", 'wb') 240 | # pickle.dump(_dict, f, True) 241 | # f.close() 242 | # print _dict 243 | 244 | # _dict = get_all_array_use(j) 245 | # f = open("arrayuse_slice_points.pkl", 'wb') 246 | # pickle.dump(_dict, f, True) 247 | # f.close() 248 | # print _dict 249 | 250 | _dict = get_all_integeroverflow_point(j) 251 | f = open("integeroverflow_slice_points_new.pkl", 'wb') 252 | pickle.dump(_dict, f, True) 253 | f.close() 254 | 255 | 256 | -------------------------------------------------------------------------------- /slice_op.py: -------------------------------------------------------------------------------- 1 | from general_op import * 2 | 3 | 4 | def sub_slice_backwards(startnode, list_node, 
not_scan_list): 5 | if startnode['name'] in not_scan_list: 6 | return list_node, not_scan_list 7 | 8 | else: 9 | list_node.append(startnode) 10 | not_scan_list.append(startnode['name']) 11 | 12 | predecessors = startnode.predecessors() 13 | 14 | if predecessors != []: 15 | for p_node in predecessors: 16 | list_node, not_scan_list = sub_slice_backwards(p_node, list_node, not_scan_list) 17 | 18 | return list_node, not_scan_list 19 | 20 | 21 | def program_slice_backwards(pdg, list_startNode):#startNode is a list 22 | list_all_node = [] 23 | not_scan_list = [] 24 | for startNode in list_startNode: 25 | list_node = [startNode] 26 | not_scan_list.append(startNode['name']) 27 | predecessors = startNode.predecessors() 28 | if predecessors != []: 29 | for p_node in predecessors: 30 | list_node, not_scan_list = sub_slice_backwards(p_node, list_node, not_scan_list) 31 | 32 | list_all_node += list_node 33 | 34 | #Add function define line 35 | if startNode['functionId'] in not_scan_list: 36 | continue 37 | for node in pdg.vs: 38 | if node['name'] == startNode['functionId']: 39 | list_all_node.append(node) 40 | not_scan_list.append(node['name']) 41 | break 42 | 43 | print("list_all_node:", list_all_node) 44 | list_ordered_node = sortedNodesByLoc(list_all_node) 45 | 46 | _list_re = [] 47 | a = 0 48 | while a < len(list_ordered_node): 49 | if list_ordered_node[a]['name'] not in _list_re: 50 | _list_re.append(list_ordered_node[a]['name']) 51 | a += 1 52 | else: 53 | del list_ordered_node[a] 54 | return list_ordered_node 55 | 56 | 57 | def sub_slice_forward(startnode, list_node, not_scan_list): 58 | if startnode['name'] in not_scan_list: 59 | return list_node, not_scan_list 60 | 61 | else: 62 | list_node.append(startnode) 63 | not_scan_list.append(startnode['name']) 64 | 65 | successors = startnode.successors() 66 | if successors != []: 67 | for p_node in successors: 68 | list_node, not_scan_list = sub_slice_forward(p_node, list_node, not_scan_list) 69 | 70 | return list_node, 
not_scan_list 71 | 72 | 73 | def program_slice_forward(pdg, list_startNode):#startNode is a list of parameters, only consider data dependency 74 | pdg = del_ctrl_edge(pdg) 75 | 76 | list_all_node = [] 77 | not_scan_list = [] 78 | for startNode in list_startNode: 79 | list_node = [startNode] 80 | not_scan_list.append(startNode['name']) 81 | successors = startNode.successors() 82 | 83 | if successors != []: 84 | for p_node in successors: 85 | list_node, not_scan_list = sub_slice_forward(p_node, list_node, not_scan_list) 86 | 87 | list_all_node += list_node 88 | 89 | list_ordered_node = sortedNodesByLoc(list_all_node) 90 | 91 | a = 0 92 | _list_re = [] 93 | while a < len(list_ordered_node): 94 | if list_ordered_node[a]['name'] not in _list_re: 95 | _list_re.append(list_ordered_node[a]['name']) 96 | a += 1 97 | else: 98 | del list_ordered_node[a] 99 | 100 | return list_ordered_node 101 | 102 | 103 | def process_cross_func(to_scan_list, testID, slicetype, list_result_node, not_scan_func_list): 104 | if to_scan_list == []: 105 | return list_result_node, not_scan_func_list 106 | 107 | for node in to_scan_list: 108 | if node['name'] in not_scan_func_list: 109 | continue 110 | 111 | ret = isNewOrDelOp(node, testID) 112 | if ret: 113 | funcname = ret 114 | pdg = getFuncPDGByNameAndtestID(funcname, testID) 115 | 116 | 117 | if pdg == False: 118 | not_scan_func_list.append(node['name']) 119 | continue 120 | 121 | else: 122 | result_list = sortedNodesByLoc(pdg.vs) 123 | 124 | not_scan_func_list.append(node['name']) 125 | 126 | index = 0 127 | for result_node in list_result_node: 128 | if result_node['name'] == node['name']: 129 | break 130 | else: 131 | index += 1 132 | 133 | list_result_node = list_result_node[:index+1] + result_list + list_result_node[index+1:] 134 | 135 | list_result_node, not_scan_func_list = process_cross_func(result_list, testID, slicetype, list_result_node, not_scan_func_list) 136 | 137 | 138 | else: 139 | ret = isFuncCall(node)#if funccall ,if so 
,return funcnamelist 140 | if ret: 141 | 142 | for funcname in ret: 143 | if funcname.find('->') != -1: 144 | real_funcname = funcname.split('->')[-1].strip() 145 | objectname = funcname.split('->')[0].strip() 146 | 147 | funcID = node['functionId'] 148 | src_pdg = getFuncPDGByfuncIDAndtestID(funcID, testID) 149 | if src_pdg == False: 150 | continue 151 | 152 | classname = False 153 | for src_pnode in src_pdg.vs: 154 | if src_pnode['code'].find(objectname) != -1 and src_pnode['code'].find(' new ') != -1: 155 | tempvalue = src_pnode['code'].split(' new ')[1].replace('*', '').strip() 156 | if tempvalue.split(' ')[0] != 'const': 157 | classname = tempvalue.split(' ')[0] 158 | else: 159 | classname = tempvalue.split(' ')[1] 160 | 161 | break 162 | 163 | if classname == False: 164 | continue 165 | 166 | funcname = classname + ' :: ' + real_funcname 167 | pdg = getFuncPDGByNameAndtestID_noctrl(funcname, testID) 168 | 169 | 170 | elif funcname.find('.') != -1: 171 | real_funcname = funcname.split('.')[-1].strip() 172 | objectname = funcname.split('.')[0].strip() 173 | 174 | funcID = node['functionId'] 175 | src_pdg = getFuncPDGByNameAndtestID_noctrl(funcID, testID) 176 | if src_pdg == False: 177 | continue 178 | classname = False 179 | for src_pnode in src_pdg.vs: 180 | if src_pnode['code'].find(objectname) != -1 and src_pnode['code'].find(' new ') != -1: 181 | tempvalue = src_pnode['code'].split(' new ')[1].replace('*', '').strip() 182 | if tempvalue.split(' ')[0] != 'const': 183 | classname = tempvalue.split(' ')[0] 184 | else: 185 | classname = tempvalue.split(' ')[1] 186 | 187 | break 188 | 189 | if classname == False: 190 | continue 191 | 192 | funcname = classname + ' :: ' + real_funcname 193 | pdg = getFuncPDGByNameAndtestID(funcname, testID) 194 | 195 | elif funcname.find('.') != -1: 196 | real_funcname = funcname.split('.')[-1].strip() 197 | objectname = funcname.split('.')[0].strip() 198 | 199 | funcID = node['functionId'] 200 | src_pdg = 
getFuncPDGByfuncIDAndtestID(funcID, testID) 201 | classname = False 202 | for src_pnode in src_pdg.vs: 203 | if src_pnode['code'].find(objectname) != -1 and src_pnode['code'].find(' new ') != -1: 204 | tempvalue = src_pnode['code'].split(' new ')[1].replace('*', '').strip() 205 | if tempvalue.split(' ')[0] != 'const': 206 | classname = tempvalue.split(' ')[0] 207 | else: 208 | classname = tempvalue.split(' ')[1] 209 | 210 | break 211 | 212 | if classname == False: 213 | continue 214 | 215 | funcname = classname + ' :: ' + real_funcname 216 | pdg = getFuncPDGByNameAndtestID(funcname, testID) 217 | 218 | else: 219 | pdg = getFuncPDGByNameAndtestID(funcname, testID) 220 | 221 | if pdg == False: 222 | not_scan_func_list.append(node['name']) 223 | continue 224 | 225 | else: 226 | if slicetype == 0: 227 | ret_node = [] 228 | for vertex in pdg.vs: 229 | if vertex['type'] == 'ReturnStatement': 230 | ret_node.append(vertex) 231 | 232 | result_list = program_slice_backwards(pdg, ret_node) 233 | not_scan_func_list.append(node['name']) 234 | 235 | index = 0 236 | for result_node in list_result_node: 237 | if result_node['name'] == node['name']: 238 | break 239 | else: 240 | index += 1 241 | 242 | list_result_node = list_result_node[:index+1] + result_list + list_result_node[index+1:] 243 | 244 | list_result_node, not_scan_func_list = process_cross_func(result_list, testID, slicetype, list_result_node, not_scan_func_list) 245 | 246 | elif slicetype == 1: 247 | param_node = [] 248 | FuncEntryNode = False 249 | for vertex in pdg.vs: 250 | if vertex['type'] == 'Parameter': 251 | param_node.append(vertex) 252 | elif vertex['type'] == 'Function': 253 | FuncEntryNode = vertex 254 | 255 | if param_node != []: 256 | result_list = program_slice_forward(pdg, param_node) 257 | else: 258 | result_list = sortedNodesByLoc(pdg.vs) 259 | 260 | not_scan_func_list.append(node['name']) 261 | index = 0 262 | 263 | for result_node in list_result_node: 264 | if result_node['name'] == node['name']: 
265 | break 266 | else: 267 | index += 1 268 | 269 | if FuncEntryNode != False: 270 | result_list.insert(0, FuncEntryNode) 271 | 272 | list_result_node = list_result_node[:index+1] + result_list + list_result_node[index+1:] 273 | 274 | list_result_node, not_scan_func_list = process_cross_func(result_list, testID, slicetype, list_result_node, not_scan_func_list) 275 | 276 | 277 | return list_result_node, not_scan_func_list 278 | 279 | 280 | def process_crossfuncs_back_byfirstnode(list_tuple_results_back, testID, i, not_scan_func_list): 281 | #is not a good way in time, list_tuple_results_back=[(results_back, itertimes)] 282 | while i < len(list_tuple_results_back): 283 | iter_time = list_tuple_results_back[i][1] 284 | if iter_time == 3 or iter_time == -1:#allow cross 3 funcs: 285 | i += 1 286 | continue 287 | 288 | else: 289 | list_node = list_tuple_results_back[i][0] 290 | 291 | if len(list_node) == 1: 292 | i += 1 293 | continue 294 | 295 | if list_node[1]['type'] == 'Parameter': 296 | func_name = list_node[0]['name'] 297 | path = os.path.join('dict_call2cfgNodeID_funcID', testID, 'dict.pkl') 298 | 299 | if not os.path.exists(path): 300 | i += 1 301 | continue 302 | 303 | fin = open(path, 'rb') 304 | _dict = pickle.load(fin) 305 | fin.close() 306 | 307 | if func_name not in _dict.keys(): 308 | list_tuple_results_back[i][1] = -1 309 | i += 1 310 | continue 311 | 312 | else: 313 | list_cfgNodeID = _dict[func_name] 314 | dict_func_pdg = getFuncPDGBynodeIDAndtestID(list_cfgNodeID, testID) 315 | iter_time += 1 316 | _new_list = [] 317 | for item in dict_func_pdg.items(): 318 | targetPDG = item[1] 319 | startnode = [] 320 | for n in targetPDG.vs: 321 | if n['name'] == item[0]:#is id 322 | startnode = [n] 323 | break 324 | 325 | if startnode == []: 326 | continue 327 | ret_list = program_slice_backwards(targetPDG, startnode) 328 | not_scan_func_list.append(startnode[0]['name']) 329 | 330 | ret_list = ret_list + list_node 331 | _new_list.append([ret_list, iter_time]) 332 
| 333 | if _new_list != []: 334 | del list_tuple_results_back[i] 335 | list_tuple_results_back = list_tuple_results_back + _new_list 336 | list_tuple_results_back, not_scan_func_list = process_crossfuncs_back_byfirstnode(list_tuple_results_back, testID, i, not_scan_func_list) 337 | else: 338 | list_tuple_results_back[i][1] = -1 339 | i += 1 340 | continue 341 | 342 | 343 | else: 344 | funcname = list_node[0]['code'] 345 | if funcname.find("::") > -1: 346 | 347 | 348 | path = os.path.join('dict_call2cfgNodeID_funcID', testID, 'dict.pkl')#get funname and it call place 349 | 350 | #Modified by kingnop 351 | if not os.path.exists(path): 352 | i += 1 353 | continue 354 | #################### 355 | 356 | fin = open(path, 'rb') 357 | _dict = pickle.load(fin) 358 | fin.close() 359 | 360 | 361 | 362 | func_name = list_node[0]['name'] 363 | if func_name not in _dict.keys(): 364 | list_tuple_results_back[i][1] = -1 365 | i += 1 366 | continue 367 | 368 | else: 369 | list_cfgNodeID = _dict[func_name] 370 | dict_func_pdg = getFuncPDGBynodeIDAndtestID(list_cfgNodeID, testID) 371 | 372 | iter_time += 1 373 | _new_list = [] 374 | for item in dict_func_pdg.items(): 375 | targetPDG = item[1] 376 | startnode = [] 377 | for n in targetPDG.vs: 378 | if n['name'] == item[0]:#is id 379 | startnode = [n] 380 | break 381 | if startnode == []: 382 | continue 383 | ret_list = program_slice_backwards(targetPDG, startnode) 384 | not_scan_func_list.append(startnode[0]['name']) 385 | 386 | 387 | ret_list = ret_list + list_node 388 | _new_list.append([ret_list, iter_time]) 389 | 390 | if _new_list != []: 391 | del list_tuple_results_back[i] 392 | list_tuple_results_back = list_tuple_results_back + _new_list 393 | list_tuple_results_back, not_scan_func_list = process_crossfuncs_back_byfirstnode(list_tuple_results_back, testID, i, not_scan_func_list) 394 | 395 | else: 396 | list_tuple_results_back[i][1] = -1 397 | i += 1 398 | continue 399 | 400 | else: 401 | i += 1 402 | continue 403 | 404 | 
return list_tuple_results_back, not_scan_func_list 405 | --------------------------------------------------------------------------------