├── .gitattributes ├── .gitignore ├── .gitmodules ├── FuncParser-opt ├── FuncParser-opt.jar ├── build.sh ├── jar │ └── antlr-4.7.1-complete.jar ├── manifest.mf └── src │ ├── CoarseSimpleDecl.g4 │ ├── Common.g4 │ ├── Expressions.g4 │ ├── FineSimpleDecl.g4 │ ├── Function.g4 │ ├── FunctionDef.g4 │ ├── Main.java │ ├── Module.g4 │ ├── ModuleLex.g4 │ └── SimpleDecl.g4 ├── LICENSE.md ├── README.md ├── checker └── check_clones.py ├── config.py ├── dep.sh ├── docs ├── examples.md └── 취약점 데이터베이스 생성 솔루션 매뉴얼 V1.0.pdf ├── hmark ├── FuncParser-opt.jar ├── README.md ├── __init__.py ├── get_cpu_count.py ├── hmark.py ├── icon.gif ├── icon.ico ├── parseutility2.py ├── spec_generator.py └── version.py ├── initialize.py ├── paper └── SNP17.pdf ├── src ├── get_cvepatch_from_git.py ├── get_source_from_cvepatch.py ├── repo_updater.py ├── vul_dup_remover.py ├── vul_hidx_generator.py └── vul_verifier.py ├── testcode ├── async.c ├── configs.c ├── module.c └── wrong_case.c └── tools ├── FuncParser-opt.jar ├── __init__.py ├── cvedatagen ├── README.md ├── __init__.py ├── common.py ├── cveXmlDownloader.py ├── cveXmlParser.py └── cveXmlUpdater.py └── parseutility.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pkl binary 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *test.py 3 | tmp_* 4 | 5 | diff/ 6 | vul/ 7 | diff*/ 8 | vul-*/ 9 | repolists/ 10 | hidx/ 11 | hidx*/ 12 | result-0901* 13 | *.tar.gz 14 | *.html 15 | *.spec 16 | *.pkl 17 | 18 | testcode/ 19 | misc/ 20 | experiments-related/ 21 | codesensor2python/build/ 22 | codesensor2python/ 23 | FuncParser/build/ 24 | kernel44/ 25 | FuncParser/FuncParser.jar 26 | 27 | hmark/build/ 28 | hmark/dist/ 29 | 30 | NVDCVEcrawler/*.xml 31 | 32 | # Pycharm 33 | .idea/ 34 | 35 | # CVE 36 | *.xml 37 | *.zip 38 | 39 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vulnDBGen"] 2 | path = vulnDBGen 3 | url = https://github.com/squizz617/vulnDBGen 4 | -------------------------------------------------------------------------------- /FuncParser-opt/FuncParser-opt.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/FuncParser-opt/FuncParser-opt.jar -------------------------------------------------------------------------------- /FuncParser-opt/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | 5 | SOURCE_DIR='src' 6 | BUILD_DIR='build' 7 | JAR_DIR='jar' 8 | ANTLR_JAR='antlr-4.7.1-complete.jar' 9 | RESULT_JAR='FuncParser-471.jar' 10 | 11 | rm -rf ${RESULT_JAR} 12 | rm -rf ${BUILD_DIR} 13 | mkdir ${BUILD_DIR} 14 | 15 | # Copy source files to build dir 16 | cp ./${SOURCE_DIR}/*.g4 ${BUILD_DIR} 17 | cp ./${SOURCE_DIR}/*.java ${BUILD_DIR} 18 | cp ./manifest.mf ${BUILD_DIR} 19 | cp ./${JAR_DIR}/$ANTLR_JAR ${BUILD_DIR} 20 | 21 | cd ${BUILD_DIR} 22 | 23 | # Generate Lexer and Parser from Grammar 24 | 25 | java -cp ./${ANTLR_JAR} org.antlr.v4.Tool Module.g4 Function.g4 26 | 27 | # Compile java-files 28 | #javac -cp ./${ANTLR_JAR_1} ./*.java -Xlint:unchecked 29 | javac -cp ./${ANTLR_JAR} ./*.java -Xlint:deprecation 30 | 31 | # unpack ANTLR-jar since 
we need some of the class files 32 | jar xf ./${ANTLR_JAR} 33 | 34 | # Create ${RESULT_JAR} 35 | #jar cvfm ${RESULT_JAR} manifest.mf *.class org > /dev/null 36 | jar cvfm ${RESULT_JAR} ../manifest.mf ./*.class org > out 37 | cp ${RESULT_JAR} ../ 38 | -------------------------------------------------------------------------------- /FuncParser-opt/jar/antlr-4.7.1-complete.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/FuncParser-opt/jar/antlr-4.7.1-complete.jar -------------------------------------------------------------------------------- /FuncParser-opt/manifest.mf: -------------------------------------------------------------------------------- 1 | Main-Class: Main 2 | -------------------------------------------------------------------------------- /FuncParser-opt/src/CoarseSimpleDecl.g4: -------------------------------------------------------------------------------- 1 | grammar CoarseSimpleDecl; 2 | 3 | import SimpleDecl; 4 | 5 | // The following two contain 'water'-rules for expressions 6 | 7 | init_declarator : declarator (('(' expr? ')') | ('=' assign_expr_w_))?; 8 | declarator: ptrs? identifier type_suffix?; 9 | 10 | 11 | type_suffix : ('[' constant_expr_w_ ']') | param_type_list; 12 | 13 | // water rules for expressions 14 | 15 | assign_expr_w_: assign_water* 16 | (('{' assign_expr_w__l2 '}' | '(' assign_expr_w__l2 ')' | '[' assign_expr_w__l2 ']') 17 | assign_water*)*; 18 | 19 | assign_expr_w__l2: assign_water_l2* (('{' assign_expr_w__l2 '}' | '(' assign_expr_w__l2 ')' | '[' assign_expr_w__l2 ']') 20 | assign_water_l2*)*; 21 | 22 | constant_expr_w_: no_squares* ('[' constant_expr_w_ ']' no_squares*)*; 23 | 24 | -------------------------------------------------------------------------------- /FuncParser-opt/src/Common.g4: -------------------------------------------------------------------------------- 1 | grammar Common; 2 | 3 | @header{ 4 | import java.util.Stack; 5 | } 6 | 7 | 8 | @parser::members 9 | { 10 | public boolean skipToEndOfObject() 11 | { 12 | Stack CurlyStack = new Stack(); 13 | Object o = new Object(); 14 | int t = _input.LA(1); 15 | 16 | while(t != EOF && !(CurlyStack.empty() && t == CLOSING_CURLY)){ 17 | 18 | if(t == PRE_ELSE){ 19 | Stack ifdefStack = new Stack(); 20 | consume(); 21 | t = _input.LA(1); 22 | 23 | while(t != EOF && !(ifdefStack.empty() && (t == PRE_ENDIF))){ 24 | if(t == PRE_IF) 25 | ifdefStack.push(o); 26 | else if(t == PRE_ENDIF) 27 | ifdefStack.pop(); 28 | consume(); 29 | t = _input.LA(1); 30 | } 31 | } 32 | 33 | if(t == OPENING_CURLY) 34 | CurlyStack.push(o); 35 | else if(t == CLOSING_CURLY) 36 | CurlyStack.pop(); 37 | 38 | consume(); 39 | t = _input.LA(1); 40 | } 41 | if(t != EOF) 42 | consume(); 43 | return true; 44 | } 45 | 46 | // this should go into FunctionGrammar but ANTLR fails 47 | // to join the parser::members-section on inclusion 48 | 49 | public boolean preProcSkipToEnd() 50 | { 51 | Stack CurlyStack = new Stack(); 52 | Object o = new Object(); 53 | int t = _input.LA(1); 54 | 55 | while(t != EOF && !(CurlyStack.empty() && t == PRE_ENDIF)){ 56 | 57 | if(t == PRE_IF) 58 | CurlyStack.push(o); 59 | else if(t == PRE_ENDIF) 60 | CurlyStack.pop(); 61 | 62 | consume(); 63 | t = _input.LA(1); 64 | } 65 | if(t != EOF) 66 | consume(); 67 | return true; 68 | } 69 | 70 | } 71 | 72 | unary_operator : '&' | '*' | '+'| '-' | '~' | '!'; 73 | relational_operator: ('<'|'>'|'<='|'>='); 74 | 75 | constant 76 | : HEX_LITERAL 77 
| | OCTAL_LITERAL 78 | | DECIMAL_LITERAL 79 | | STRING 80 | | CHAR 81 | | FLOATING_POINT_LITERAL 82 | ; 83 | 84 | // keywords & operators 85 | 86 | function_decl_specifiers: ('inline' | 'virtual' | 'explicit' | 'friend' | 'static'); 87 | ptr_operator: ('*' | '&'); 88 | 89 | access_specifier: ('public' | 'private' | 'protected'); 90 | 91 | operator: (('new' | 'delete' ) ('[' ']')?) 92 | | '+' | '-' | '*' | '/' | '%' |'^' | '&' | '|' | '~' 93 | | '!' | '=' | '<' | '>' | '+=' | '-=' | '*=' 94 | | '/=' | '%=' | '^=' | '&=' | '|=' | '>>' 95 | |'<<'| '>>=' | '<<=' | '==' | '!=' 96 | | '<=' | '>=' | '&&' | '||' | '++' | '--' 97 | | ',' | '->*' | '->' | '(' ')' | '[' ']' 98 | ; 99 | 100 | assignment_operator: '=' | '*=' | '/=' | '%=' | '+=' | '-=' | '<<=' | '>>=' | '&=' | '^=' | '|='; 101 | equality_operator: ('=='| '!='); 102 | 103 | template_decl_start : TEMPLATE '<' template_param_list '>'; 104 | 105 | 106 | // template water 107 | template_param_list : (('<' template_param_list '>') | 108 | ('(' template_param_list ')') | 109 | no_angle_brackets_or_brackets)+ 110 | ; 111 | 112 | // water 113 | 114 | no_brackets: ~('(' | ')'); 115 | no_brackets_curlies_or_squares: ~('(' | ')' | '{' | '}' | '[' | ']'); 116 | no_brackets_or_semicolon: ~('(' | ')' | ';'); 117 | no_angle_brackets_or_brackets : ~('<' | '>' | '(' | ')'); 118 | no_curlies: ~('{' | '}'); 119 | no_squares: ~('[' | ']'); 120 | no_squares_or_semicolon: ~('[' | ']' | ';'); 121 | no_comma_or_semicolon: ~(',' | ';'); 122 | 123 | assign_water: ~('(' | ')' | '{' | '}' | '[' | ']' | ';' | ','); 124 | assign_water_l2: ~('(' | ')' | '{' | '}' | '[' | ']'); 125 | 126 | water: .; 127 | -------------------------------------------------------------------------------- /FuncParser-opt/src/Expressions.g4: -------------------------------------------------------------------------------- 1 | grammar Expressions; 2 | 3 | expr: assign_expr (',' expr)?; 4 | 5 | assign_expr: conditional_expression (assignment_operator assign_expr)?; 6 | conditional_expression: or_expression #normOr 7 | | or_expression ('?' expr ':' conditional_expression) #cndExpr; 8 | 9 | 10 | or_expression : and_expression ('||' or_expression)?; 11 | and_expression : inclusive_or_expression ('&&' and_expression)?; 12 | inclusive_or_expression: exclusive_or_expression ('|' inclusive_or_expression)?; 13 | exclusive_or_expression: bit_and_expression ('^' exclusive_or_expression)?; 14 | bit_and_expression: equality_expression ('&' bit_and_expression)?; 15 | equality_expression: relational_expression (equality_operator equality_expression)?; 16 | relational_expression: shift_expression (relational_operator relational_expression)?; 17 | shift_expression: additive_expression ( ('<<'|'>>') shift_expression)?; 18 | additive_expression: multiplicative_expression (('+'| '-') additive_expression)?; 19 | multiplicative_expression: cast_expression ( ('*'| '/'| '%') multiplicative_expression)?; 20 | 21 | cast_expression: ('(' cast_target ')' cast_expression) 22 | | unary_expression 23 | ; 24 | 25 | cast_target: type_name ptr_operator*; 26 | 27 | // currently does not implement delete 28 | 29 | unary_expression: inc_dec cast_expression 30 | | unary_op_and_cast_expr 31 | | sizeof_expression 32 | | new_expression 33 | | postfix_expression 34 | ; 35 | 36 | new_expression: '::'? NEW type_name '[' conditional_expression? ']' 37 | | '::'? NEW type_name '(' expr? 
')' 38 | ; 39 | 40 | unary_op_and_cast_expr: unary_operator cast_expression; 41 | 42 | sizeof_expression: sizeof '(' sizeof_operand ')' 43 | | sizeof sizeof_operand2; 44 | 45 | sizeof: 'sizeof'; 46 | 47 | sizeof_operand: type_name ptr_operator *; 48 | sizeof_operand2: unary_expression; 49 | 50 | inc_dec: ('--' | '++'); 51 | 52 | // this is a bit misleading. We're just allowing access_specifiers 53 | // here because C programs can use 'public', 'protected' or 'private' 54 | // as variable names. 55 | 56 | postfix_expression: postfix_expression '[' expr ']' #arrayIndexing 57 | | postfix_expression '(' function_argument_list ')' #funcCall 58 | | postfix_expression '.' TEMPLATE? (identifier) #memberAccess 59 | | postfix_expression '->' TEMPLATE? (identifier) #ptrMemberAccess 60 | | postfix_expression inc_dec #incDecOp 61 | | primary_expression # primaryOnly 62 | ; 63 | 64 | function_argument_list: ( function_argument (',' function_argument)* )?; 65 | function_argument: assign_expr; 66 | 67 | 68 | primary_expression: identifier | constant | '(' expr ')'; 69 | 70 | -------------------------------------------------------------------------------- /FuncParser-opt/src/FineSimpleDecl.g4: -------------------------------------------------------------------------------- 1 | grammar FineSimpleDecl; 2 | 3 | import SimpleDecl; 4 | 5 | init_declarator: declarator '(' expr? ')' #initDeclWithCall 6 | | declarator '=' initializer #initDeclWithAssign 7 | | declarator #initDeclSimple 8 | ; 9 | 10 | declarator: ptrs? identifier type_suffix?; 11 | 12 | type_suffix : ('[' conditional_expression? ']') | param_type_list; 13 | 14 | -------------------------------------------------------------------------------- /FuncParser-opt/src/Function.g4: -------------------------------------------------------------------------------- 1 | grammar Function; 2 | import ModuleLex, Common, Expressions, FineSimpleDecl; 3 | /* 4 | @header{ 5 | package antlr.C; 6 | } 7 | */ 8 | 9 | statements: (pre_opener 10 | | pre_closer 11 | | pre_else {preProcSkipToEnd(); } 12 | | statement)*; 13 | 14 | statement: opening_curly 15 | | closing_curly 16 | | block_starter 17 | | jump_statement 18 | | label 19 | | simple_decl 20 | | expr_statement 21 | | water 22 | ; 23 | 24 | pre_opener: PRE_IF; 25 | pre_else: PRE_ELSE; 26 | pre_closer: PRE_ENDIF; 27 | opening_curly: '{'; 28 | closing_curly: '}'; 29 | 30 | block_starter: selection_or_iteration; 31 | 32 | selection_or_iteration: TRY #Try_statement 33 | | CATCH '(' param_type ')' #Catch_statement 34 | | IF '(' condition ')' #If_statement 35 | | ELSE #Else_statement 36 | | SWITCH '(' condition ')' #Switch_statement 37 | | FOR '(' (for_init_statement | ';') condition? ';' expr? ')' #For_statement 38 | | DO #Do_statement 39 | | WHILE '(' condition ')' #While_statement 40 | ; 41 | 42 | // Don't know why, but: introducing this unused rule results 43 | // in a performance boost. 44 | 45 | do_statement1: DO statement WHILE '(' expr ')'; 46 | 47 | for_init_statement : simple_decl 48 | | expr ';' 49 | ; 50 | 51 | jump_statement: BREAK ';' #breakStatement 52 | | CONTINUE ';' #continueStatement 53 | | GOTO identifier ';' #gotoStatement 54 | | RETURN expr? ';' #returnStatement 55 | ; 56 | 57 | label: CASE? (identifier | number | CHAR ) ':' ; 58 | 59 | expr_statement: expr? 
';'; 60 | 61 | condition: expr 62 | | type_name declarator '=' assign_expr; 63 | -------------------------------------------------------------------------------- /FuncParser-opt/src/FunctionDef.g4: -------------------------------------------------------------------------------- 1 | grammar FunctionDef; 2 | import ModuleLex, Function; 3 | 4 | function_def : template_decl_start? return_type? function_name 5 | function_param_list ctor_list? compound_statement; 6 | 7 | return_type : (function_decl_specifiers* type_name) ptr_operator*; 8 | 9 | function_param_list : '(' parameter_decl_clause? ')' CV_QUALIFIER* exception_specification?; 10 | 11 | parameter_decl_clause: (parameter_decl (',' parameter_decl)*) (',' '...')? 12 | | VOID; 13 | parameter_decl : param_decl_specifiers parameter_id | param_decl_specifiers | parameter_id; 14 | parameter_id: ptrs? ('(' parameter_id ')' | parameter_name) type_suffix?; 15 | 16 | compound_statement: OPENING_CURLY { skipToEndOfObject(); }; 17 | //compound_statement: statements; 18 | 19 | ctor_list: ':' ctor_initializer (',' ctor_initializer)*; 20 | ctor_initializer: initializer_id ctor_expr; 21 | initializer_id : '::'? identifier; 22 | ctor_expr: '(' expr? ')'; 23 | 24 | function_name: '(' function_name ')' | identifier | OPERATOR operator; 25 | 26 | exception_specification : THROW '(' type_id_list ')'; 27 | type_id_list: no_brackets* ('(' type_id_list ')' no_brackets*)*; 28 | -------------------------------------------------------------------------------- /FuncParser-opt/src/Main.java: -------------------------------------------------------------------------------- 1 | 2 | import org.antlr.v4.runtime.ANTLRFileStream; 3 | import org.antlr.v4.runtime.ANTLRInputStream; 4 | import org.antlr.v4.runtime.CommonTokenStream; 5 | import org.antlr.v4.runtime.tree.ParseTree; 6 | import org.antlr.v4.runtime.tree.ParseTreeListener; 7 | import org.antlr.v4.runtime.Parser; 8 | //import org.antlr.v4.runtime.tree.TerminalNode; 9 | import org.antlr.v4.runtime.tree.*; 10 | import org.antlr.v4.runtime.ParserRuleContext; 11 | import org.antlr.v4.runtime.RuleContext; 12 | import org.antlr.v4.runtime.misc.Utils; 13 | import org.antlr.v4.runtime.misc.ParseCancellationException; 14 | 15 | import org.antlr.v4.runtime.CharStream; 16 | import org.antlr.v4.runtime.misc.Interval; 17 | 18 | import org.antlr.v4.runtime.DefaultErrorStrategy; 19 | import org.antlr.v4.runtime.BailErrorStrategy; 20 | import org.antlr.v4.runtime.atn.PredictionMode; 21 | 22 | import org.antlr.v4.runtime.Token; 23 | 24 | import java.io.*; 25 | import java.util.List; 26 | import java.util.Arrays; 27 | import java.util.ArrayList; 28 | 29 | import java.util.concurrent.ExecutorService; 30 | import java.util.concurrent.Executors; 31 | import java.util.concurrent.Future; 32 | import java.util.concurrent.Callable; 33 | 34 | 35 | public class Main { 36 | public static void main(String[] args) throws IOException { 37 | List ret; 38 | long t1, t2, t3; 39 | try { 40 | String inputFilename = parseCommandLine(args); 41 | String bParseBody = "1"; // "1": with body parser, "0": without body parser 42 | if (args.length > 1) 43 | bParseBody = args[args.length - 1]; 44 | if (!bParseBody.equals("0") && !bParseBody.equals("1")) 45 | throw new Exception("argument bParseBody(last argument) required."); 46 | 47 | //System.out.println("processors: " + Runtime.getRuntime().availableProcessors()); 48 | t1 = System.currentTimeMillis(); 49 | if (bParseBody.equals("1")) { 50 | TreeParser tp = new TreeParser(); 51 | ret = 
tp.ParseFile(inputFilename); 52 | } 53 | else { 54 | TreeParser1 tp = new TreeParser1(); 55 | ret = tp.ParseFile(inputFilename); 56 | } 57 | t2 = System.currentTimeMillis(); 58 | } catch (Exception e) { 59 | e.printStackTrace(); 60 | return; 61 | } 62 | 63 | print_functions_all(ret); // print_functions() or print_functions_all() 64 | t3 = System.currentTimeMillis(); 65 | 66 | //System.out.println("parse " + (t2 - t1) / 1000.0); 67 | //System.out.println("print " + (t3 - t2) / 1000.0); 68 | } 69 | 70 | // Print all elements in function class seperated with CR, LF, TAB. 71 | // Please refer function.toString() method. 72 | private static void print_functions_all(List func_list) { 73 | for (function f : func_list) 74 | System.out.print(f); 75 | } 76 | 77 | // Print name, line, parameter, variable, datatype and funccallee in function class. 78 | private static void print_functions(List func_list) { 79 | System.out.println("func_list.size(): " + func_list.size()); 80 | int i = 0; 81 | for (function f : func_list) { 82 | System.out.println("" + (i++) + ": " + f.name + 83 | "(" + f.lineStart + ", " + f.lineStop + ")"); 84 | 85 | System.out.print(" PARAM\t["); 86 | for (String element : f.parameterList) 87 | System.out.print(element + ", "); 88 | System.out.println("]"); 89 | 90 | System.out.print(" LVARS\t["); 91 | for (String element : f.variableList) 92 | System.out.print(element + ", "); 93 | System.out.println("]"); 94 | 95 | System.out.print(" DTYPE\t["); 96 | for (String element : f.dataTypeList) 97 | System.out.print(element + ", "); 98 | System.out.println("]"); 99 | 100 | System.out.print(" CALLS\t["); 101 | for (String element : f.funcCalleeList) 102 | System.out.print(element + ", "); 103 | System.out.println("]\n"); 104 | } 105 | } 106 | 107 | private static String parseCommandLine(String[] args) throws Exception { 108 | if(args.length < 1) { 109 | throw new Exception("filename required."); 110 | } 111 | 112 | return args[0]; 113 | } 114 | } 115 | 116 | class function { 117 | public String parentFile; 118 | public int parentNumLoc = 0; 119 | public String name; 120 | public int lineStart = 0; 121 | public int lineStop = 0; 122 | public int funcId = 0; 123 | public String funcBody; 124 | 125 | public List parameterList; 126 | public List variableList; 127 | public List dataTypeList; 128 | public List funcCalleeList; 129 | 130 | function(String fileName) { 131 | this.parentFile = fileName; 132 | this.parameterList = new ArrayList(); 133 | this.variableList = new ArrayList(); 134 | this.dataTypeList = new ArrayList(); 135 | this.funcCalleeList = new ArrayList(); 136 | } 137 | 138 | public String toString() { 139 | StringBuilder ret = new StringBuilder(); 140 | 141 | ret.append("\r\0?\r?\0\r"); // function string start 142 | ret.append('\n'); 143 | 144 | ret.append(parentFile); 145 | ret.append('\n'); 146 | 147 | ret.append(String.valueOf(parentNumLoc)); 148 | ret.append('\n'); 149 | 150 | ret.append(name); 151 | ret.append('\n'); 152 | 153 | ret.append(String.valueOf(lineStart)); 154 | ret.append('\t'); 155 | ret.append(String.valueOf(lineStop)); 156 | ret.append('\n'); 157 | 158 | ret.append(String.valueOf(funcId)); 159 | ret.append('\n'); 160 | 161 | for (String s : this.parameterList) { 162 | ret.append(s); 163 | ret.append('\t'); 164 | } 165 | ret.append('\n'); 166 | for (String s : this.variableList) { 167 | ret.append(s); 168 | ret.append('\t'); 169 | } 170 | ret.append('\n'); 171 | for (String s : this.dataTypeList) { 172 | ret.append(s); 173 | ret.append('\t'); 174 | } 175 | 
ret.append('\n'); 176 | for (String s : this.funcCalleeList) { 177 | ret.append(s); 178 | ret.append('\t'); 179 | } 180 | ret.append('\n'); 181 | 182 | ret.append(this.funcBody); 183 | ret.append('\n'); 184 | 185 | return ret.toString(); 186 | } 187 | } 188 | 189 | class JobInstance implements Callable { 190 | public function functionInstance; 191 | public String string; 192 | public int line; 193 | public int enableSLL; 194 | 195 | public JobInstance(String s, function f, int l, int e) { 196 | this.functionInstance = f; 197 | this.string = s; 198 | this.line = l; 199 | this.enableSLL = e; 200 | } 201 | 202 | public function call() throws Exception { 203 | //System.err.println("call() called: " + Thread.currentThread().getName()); 204 | BodyParser p = new BodyParser(); 205 | p.ParseString(this.string, this.functionInstance, this.line, this.enableSLL); 206 | return this.functionInstance; 207 | } 208 | } 209 | 210 | class BodyParser implements ParseTreeListener { 211 | private static int IS_FIRST = 1; 212 | 213 | public final static int FUNCTION_DEF = 0; 214 | public final static int FUNCTION_NAME = 1; 215 | public final static int PARAMETER_NAME = 2; 216 | public final static int DECLARATOR = 3; 217 | public final static int TYPE_NAME = 4; 218 | public final static int FUNCTION_CALL = 5; 219 | public final static int COMPOUND_STMT = 6; 220 | 221 | private final static String[] table = {"function_def", "function_name", "parameter_name", 222 | "declarator", "type_name", "identifier", "compound_statement"}; 223 | private static int[] IDX = {0, 0, 0, 0, 0, 0, 0}; 224 | 225 | private static List ruleNames; 226 | 227 | private function functionInstance = null; 228 | 229 | // Function body's base line 230 | private int defaultLine = 0; 231 | 232 | // Local variable's name 233 | private int declaratorFlag = 0; 234 | private StringBuilder declaratorStr = new StringBuilder(); 235 | 236 | // type (return type, parameter type, local variable type) 237 | private int typeNameFlag = 0; 238 | private StringBuilder typeNameStr = new StringBuilder(); 239 | 240 | private int funcCallFlag = 0; 241 | private StringBuilder funcCallStr = new StringBuilder(); 242 | 243 | // set SLL option 244 | private int enableSLL = 0; 245 | 246 | public BodyParser() { 247 | this.functionInstance = null; 248 | 249 | this.defaultLine = 0; 250 | 251 | this.declaratorFlag = 0; 252 | this.declaratorStr = new StringBuilder(); 253 | 254 | // type (return type, parameter type, local variable type) 255 | this.typeNameFlag = 0; 256 | this.typeNameStr = new StringBuilder(); 257 | 258 | this.funcCallFlag = 0; 259 | this.funcCallStr = new StringBuilder(); 260 | 261 | // set SLL option 262 | this.enableSLL = 0; 263 | } 264 | 265 | private void _init(FunctionParser parser) { 266 | //this(); 267 | this.functionInstance = null; 268 | 269 | this.defaultLine = 0; 270 | 271 | this.declaratorFlag = 0; 272 | this.declaratorStr = new StringBuilder(); 273 | 274 | this.typeNameFlag = 0; 275 | this.typeNameStr = new StringBuilder(); 276 | 277 | this.funcCallFlag = 0; 278 | this.funcCallStr = new StringBuilder(); 279 | 280 | this.enableSLL = 0; 281 | 282 | if (BodyParser.IS_FIRST != 0) { 283 | this.ruleNames = Arrays.asList(parser.getRuleNames()); 284 | 285 | for (int i = 0; i < parser.ruleNames.length; i++) { 286 | for (int j = 0; j < BodyParser.table.length; j++) { 287 | if (parser.ruleNames[i].equals(BodyParser.table[j])) 288 | BodyParser.IDX[j] = i; 289 | } 290 | } 291 | BodyParser.IS_FIRST = 0; 292 | } 293 | } 294 | 295 | public void 
ParseString(String string, function functionInstance) { 296 | this.ParseString(string, functionInstance, 0); 297 | } 298 | public void ParseString(String string, function functionInstance, int line) { 299 | this.ParseString(string, functionInstance, line, 1); 300 | } 301 | public void ParseString(String string, function funcinstance, int line, int bSLL) { 302 | try { 303 | ANTLRInputStream input = new ANTLRInputStream(string); 304 | FunctionLexer lexer = new FunctionLexer(input); 305 | CommonTokenStream tokens = new CommonTokenStream(lexer); 306 | FunctionParser parser = new FunctionParser(tokens); 307 | parser.removeErrorListeners(); // remove error listener 308 | 309 | if (bSLL != 0) { 310 | //print "start parsing in BodyParser class with SLL mode" 311 | parser.getInterpreter().setPredictionMode(PredictionMode.SLL); 312 | parser.setErrorHandler(new BailErrorStrategy()); 313 | } 314 | 315 | ParseTree tree; 316 | try { 317 | tree = parser.statements(); 318 | } 319 | catch (ParseCancellationException e) { 320 | //print "Exception found in BodyParser class. set LL mode" 321 | parser.reset(); 322 | parser.getInterpreter().setPredictionMode(PredictionMode.LL); 323 | parser.setErrorHandler(new DefaultErrorStrategy()); 324 | tree = parser.statements(); 325 | } 326 | this._init(parser); // reset before traverse a parse tree 327 | this.enableSLL = bSLL; 328 | this.functionInstance = funcinstance; 329 | 330 | if (line != 0) // if line is zero, self.defaultLine is also zero 331 | this.defaultLine = (line - 1); 332 | 333 | //ParseTreeWalker ptw = new ParseTreeWalker(); 334 | //ptw.walk(this, tree); 335 | ParseTreeWalker.DEFAULT.walk(this, tree); 336 | } 337 | catch (Exception e) { 338 | e.printStackTrace(); 339 | } 340 | return; 341 | } 342 | 343 | @Override 344 | public void enterEveryRule(ParserRuleContext ctx) { 345 | int ruleIndex = ctx.getRuleIndex(); 346 | 347 | if (ruleIndex == BodyParser.IDX[BodyParser.DECLARATOR]) 348 | this.declaratorFlag = 1; 349 | else if (ruleIndex == BodyParser.IDX[BodyParser.TYPE_NAME]) 350 | this.typeNameFlag = 1; 351 | else if (ruleIndex == BodyParser.IDX[BodyParser.FUNCTION_CALL]) 352 | this.funcCallFlag = 1; 353 | } 354 | 355 | 356 | @Override 357 | public void exitEveryRule(ParserRuleContext ctx) { 358 | int ruleIndex = ctx.getRuleIndex(); 359 | 360 | if (ruleIndex == BodyParser.IDX[BodyParser.DECLARATOR] && this.declaratorFlag != 0) {// useless if-statement (because, enter declarator -> exit identifier) 361 | //print "LVAR" 362 | this.functionInstance.variableList.add(this.declaratorStr.toString().trim()); 363 | this.declaratorFlag = 0; 364 | this.declaratorStr.setLength(0); 365 | } 366 | else if (ruleIndex == BodyParser.IDX[BodyParser.TYPE_NAME] && this.typeNameFlag != 0) { 367 | //print "DTYPE" 368 | this.functionInstance.dataTypeList.add(this.typeNameStr.toString().trim()); 369 | this.typeNameFlag = 0; 370 | this.typeNameStr.setLength(0); 371 | } 372 | else if (ruleIndex == BodyParser.IDX[BodyParser.FUNCTION_CALL] && this.funcCallFlag != 0) { 373 | //print "CALL" 374 | if (this.funcCallFlag == 2) 375 | this.functionInstance.funcCalleeList.add(this.funcCallStr.toString().trim()); 376 | this.funcCallFlag = 0; 377 | this.funcCallStr.setLength(0); 378 | 379 | if (this.declaratorFlag != 0) {// [enter declarator -> exit identifier]: avoid "a [ 1 ]" in local variable name 380 | this.functionInstance.variableList.add(this.declaratorStr.toString().trim()); 381 | this.declaratorFlag = 0; 382 | this.declaratorStr.setLength(0); 383 | } 384 | } 385 | } 386 | 387 | 
@Override 388 | public void visitTerminal(TerminalNode node) { 389 | if (this.declaratorFlag != 0) { 390 | String tmpText = Trees.getNodeText(node, this.ruleNames); 391 | 392 | if (!tmpText.equals("*")) { 393 | this.declaratorStr.append(tmpText); 394 | this.declaratorStr.append(' '); 395 | } 396 | } 397 | else if (this.typeNameFlag != 0) { 398 | this.typeNameStr.append(Trees.getNodeText(node, this.ruleNames)); 399 | this.typeNameStr.append(' '); 400 | } 401 | else if (this.funcCallFlag != 0) { 402 | try { 403 | ParseTree p1 = node.getParent().getParent().getParent().getParent(); 404 | 405 | //System.out.println("-----funcCallFlag: " + p1.getClass()); 406 | if (p1 instanceof FunctionParser.FuncCallContext) { 407 | //System.out.println("found"); 408 | this.funcCallStr.append(Trees.getNodeText(node ,this.ruleNames)); 409 | this.funcCallStr.append(' '); 410 | this.funcCallFlag = 2; 411 | } 412 | } 413 | catch (Exception e) { // useless? 414 | //System.out.println("-----funcCallFlag: Exception found"); 415 | } 416 | } 417 | } 418 | 419 | @Override 420 | public void visitErrorNode(ErrorNode node) { } 421 | } 422 | 423 | class TreeParser implements ParseTreeListener { 424 | private static int IS_FIRST = 1; 425 | 426 | public final static int FUNCTION_DEF = 0; 427 | public final static int FUNCTION_NAME = 1; 428 | public final static int PARAMETER_NAME = 2; 429 | public final static int DECLARATOR = 3; 430 | public final static int TYPE_NAME = 4; 431 | public final static int FUNCTION_CALL = 5; 432 | public final static int COMPOUND_STMT = 6; 433 | 434 | private final static String[] table = {"function_def", "function_name", "parameter_name", 435 | "declarator", "type_name", "identifier", "compound_statement"}; 436 | private static int[] IDX = {0, 0, 0, 0, 0, 0, 0}; 437 | 438 | private static List ruleNames; 439 | 440 | private ExecutorService executorService; 441 | private List> future_list = new ArrayList>(); // for multithread 442 | //private List job_list = new ArrayList(); // for singlethread 443 | private function functionInstance = null; 444 | 445 | // Function's name 446 | private int funcNameFlag = 0; 447 | private StringBuilder funcNameStr = new StringBuilder(); 448 | 449 | // Function parameter's name 450 | private int paramNameFlag = 0; 451 | private StringBuilder paramNameStr = new StringBuilder(); //final? 
452 | 453 | // type (return type, parameter type, local variable type) 454 | private int typeNameFlag = 0; 455 | private StringBuilder typeNameStr = new StringBuilder(); 456 | 457 | // function definition 458 | private int funcDefFlag = 0; 459 | 460 | // function body (compund_statement) 461 | private int compoundStmtFlag = 0; 462 | 463 | private String srcFileName; 464 | private int numLines = 0; 465 | 466 | // set SLL option 467 | private int enableSLL = 0; 468 | 469 | 470 | public TreeParser() { 471 | this.functionInstance = null; 472 | 473 | this.funcNameFlag = 0; 474 | this.funcNameStr = new StringBuilder(); 475 | 476 | this.paramNameFlag = 0; 477 | this.paramNameStr = new StringBuilder(); 478 | 479 | this.typeNameFlag = 0; 480 | this.typeNameStr = new StringBuilder(); 481 | 482 | this.funcDefFlag = 0; 483 | 484 | this.compoundStmtFlag = 0; 485 | 486 | this.enableSLL = 0; 487 | } 488 | 489 | private void _init(ModuleParser parser) { 490 | //this(); 491 | this.executorService = Executors.newFixedThreadPool( 492 | Runtime.getRuntime().availableProcessors() 493 | ); 494 | 495 | this.functionInstance = null; 496 | 497 | this.funcNameFlag = 0; 498 | this.funcNameStr = new StringBuilder(); 499 | 500 | this.paramNameFlag = 0; 501 | this.paramNameStr = new StringBuilder(); 502 | 503 | this.typeNameFlag = 0; 504 | this.typeNameStr = new StringBuilder(); 505 | 506 | this.funcDefFlag = 0; 507 | 508 | this.compoundStmtFlag = 0; 509 | 510 | this.enableSLL = 0; 511 | 512 | 513 | if (TreeParser.IS_FIRST != 0) { 514 | this.ruleNames = Arrays.asList(parser.getRuleNames()); 515 | 516 | for (int i = 0; i < parser.ruleNames.length; i++) { 517 | for (int j = 0; j < TreeParser.table.length; j++) { 518 | if (parser.ruleNames[i].equals(TreeParser.table[j])) 519 | TreeParser.IDX[j] = i; 520 | } 521 | } 522 | } 523 | TreeParser.IS_FIRST = 0; 524 | } 525 | 526 | public List ParseFile(String srcFileName) { 527 | return this.ParseFile(srcFileName, 1); 528 | } 529 | public List ParseFile(String srcFileName, int bSLL) { 530 | List ret = new ArrayList(); 531 | try { 532 | ANTLRFileStream antlrFileStream = new ANTLRFileStream(srcFileName); 533 | ModuleLexer lexer = new ModuleLexer(antlrFileStream); 534 | CommonTokenStream tokens = new CommonTokenStream(lexer); 535 | ModuleParser parser = new ModuleParser(tokens); 536 | parser.removeErrorListeners(); // remove error listener 537 | 538 | if (bSLL != 0) { 539 | parser.getInterpreter().setPredictionMode(PredictionMode.SLL); 540 | parser.setErrorHandler(new BailErrorStrategy()); 541 | } 542 | 543 | //long t1 = System.currentTimeMillis(); 544 | ParseTree tree; 545 | try { 546 | tree = parser.code(); 547 | } 548 | catch (ParseCancellationException e) { 549 | parser.reset(); 550 | parser.getInterpreter().setPredictionMode(PredictionMode.LL); 551 | parser.setErrorHandler(new DefaultErrorStrategy()); 552 | tree = parser.code(); 553 | } 554 | //long t2 = System.currentTimeMillis(); 555 | //System.err.println("time: " + (t2 - t1) / 1000.0); 556 | this._init(parser); // reset before traverse a parse tree 557 | this.enableSLL = bSLL; 558 | 559 | LineNumberReader lnr = new LineNumberReader(new FileReader(new File(srcFileName))); 560 | while (lnr.skip(Long.MAX_VALUE) > 0); 561 | this.numLines = lnr.getLineNumber() + 1; 562 | lnr.close(); 563 | 564 | this.srcFileName = new String(srcFileName); 565 | 566 | ParseTreeWalker.DEFAULT.walk(this, tree); 567 | 568 | //System.err.println("before get()"); 569 | //long t3 = System.currentTimeMillis(); 570 | for (Future future : 
this.future_list) { 571 | ret.add(future.get()); 572 | } 573 | //long t4 = System.currentTimeMillis(); 574 | //System.err.println("time: " + (t4 - t3) / 1000.0); 575 | //System.err.println("after get()"); 576 | /* 577 | for (int i = 0; i < job_list.size(); i++) { // singlethread 578 | JobInstance j = job_list.get(i); 579 | BodyParser p = new BodyParser(); 580 | p.ParseString(j.string, j.functionInstance, j.line, j.enableSLL); 581 | ret.add(j.functionInstance); 582 | } 583 | */ 584 | 585 | } catch (Exception e) { 586 | e.printStackTrace(); 587 | this.executorService.shutdownNow(); 588 | return null; 589 | } 590 | this.executorService.shutdown(); 591 | return ret; 592 | } 593 | 594 | @Override 595 | public void enterEveryRule(ParserRuleContext ctx) { 596 | int ruleIndex = ctx.getRuleIndex(); 597 | 598 | if (ruleIndex == TreeParser.IDX[TreeParser.FUNCTION_DEF]) { 599 | this.funcDefFlag = 1; 600 | this.functionInstance = new function(this.srcFileName); 601 | this.functionInstance.parentNumLoc = this.numLines; 602 | this.functionInstance.funcId = this.future_list.size() + 1; 603 | this.functionInstance.lineStart = ctx.getStart().getLine(); 604 | this.functionInstance.lineStop = ctx.getStop().getLine(); 605 | } 606 | else if (this.funcDefFlag == 0) 607 | return; 608 | else if (ruleIndex == TreeParser.IDX[TreeParser.FUNCTION_NAME]) 609 | this.funcNameFlag = 1; 610 | else if (ruleIndex == TreeParser.IDX[TreeParser.PARAMETER_NAME]) 611 | this.paramNameFlag = 1; 612 | else if (ruleIndex == TreeParser.IDX[TreeParser.TYPE_NAME]) 613 | this.typeNameFlag = 1; 614 | else if (ruleIndex == TreeParser.IDX[TreeParser.COMPOUND_STMT]) 615 | this.compoundStmtFlag = 1; 616 | } 617 | 618 | @Override 619 | public void exitEveryRule(ParserRuleContext ctx) { 620 | int ruleIndex = ctx.getRuleIndex(); 621 | 622 | if (ruleIndex == TreeParser.IDX[TreeParser.FUNCTION_DEF] && this.funcDefFlag != 0) 623 | this.funcDefFlag = 0; 624 | else if (ruleIndex == TreeParser.IDX[TreeParser.FUNCTION_NAME] && this.funcNameFlag != 0) { 625 | this.functionInstance.name = this.funcNameStr.toString().trim(); 626 | this.funcNameFlag = 0; 627 | this.funcNameStr.setLength(0); 628 | } 629 | else if (ruleIndex == TreeParser.IDX[TreeParser.PARAMETER_NAME] && this.paramNameFlag != 0) { 630 | this.functionInstance.parameterList.add(this.paramNameStr.toString().trim()); 631 | this.paramNameFlag = 0; 632 | this.paramNameStr.setLength(0); 633 | } 634 | else if (ruleIndex == TreeParser.IDX[TreeParser.TYPE_NAME] && this.typeNameFlag != 0) { 635 | this.functionInstance.dataTypeList.add(this.typeNameStr.toString().trim()); 636 | this.typeNameFlag = 0; 637 | this.typeNameStr.setLength(0); 638 | } 639 | else if (ruleIndex == TreeParser.IDX[TreeParser.COMPOUND_STMT] && this.compoundStmtFlag != 0) { 640 | this.compoundStmtFlag = 0; 641 | 642 | CharStream inputStream = ctx.start.getInputStream(); 643 | int start_index = ctx.start.getStopIndex(); 644 | int stop_index = ctx.stop.getStopIndex(); 645 | String string = inputStream.getText(new Interval(start_index + 1, stop_index - 1)); 646 | int line = ctx.start.getLine(); 647 | 648 | // add function's body 649 | this.functionInstance.funcBody = string; 650 | 651 | //this.job_list.add(new JobInstance(string, this.functionInstance, line, this.enableSLL)); // for singlethread 652 | this.future_list.add( 653 | this.executorService.submit(new JobInstance(string, this.functionInstance, line, this.enableSLL)) 654 | ); // for multithread 655 | } 656 | } 657 | 658 | @Override 659 | public void visitTerminal(TerminalNode 
node) { 660 | if (this.compoundStmtFlag != 0 || this.funcDefFlag == 0) 661 | return; 662 | else if (this.funcNameFlag != 0) { 663 | this.funcNameStr.append(Trees.getNodeText(node, this.ruleNames)); 664 | this.funcNameStr.append(' '); 665 | } 666 | else if (this.paramNameFlag != 0) { 667 | this.paramNameStr.append(Trees.getNodeText(node, this.ruleNames)); 668 | this.paramNameStr.append(' '); 669 | } 670 | else if (this.typeNameFlag != 0) { 671 | this.typeNameStr.append(Trees.getNodeText(node, this.ruleNames)); 672 | this.typeNameStr.append(' '); 673 | } 674 | } 675 | 676 | @Override 677 | public void visitErrorNode(ErrorNode node) { } 678 | } 679 | 680 | class TreeParser1 implements ParseTreeListener { 681 | private static int IS_FIRST = 1; 682 | 683 | public final static int FUNCTION_DEF = 0; 684 | public final static int FUNCTION_NAME = 1; 685 | public final static int PARAMETER_NAME = 2; 686 | public final static int DECLARATOR = 3; 687 | public final static int TYPE_NAME = 4; 688 | public final static int FUNCTION_CALL = 5; 689 | public final static int COMPOUND_STMT = 6; 690 | 691 | private final static String[] table = {"function_def", "function_name", "parameter_name", 692 | "declarator", "type_name", "identifier", "compound_statement"}; 693 | private static int[] IDX = {0, 0, 0, 0, 0, 0, 0}; 694 | 695 | private static List ruleNames; 696 | 697 | //private ExecutorService executorService; 698 | //private List> future_list = new ArrayList>(); // for multithread 699 | //private List job_list = new ArrayList(); // for singlethread 700 | 701 | private List ret; 702 | 703 | private function functionInstance = null; 704 | 705 | // Function's name 706 | private int funcNameFlag = 0; 707 | private StringBuilder funcNameStr = new StringBuilder(); 708 | 709 | // Function parameter's name 710 | private int paramNameFlag = 0; 711 | private StringBuilder paramNameStr = new StringBuilder(); //final? 
712 | 713 | // type (return type, parameter type, local variable type) 714 | private int typeNameFlag = 0; 715 | private StringBuilder typeNameStr = new StringBuilder(); 716 | 717 | // function definition 718 | private int funcDefFlag = 0; 719 | 720 | // function body (compund_statement) 721 | private int compoundStmtFlag = 0; 722 | 723 | private String srcFileName; 724 | private int numLines = 0; 725 | 726 | // set SLL option 727 | private int enableSLL = 0; 728 | 729 | 730 | public TreeParser1() { 731 | this.ret = new ArrayList(); 732 | 733 | this.functionInstance = null; 734 | 735 | this.funcNameFlag = 0; 736 | this.funcNameStr = new StringBuilder(); 737 | 738 | this.paramNameFlag = 0; 739 | this.paramNameStr = new StringBuilder(); 740 | 741 | this.typeNameFlag = 0; 742 | this.typeNameStr = new StringBuilder(); 743 | 744 | this.funcDefFlag = 0; 745 | 746 | this.compoundStmtFlag = 0; 747 | 748 | this.enableSLL = 0; 749 | } 750 | 751 | private void _init(ModuleParser parser) { 752 | //this(); 753 | //this.executorService = Executors.newFixedThreadPool( 754 | // Runtime.getRuntime().availableProcessors() 755 | //); 756 | this.ret = new ArrayList(); 757 | 758 | this.functionInstance = null; 759 | 760 | this.funcNameFlag = 0; 761 | this.funcNameStr = new StringBuilder(); 762 | 763 | this.paramNameFlag = 0; 764 | this.paramNameStr = new StringBuilder(); 765 | 766 | this.typeNameFlag = 0; 767 | this.typeNameStr = new StringBuilder(); 768 | 769 | this.funcDefFlag = 0; 770 | 771 | this.compoundStmtFlag = 0; 772 | 773 | this.enableSLL = 0; 774 | 775 | 776 | if (TreeParser1.IS_FIRST != 0) { 777 | this.ruleNames = Arrays.asList(parser.getRuleNames()); 778 | 779 | for (int i = 0; i < parser.ruleNames.length; i++) { 780 | for (int j = 0; j < TreeParser1.table.length; j++) { 781 | if (parser.ruleNames[i].equals(TreeParser1.table[j])) 782 | TreeParser1.IDX[j] = i; 783 | } 784 | } 785 | } 786 | TreeParser1.IS_FIRST = 0; 787 | } 788 | 789 | public List ParseFile(String srcFileName) { 790 | return this.ParseFile(srcFileName, 1); 791 | } 792 | public List ParseFile(String srcFileName, int bSLL) { 793 | try { 794 | ANTLRFileStream antlrFileStream = new ANTLRFileStream(srcFileName); 795 | ModuleLexer lexer = new ModuleLexer(antlrFileStream); 796 | CommonTokenStream tokens = new CommonTokenStream(lexer); 797 | ModuleParser parser = new ModuleParser(tokens); 798 | parser.removeErrorListeners(); // remove error listener 799 | 800 | if (bSLL != 0) { 801 | parser.getInterpreter().setPredictionMode(PredictionMode.SLL); 802 | parser.setErrorHandler(new BailErrorStrategy()); 803 | } 804 | 805 | //long t1 = System.currentTimeMillis(); 806 | ParseTree tree; 807 | try { 808 | tree = parser.code(); 809 | } 810 | catch (ParseCancellationException e) { 811 | parser.reset(); 812 | parser.getInterpreter().setPredictionMode(PredictionMode.LL); 813 | parser.setErrorHandler(new DefaultErrorStrategy()); 814 | tree = parser.code(); 815 | } 816 | //long t2 = System.currentTimeMillis(); 817 | //System.err.println("time: " + (t2 - t1) / 1000.0); 818 | this._init(parser); // reset before traverse a parse tree 819 | this.enableSLL = bSLL; 820 | 821 | LineNumberReader lnr = new LineNumberReader(new FileReader(new File(srcFileName))); 822 | while (lnr.skip(Long.MAX_VALUE) > 0); 823 | this.numLines = lnr.getLineNumber() + 1; 824 | lnr.close(); 825 | 826 | this.srcFileName = new String(srcFileName); 827 | 828 | ParseTreeWalker.DEFAULT.walk(this, tree); 829 | 830 | //System.err.println("before get()"); 831 | //long t3 = 
System.currentTimeMillis(); 832 | //for (Future future : this.future_list) { 833 | // ret.add(future.get()); 834 | //} 835 | //long t4 = System.currentTimeMillis(); 836 | //System.err.println("time: " + (t4 - t3) / 1000.0); 837 | //System.err.println("after get()"); 838 | /* 839 | for (int i = 0; i < job_list.size(); i++) { // singlethread 840 | JobInstance j = job_list.get(i); 841 | BodyParser p = new BodyParser(); 842 | p.ParseString(j.string, j.functionInstance, j.line, j.enableSLL); 843 | ret.add(j.functionInstance); 844 | } 845 | */ 846 | 847 | } catch (Exception e) { 848 | e.printStackTrace(); 849 | //this.executorService.shutdownNow(); 850 | return null; 851 | } 852 | //this.executorService.shutdown(); 853 | return this.ret; 854 | } 855 | 856 | @Override 857 | public void enterEveryRule(ParserRuleContext ctx) { 858 | int ruleIndex = ctx.getRuleIndex(); 859 | 860 | if (ruleIndex == TreeParser1.IDX[TreeParser1.FUNCTION_DEF]) { 861 | this.funcDefFlag = 1; 862 | this.functionInstance = new function(this.srcFileName); 863 | this.functionInstance.parentNumLoc = this.numLines; 864 | this.functionInstance.funcId = this.ret.size() + 1; 865 | this.functionInstance.lineStart = ctx.getStart().getLine(); 866 | this.functionInstance.lineStop = ctx.getStop().getLine(); 867 | } 868 | else if (this.funcDefFlag == 0) 869 | return; 870 | else if (ruleIndex == TreeParser1.IDX[TreeParser1.FUNCTION_NAME]) 871 | this.funcNameFlag = 1; 872 | else if (ruleIndex == TreeParser1.IDX[TreeParser1.PARAMETER_NAME]) 873 | this.paramNameFlag = 1; 874 | else if (ruleIndex == TreeParser1.IDX[TreeParser1.TYPE_NAME]) 875 | this.typeNameFlag = 1; 876 | else if (ruleIndex == TreeParser1.IDX[TreeParser1.COMPOUND_STMT]) 877 | this.compoundStmtFlag = 1; 878 | } 879 | 880 | @Override 881 | public void exitEveryRule(ParserRuleContext ctx) { 882 | int ruleIndex = ctx.getRuleIndex(); 883 | 884 | if (ruleIndex == TreeParser1.IDX[TreeParser1.FUNCTION_DEF] && this.funcDefFlag != 0) { 885 | this.ret.add(this.functionInstance); 886 | this.funcDefFlag = 0; 887 | } 888 | else if (ruleIndex == TreeParser1.IDX[TreeParser1.FUNCTION_NAME] && this.funcNameFlag != 0) { 889 | this.functionInstance.name = this.funcNameStr.toString().trim(); 890 | this.funcNameFlag = 0; 891 | this.funcNameStr.setLength(0); 892 | } 893 | else if (ruleIndex == TreeParser1.IDX[TreeParser1.PARAMETER_NAME] && this.paramNameFlag != 0) { 894 | this.functionInstance.parameterList.add(this.paramNameStr.toString().trim()); 895 | this.paramNameFlag = 0; 896 | this.paramNameStr.setLength(0); 897 | } 898 | else if (ruleIndex == TreeParser1.IDX[TreeParser1.TYPE_NAME] && this.typeNameFlag != 0) { 899 | this.functionInstance.dataTypeList.add(this.typeNameStr.toString().trim()); 900 | this.typeNameFlag = 0; 901 | this.typeNameStr.setLength(0); 902 | } 903 | else if (ruleIndex == TreeParser1.IDX[TreeParser1.COMPOUND_STMT] && this.compoundStmtFlag != 0) { 904 | this.compoundStmtFlag = 0; 905 | 906 | CharStream inputStream = ctx.start.getInputStream(); 907 | int start_index = ctx.start.getStopIndex(); 908 | int stop_index = ctx.stop.getStopIndex(); 909 | String string = inputStream.getText(new Interval(start_index + 1, stop_index - 1)); 910 | int line = ctx.start.getLine(); 911 | 912 | // add function's body 913 | this.functionInstance.funcBody = string; 914 | 915 | //this.job_list.add(new JobInstance(string, this.functionInstance, line, this.enableSLL)); // for singlethread 916 | //this.future_list.add( 917 | // this.executorService.submit(new JobInstance(string, 
this.functionInstance, line, this.enableSLL)) 918 | //); // for multithread 919 | } 920 | } 921 | 922 | @Override 923 | public void visitTerminal(TerminalNode node) { 924 | if (this.compoundStmtFlag != 0 || this.funcDefFlag == 0) 925 | return; 926 | else if (this.funcNameFlag != 0) { 927 | this.funcNameStr.append(Trees.getNodeText(node, this.ruleNames)); 928 | this.funcNameStr.append(' '); 929 | } 930 | else if (this.paramNameFlag != 0) { 931 | this.paramNameStr.append(Trees.getNodeText(node, this.ruleNames)); 932 | this.paramNameStr.append(' '); 933 | } 934 | else if (this.typeNameFlag != 0) { 935 | this.typeNameStr.append(Trees.getNodeText(node, this.ruleNames)); 936 | this.typeNameStr.append(' '); 937 | } 938 | } 939 | 940 | @Override 941 | public void visitErrorNode(ErrorNode node) { } 942 | } 943 | -------------------------------------------------------------------------------- /FuncParser-opt/src/Module.g4: -------------------------------------------------------------------------------- 1 | grammar Module; 2 | 3 | import ModuleLex, Expressions, Common, FunctionDef, CoarseSimpleDecl; 4 | 5 | /* 6 | Copyright (C) 2013 Fabian 'fabs' Yamaguchi 7 | This program is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | */ 20 | /* 21 | @header{ 22 | package antlr.C; 23 | } 24 | */ 25 | code : (function_def | simple_decl | using_directive | water)*; 26 | 27 | using_directive: USING NAMESPACE identifier ';'; 28 | 29 | -------------------------------------------------------------------------------- /FuncParser-opt/src/ModuleLex.g4: -------------------------------------------------------------------------------- 1 | lexer grammar ModuleLex; 2 | 3 | // Keywords shared among C/C++/Java 4 | 5 | IF: 'if'; ELSE: 'else'; FOR: 'for'; WHILE: 'while'; 6 | 7 | BREAK: 'break'; CASE: 'case'; CONTINUE: 'continue'; 8 | SWITCH: 'switch'; DO: 'do'; 9 | 10 | GOTO: 'goto'; RETURN: 'return'; 11 | 12 | TYPEDEF: 'typedef'; 13 | VOID: 'void'; UNSIGNED: 'unsigned'; SIGNED: 'signed'; 14 | LONG: 'long'; CV_QUALIFIER : 'const' | 'volatile'; 15 | 16 | // Keywords shared among C++/Java 17 | 18 | VIRTUAL: 'virtual'; 19 | TRY: 'try'; CATCH: 'catch'; THROW: 'throw'; 20 | USING: 'using'; NAMESPACE: 'namespace'; 21 | 22 | // Keywords shared among C/C++ 23 | 24 | AUTO: 'auto'; REGISTER: 'register'; 25 | 26 | // C++ keywords 27 | 28 | OPERATOR: 'operator'; 29 | TEMPLATE: 'template'; 30 | NEW: 'new'; 31 | 32 | CLASS_KEY: ('struct' | 'class' | 'union' | 'enum'); 33 | 34 | ALPHA_NUMERIC: [a-zA-Z_~][a-zA-Z0-9_]*; 35 | 36 | OPENING_CURLY: '{'; 37 | CLOSING_CURLY: '}'; 38 | 39 | // pre-processor directives: C/C++ 40 | 41 | PRE_IF: ('#if' | '#ifdef' | '#ifndef') ~[\r\n]* '\r'? '\n'; 42 | PRE_ELSE: ('#else' | '#elif') ~[\r\n]* '\r'? '\n'; 43 | PRE_ENDIF: '#endif' ~[\r\n]* '\r'? '\n'; 44 | // PREPROC : '#' ~[\r\n]* '\r'? '\n' -> skip; 45 | 46 | 47 | HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ; 48 | DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? 
; 49 | OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ; 50 | 51 | FLOATING_POINT_LITERAL 52 | : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? 53 | | '.' ('0'..'9')+ Exponent? FloatTypeSuffix? 54 | | ('0'..'9')+ Exponent FloatTypeSuffix? 55 | | ('0'..'9')+ Exponent? FloatTypeSuffix 56 | ; 57 | 58 | CHAR 59 | : '\'' ( EscapeSequence | ~('\''|'\\') ) '\'' 60 | ; 61 | 62 | STRING 63 | : '"' ( EscapeSequence | ~('\\'|'"') )* '"' 64 | ; 65 | 66 | 67 | fragment 68 | IntegerTypeSuffix 69 | : ('u'|'U')? ('l'|'L') 70 | | ('u'|'U') ('l'|'L')? 71 | ; 72 | 73 | fragment 74 | Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+; 75 | 76 | fragment 77 | FloatTypeSuffix : ('f'|'F'|'d'|'D'); 78 | 79 | 80 | fragment 81 | EscapeSequence 82 | : '\\' . 83 | | UnicodeEscape 84 | | OctalEscape 85 | ; 86 | 87 | fragment 88 | OctalEscape 89 | : '\\' ('0'..'3') ('0'..'7') ('0'..'7') 90 | | '\\' ('0'..'7') ('0'..'7') 91 | | '\\' ('0'..'7') 92 | ; 93 | 94 | fragment 95 | UnicodeEscape 96 | : '\\' 'u' HexDigit HexDigit HexDigit HexDigit 97 | ; 98 | 99 | fragment 100 | HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; 101 | 102 | COMMENT 103 | : '/*' .*? '*/' -> skip 104 | ; 105 | WHITESPACE : [ \r\t\u000C\n]+ -> skip 106 | ; 107 | 108 | CPPCOMMENT 109 | : '//' ~[\r\n]* '\r'? '\n' -> skip 110 | ; 111 | 112 | OTHER : . -> skip ; 113 | -------------------------------------------------------------------------------- /FuncParser-opt/src/SimpleDecl.g4: -------------------------------------------------------------------------------- 1 | grammar SimpleDecl; 2 | 3 | simple_decl : (TYPEDEF? template_decl_start?) var_decl; 4 | 5 | var_decl : class_def init_declarator_list? #declByClass 6 | | type_name init_declarator_list #declByType 7 | ; 8 | 9 | init_declarator_list: init_declarator (',' init_declarator)* ';'; 10 | 11 | initializer: assign_expr 12 | |'{' initializer_list '}' 13 | ; 14 | 15 | initializer_list: initializer (',' initializer)*; 16 | 17 | 18 | class_def: CLASS_KEY class_name? base_classes? OPENING_CURLY {skipToEndOfObject(); } ; 19 | class_name: identifier; 20 | base_classes: ':' base_class (',' base_class)*; 21 | base_class: VIRTUAL? access_specifier? identifier; 22 | 23 | type_name : (CV_QUALIFIER* (CLASS_KEY | UNSIGNED | SIGNED)? 24 | base_type ('<' template_param_list '>')? ('::' base_type ('<' template_param_list '>')? )*) CV_QUALIFIER? 25 | | UNSIGNED 26 | | SIGNED 27 | ; 28 | 29 | 30 | base_type: (ALPHA_NUMERIC | VOID | LONG | LONG)+; 31 | 32 | // Parameters 33 | 34 | param_decl_specifiers : (AUTO | REGISTER)? type_name; 35 | 36 | // this is a bit misleading. We're just allowing access_specifiers 37 | // here because C programs can use 'public', 'protected' or 'private' 38 | // as variable names. 39 | 40 | parameter_name: identifier; 41 | 42 | param_type_list: '(' VOID ')' 43 | | '(' (param_type (',' param_type)*)? ')'; 44 | 45 | param_type: param_decl_specifiers param_type_id; 46 | param_type_id: ptrs? ('(' param_type_id ')' | parameter_name?) 
type_suffix?; 48 | 49 | // operator-identifiers not implemented 50 | identifier : (ALPHA_NUMERIC ('::' ALPHA_NUMERIC)*) | access_specifier; 51 | number: HEX_LITERAL | DECIMAL_LITERAL | OCTAL_LITERAL; 52 | 53 | ptrs: (ptr_operator 'restrict'?)+; -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Seulbae Kim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the "Software"), 7 | to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | and/or sell copies of the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VUDDY (a.k.a. `hmark`) 2 | VUDDY is an approach for **scalable** and **accurate** vulnerable code clone 3 | detection. This approach is specifically designed to accurately find 4 | vulnerabilities in massive code bases (e.g., Linux kernel, 25 MLoC). 5 | Its principles and results are discussed in our 6 | [paper](https://ccs.korea.ac.kr/pds/SNP17.pdf), which was published at the 38th 7 | IEEE Symposium on Security and Privacy (S&P'17). 8 | 9 | `hmark` is the implementation of VUDDY. It is the client-side preprocessing 10 | tool for "Vulnerable Code Clone Detection" testing provided by 11 | [IoTcube](https://iotcube.net), an automated vulnerability testing platform. 12 | Detailed instructions are available [here](https://iotcube.net/userguide/manual/hmark). 13 | 14 | This project was funded by IITP (Development of Vulnerability Discovery Technologies 15 | for IoT Software Security), and was conducted at [CSSA](https://cssa.korea.ac.kr) 16 | (Center for Software Security and Assurance) at Korea University. 17 | 18 | ## Getting Started with `hmark` 19 | 20 | ### Prerequisites 21 | - **Linux or OS X** - *hmark* is designed to work on any of these operating 22 | systems. Tested OS distributions include Ubuntu 14.04, 16.04, and 18.04, 23 | Fedora 25, and OS X. Let me know if your OS is not supported. 24 | - Confirmed in May 2024: VUDDY works seamlessly on Ubuntu 22.04 25 | - Confirmed in Jan 2025: VUDDY also works on Windows 10 26 | - **Python 3** - VUDDY is now fully compatible with Python 3 (Jan 2025 onwards) 27 | - **python-tk** package - (only required if you want the GUI) install it from your 28 | package manager 29 | - **Java Runtime Environment (JRE)** - We recommend openjdk-8-jre. A quick install sketch for Debian/Ubuntu-like systems is shown below.
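The following is a minimal sketch of installing and checking these prerequisites on a Debian/Ubuntu-like system. The package names `python3-tk` and `openjdk-8-jre` are assumptions inferred from the list above and from `dep.sh` (which installs `python-tk` via `apt-get`); adjust them for your distribution, and skip `python3-tk` if you only use the CLI mode.
```
# Assumed package names; the repo's dep.sh installs python-tk the same way.
sudo apt-get -y install python3 python3-tk openjdk-8-jre
python3 --version   # confirm Python 3 is available
java -version       # confirm a JRE is available
```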
30 | 31 | ### Running `hmark` and checking the result on IoTcube (our web service) 32 | 1. `cd hmark` 33 | 2. `python hmark.py [-h] [-c path ON/OFF] [-n] [-V]` 34 | 35 | You can see the help message below by passing an `-h` (or `--help`) argument. 36 | ``` 37 | usage: python hmark.py [-h] [-c path ON/OFF] [-n] [-V] 38 | 39 | - optional arguments: 40 | -h, --help show this help message and exit 41 | 42 | -c path ON/OFF, --cli-mode path ON/OFF 43 | run hmark without GUI by specifying the path to the 44 | target directory, and the abstraction mode 45 | -n, --no-updatecheck bypass update checking (not recommended) 46 | -V, --version print hmark version and exit 47 | ``` 48 | 3. Upload the resulting `hidx` file on IoTcube's [Vulnerable Code Clone 49 | Detection](https://iotcube.net/process/type/wf1) testing. 50 | 51 | ### Running `hmark` and checking the result locally 52 | Follow steps 1 and 2 above to generate the `hidx` of the target program. 53 | Skip step 3. 54 | 55 | 4. To build your own vulnerability database, checkout `vulnDBGen`, 56 | which is a subrepo of this repository and follow the guidelines 57 | to build a vulnerability database locally. 58 | ``` 59 | $ git submodule update --init 60 | $ cd vulnDBGen 61 | $ cat docs/examples.md 62 | ``` 63 | 64 | 5. After building your own vulnerability database, you can locally run the 65 | vulnerable clone checker: 66 | ``` 67 | $ cd .. 68 | $ python3 checker/check_clones.py --help 69 | $ python3 checker/check_clones.py --target path_to_target_hidx --database path_to_vulndb 70 | ``` 71 | 72 | ### Binary Release 73 | Instead of running `hmark` from source code, you can also download and execute 74 | prebuilt binaries. Binaries for Windows, Linux, and OS X are available 75 | [here](https://iotcube.net/downloads). 76 | 77 | ## Reporting Bugs 78 | For reporting bugs, you can [submit an 79 | issue](https://github.com/iotcube/hmark/issues) to the VUDDY GitHub, or send 80 | me an email. Feel free to send pull 81 | requests if you have suggestions or bugfixes! 
82 | 83 | ## About 84 | This program is authored and maintained by **Seulbae Kim** 85 | > GitHub [@seulbae-security](https://github.com/seulbae-security) / seulbae@postech.ac.kr 86 | 87 | ## TODOs 88 | Please feel free to submit pull requests for the following items: 89 | * Rewrite everything in Python3 90 | * Use a better parser 91 | * Replace all code that rely on stdin/stdout for IPC (e.g., git executions) with API calls 92 | 93 | -------------------------------------------------------------------------------- /checker/check_clones.py: -------------------------------------------------------------------------------- 1 | #/usr/bin/python3 2 | 3 | import os 4 | import argparse 5 | 6 | def load_hidx(hidx_file): 7 | len_hash_dict = dict() 8 | hash_file_dict = dict() 9 | 10 | with open(hidx_file, "r") as f: 11 | lines = f.readlines() 12 | 13 | init = True 14 | delim = False 15 | for line in lines: 16 | ls = line.strip() 17 | if init: 18 | init = False 19 | continue # skip first line (metadata) 20 | 21 | if len(ls) == 0: 22 | continue 23 | 24 | if "=====" in ls: 25 | delim = True 26 | continue 27 | 28 | tokens = ls.split("\t") 29 | 30 | if not delim: # before delimiter: len to hash list 31 | func_len = int(tokens[0]) 32 | hash_list = set(tokens[1:]) 33 | len_hash_dict[func_len] = hash_list 34 | 35 | else: # after delimiter: hash to file and line 36 | hash_val = tokens[0] 37 | file_name = tokens[1] 38 | line_num = tokens[2] 39 | hash_file_dict[hash_val] = [file_name, line_num] 40 | 41 | return hidx_file, len_hash_dict, hash_file_dict 42 | 43 | 44 | if __name__ == "__main__": 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument( 47 | "-t", 48 | "--target", 49 | type=str, 50 | required=True, 51 | help="hidx of the target program to find vulnerable clones" 52 | ) 53 | 54 | parser.add_argument( 55 | "-d", 56 | "--database", 57 | type=str, 58 | required=True, 59 | help="Path to the directory storing vulnerablility database hidx generated by vulnDBGen" 60 | ) 61 | 62 | args = parser.parse_args() 63 | 64 | if not os.path.exists(args.target): 65 | print(f"[-] {args.target} does not exist") 66 | exit(1) 67 | 68 | if not args.target.endswith(".hidx"): 69 | print(f"[-] {args.target} does not appear to be a hidx file") 70 | exit(1) 71 | 72 | if not os.path.exists(args.database): 73 | print(f"[-] {args.database} does not exist") 74 | exit(1) 75 | 76 | target, target_len_hash_dict, target_hash_file_dict = load_hidx(args.target) 77 | 78 | vdb_list = list() 79 | vdb_len_hash_dict_list = list() 80 | vdb_hash_file_dict_list = list() 81 | 82 | for hidx_file in os.listdir(args.database): 83 | file_ = os.path.join(args.database, hidx_file) 84 | 85 | vdb, vdb_len_hash_dict, vdb_hash_file_dict = load_hidx(file_) 86 | vdb_list.append(vdb) 87 | vdb_len_hash_dict_list.append(vdb_len_hash_dict) 88 | vdb_hash_file_dict_list.append(vdb_hash_file_dict) 89 | 90 | collision_set = set() 91 | 92 | for vdb_idx, vdb in enumerate(vdb_list): 93 | print(f"Target {target} vs VDB {vdb}") 94 | for func_len in target_len_hash_dict: 95 | if func_len not in vdb_len_hash_dict_list[vdb_idx]: 96 | continue 97 | 98 | target_hash_list = target_len_hash_dict[func_len] 99 | vdb_hash_list = vdb_len_hash_dict_list[vdb_idx][func_len] 100 | 101 | collision = target_hash_list.intersection(vdb_hash_list) 102 | 103 | if len(collision) == 0: 104 | continue 105 | 106 | collision_set.update(collision) 107 | 108 | for hash_ in collision_set: 109 | print(hash_) 110 | file_info = target_hash_file_dict[hash_] 111 | vuln_info = 
vdb_hash_file_dict_list[vdb_idx][hash_] 112 | print(f"[+] {file_info[1]}-th function in {file_info[0]}" 113 | f"is a clone of vulnerability at {vuln_info[0]}") 114 | 115 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import platform 2 | 3 | """ 4 | [Note for Windows] 5 | - Use '\\' or '/' in path 6 | Ex) gitStoragePath = "D:\\Source\\gitrepos" 7 | - Install 'Git for Windows' 8 | - Windows version of VUDDY use its own JRE 9 | 10 | [Note for POSIX] 11 | - Use '/' for path 12 | Ex) gitStoragePath = "/home/ubuntu/gitrepos/" 13 | - Java binary is only needed in POSIX 14 | """ 15 | 16 | gitStoragePath = "/home/ubuntu/gitrepos/" 17 | 18 | pf = platform.platform() 19 | if "Windows" in pf: # Windows 20 | gitBinary = "C:\\Program Files\\Git\\bin\\git.exe" 21 | diffBinary = "C:\\Program Files\\Git\\usr\\bin\\diff.exe" 22 | else: # POSIX 23 | gitBinary = "git" 24 | diffBinary = "diff" 25 | javaBinary = "java" 26 | -------------------------------------------------------------------------------- /dep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo apt-get -y install python-tk 4 | 5 | -------------------------------------------------------------------------------- /docs/examples.md: -------------------------------------------------------------------------------- 1 | Vulnerability Database Generator (vulnDBGen) - Use Examples 2 | ==== 3 | 4 | ## 1. Configuration Settings 5 | Set `gitStoragePath`, `gitBinary`, `diffBinary`, and `javaBinary` in `config.py`. 6 | ``` 7 | ~$ cd ~/vulnDBGen 8 | ~/vulnDBGen$ cat config.py 9 | ``` 10 | Result 11 | ``` 12 | import platform 13 | 14 | gitStoragePath = r"/home/squizz/gitrepos" 15 | version = "3.0.3" # for use in IoTcube. 16 | pf = platform.platform() 17 | if "Windows" in pf: # Windows 18 | gitBinary = r"C:\Program Files\Git\bin\git.exe" 19 | diffBinary = r"C:\Program Files\Git\usr\bin\diff.exe" 20 | else: # POSIX 21 | gitBinary = "git" 22 | diffBinary = "diff" 23 | javaBinary = "java" 24 | 25 | ``` 26 | 27 | ## 2. Cloning repositories and collecting vulnerabilities 28 | 29 | ### A. ChakraCore (Microsoft) 30 | ``` 31 | ~$ cd ~/gitrepos 32 | ~/gitrepos$ git clone https://github.com/Microsoft/ChakraCore.git 33 | 34 | ~$ cd ~/vulnDBGen 35 | ~/vulnDBGen$ python initialize.py 36 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py ChakraCore 37 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py ChakraCore 38 | ``` 39 | 40 | ### B. FreeBSD (FreeBSD Foundation) 41 | ``` 42 | ~$ cd ~/gitrepos 43 | ~/gitrepos$ git clone https://github.com/freebsd/freebsd.git 44 | 45 | ~$ cd ~/vulnDBGen 46 | ~/vulnDBGen$ python initialize.py 47 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py freebsd 48 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py freebsd 49 | ``` 50 | 51 | ### C. Gecko (Mozilla) 52 | ``` 53 | ~$ cd ~/gitrepos 54 | ~/gitrepos$ git clone https://github.com/mozilla/gecko-dev.git 55 | 56 | ~$ cd ~/vulnDBGen 57 | ~/vulnDBGen$ python initialize.py 58 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py gecko-dev 59 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py gecko-dev 60 | ``` 61 | 62 | ### D. 
glibc (GNU) 63 | ``` 64 | ~$ cd ~/gitrepos 65 | ~/gitrepos$ git clone git://sourceware.org/git/glibc.git 66 | 67 | ~$ cd ~/vulnDBGen 68 | ~/vulnDBGen$ python initialize.py 69 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py glibc 70 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py glibc 71 | ``` 72 | 73 | ### E. httpd (APACHE) 74 | ``` 75 | ~$ cd ~/gitrepos 76 | ~/gitrepos$ git clone https://github.com/apache/httpd.git 77 | 78 | ~$ cd ~/vulnDBGen 79 | ~/vulnDBGen$ python initialize.py 80 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py httpd 81 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py httpd 82 | ``` 83 | 84 | ### F. Kerberos Version 5 (MIT) 85 | ``` 86 | ~$ cd ~/gitrepos 87 | ~/gitrepos$ git clone https://github.com/krb5/krb5.git 88 | 89 | ~$ cd ~/vulnDBGen 90 | ~/vulnDBGen$ python initialize.py 91 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py krb5 92 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py krb5 93 | ``` 94 | 95 | ### G. Linux kernel (Linux Foundation) 96 | ``` 97 | ~$ cd ~/gitrepos 98 | ~/gitrepos$ git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 99 | 100 | ~$ cd ~/vulnDBGen 101 | ~/vulnDBGen$ python initialize.py 102 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py linux 103 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py linux 104 | ``` 105 | 106 | ### H. OpenSSL (OpenSSL Software Foundation) 107 | ``` 108 | ~$ cd ~/gitrepos 109 | ~/gitrepos$ git clone https://github.com/openssl/openssl.git 110 | 111 | ~$ cd ~/vulnDBGen 112 | ~/vulnDBGen$ python initialize.py 113 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py openssl 114 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py openssl 115 | ``` 116 | 117 | ### I. PostgreSQL DBMS (The PostgreSQL Global Development Group) 118 | ``` 119 | ~$ cd ~/gitrepos 120 | ~/gitrepos$ git clone https://github.com/postgres/postgres.git 121 | 122 | ~$ cd ~/vulnDBGen 123 | ~/vulnDBGen$ python initialize.py 124 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py postgres 125 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py postgres 126 | ``` 127 | 128 | ### J. Ubuntu Trusty (Canonical Ltd.) 129 | ``` 130 | ~$ cd ~/gitrepos 131 | ~/gitrepos$ git clone git://kernel.ubuntu.com/ubuntu/ubuntu-trusty.git 132 | 133 | ~$ cd ~/vulnDBGen 134 | ~/vulnDBGen$ python initialize.py 135 | ~/vulnDBGen$ python src/get_cvepatch_from_git.py ubuntu-trusty 136 | ~/vulnDBGen$ python src/get_source_from_cvepatch.py ubuntu-trusty 137 | ``` 138 | 139 | ## 3. 
Filtering vulnerabilities and generating hash index 140 | 141 | ### From all repositories 142 | ``` 143 | ~$ cd ~/vulnDBGen 144 | ~/vulnDBGen$ python src/vul_dup_remover.py 145 | ~/vulnDBGen$ python src/vul_verifier.py 146 | 147 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 ChakraCore 148 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 ChakraCore 149 | 150 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 freebsd 151 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 freebsd 152 | 153 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 gecko-dev 154 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 gecko-dev 155 | 156 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 glibc 157 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 glibc 158 | 159 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 httpd 160 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 httpd 161 | 162 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 krb5 163 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 krb5 164 | 165 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 linux 166 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 linux 167 | 168 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 openssl 169 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 openssl 170 | 171 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 postgres 172 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 postgres 173 | 174 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 0 ubuntu-trusty 175 | ~/vulnDBGen$ python src/vul_hidx_generator.py -a 4 ubuntu-trusty 176 | ``` 177 | -------------------------------------------------------------------------------- /docs/취약점 데이터베이스 생성 솔루션 매뉴얼 V1.0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/docs/취약점 데이터베이스 생성 솔루션 매뉴얼 V1.0.pdf -------------------------------------------------------------------------------- /hmark/FuncParser-opt.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/hmark/FuncParser-opt.jar -------------------------------------------------------------------------------- /hmark/README.md: -------------------------------------------------------------------------------- 1 | # hmark - Hash index generator for IoTcube's vulnerable code clone detection. 2 | *hmark* is the preprocessor for the "vulnerable code clone detection" 3 | in IoTcube (https://iotcube.korea.ac.kr). 4 | 5 | ## How to run 6 | This documentation addresses how to run hmark on various platforms, 7 | on the basis of *hmark* version 3.0.3. 8 | *hmark* has no application-specific requirements. 9 | 10 | ### Running on Linux 11 | Ubuntu 14.04 and 16.04 (32 and 64-bits) are officially supported by *hmark*. 12 | 1. Change access permissions if necesary. 13 | - 32-bit system: $ sudo chmod a+x hmark_3.0.3_linux_x86 14 | - 64-bit system: $ sudo chmod a+x hmark_3.0.3_linux_x64 15 | 2. Run with or without optional arguments. 16 | - In terminal: 17 | - 32-bit system: $ ./hmark_3.0.3_linux_x86 [-h] [-c path ON/OFF] [-n] [-V] 18 | - 64-bit system: $ ./hmark_3.0.3_linux_x64 [-h] [-c path ON/OFF] [-n] [-V] 19 | - Graphic user interface: 20 | - You can launch app in GUI (e.g., in Nautilus), 21 | but you cannot pass command line arguments. 22 | 23 | ### Running on Mac OS X (macOS) 24 | *hmark* for MAC supports 64-bit architecture. 25 | 1. 
Change access permissions if necessary. 26 | - $ sudo chmod a+x hmark_3.0.3_osx 27 | 2. Run with or without optional arguments. 28 | - In terminal: 29 | - $ ./hmark_3.0.3_osx [-h] [-c path ON/OFF] [-n] [-V] 30 | 31 | ### Running on Windows 32 | *hmark* works on both 32-bit and 64-bit windows. 33 | The execution is tested on Windows 7, 8, and 10. 34 | 1. Execute the application. 35 | - In terminal: 36 | - 32-bit system: hmark_3.0.3_win_x86.exe [-h] [-c path ON/OFF] [-n] [-V] 37 | - 64-bit system: hmark_3.0.3_win_x64.exe [-h] [-c path ON/OFF] [-n] [-V] 38 | - Graphic user interface: 39 | - You can launch app in GUI (e.g., in Explorer), 40 | but you cannot pass command line arguments. 41 | 42 | ## Optional arguments 43 | You can see the help message below by passing an `-h` (or `--help`) argument. 44 | ``` 45 | usage: ./hmark_3.0.3_linux_x64 [-h] [-c path ON/OFF] [-n] [-V] 46 | 47 | - optional arguments: 48 | -h, --help show this help message and exit 49 | 50 | -c path ON/OFF, --cli-mode path ON/OFF 51 | run hmark without GUI by specifying the path to the 52 | target directory, and the abstraction mode 53 | -n, --no-updatecheck bypass update checking (not recommended) 54 | -V, --version print hmark version and exit 55 | ``` 56 | 57 | ## Troubleshooting 58 | 1. Cannot execute *hmark* in GUI mode. 59 | - Some systems might require you to install several packages. 60 | - Oftentimes, `sudo apt-get install python-tk` will do. 61 | - If not, please contact us (cssa@korea.ac.kr) with the error message. 62 | - You can still use the same functionality as GUI in cli-mode (option `-c`). 63 | 2. App does not run. 64 | - Check the path to *hmark* 65 | - The path should not have any non-ascii, unicode characters. -------------------------------------------------------------------------------- /hmark/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /hmark/get_cpu_count.py: -------------------------------------------------------------------------------- 1 | def get_cpu_count(): 2 | try: 3 | import multiprocessing 4 | return multiprocessing.cpu_count() 5 | except (ImportError, NotImplementedError): 6 | pass 7 | 8 | # http://code.google.com/p/psutil/ 9 | try: 10 | import psutil 11 | return psutil.cpu_count() # psutil.NUM_CPUS on old versions 12 | except (ImportError, AttributeError): 13 | pass 14 | 15 | return 1 16 | -------------------------------------------------------------------------------- /hmark/hmark.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | """ 3 | Version 3.0~ of Hashmarker (CSSA) 4 | Author: Seulbae Kim (seulbae@korea.ac.kr) 5 | http://github.com/squizz617/discovuler-advanced/hmark 6 | """ 7 | #import urllib2 8 | from urllib import request 9 | import platform 10 | import sys 11 | import os 12 | 13 | import time 14 | from re import compile, findall 15 | import webbrowser 16 | # import Tkinter 17 | # import ttk 18 | from hashlib import md5 19 | 20 | import multiprocessing 21 | import subprocess 22 | import parseutility2 as pu 23 | import version 24 | import get_cpu_count 25 | 26 | import argparse 27 | from distutils.version import LooseVersion 28 | 29 | """ GLOBALS """ 30 | localVersion = version.version 31 | osName = "" 32 | bits = "" 33 | urlBase = "http://iotcube.korea.ac.kr/" 34 | urlCheck = urlBase + "getbinaryversion/wf1/" 35 | urlDownload = urlBase + "downloads" 36 | 37 | 38 | def get_platform(): 39 | global osName 40 | global bits 41 | 42 | pf = platform.platform() 43 | bits, _ = platform.architecture() 44 | if "Windows" in pf: 45 | osName = "win" 46 | if "64" in bits: 47 | bits = "64" 48 | else: 49 | bits = "86" 50 | elif "Linux" in pf: 51 | osName = "linux" 52 | if "64" in bits: 53 | bits = "64" 54 | else: 55 | bits = "86" 56 | else: 57 | osName = "osx" 58 | bits = "" 59 | 60 | 61 | def check_update(): 62 | global localVersion 63 | global osName 64 | 65 | if len(localVersion.split('.')) < 3: 66 | localVersion += ".0" 67 | 68 | if osName == "win": 69 | url = urlCheck + osName[0] + bits # ~/w64, or ~/w86 70 | elif osName == "linux": 71 | url = urlCheck + osName[0] + bits # ~/l64, or ~/l86 72 | elif osName == "osx": 73 | url = urlCheck + osName # ~/osx 74 | try: 75 | #response = urllib2.urlopen(url) 76 | response = request.urlopen(url) 77 | except Exception: 78 | print("[-] Update server is not responding.") 79 | print(" Please check your network connection or firewall and try again.") 80 | print(" To bypass update checking, run with [--no-updatecheck] option.") 81 | #raw_input("Press Enter to continue...") 82 | input("Press Enter to continue...") 83 | sys.exit() 84 | latestVersion = "0.0.0" # for exception handling 85 | 86 | #html = response.read() 87 | html = response.read().decode('utf-8') 88 | latestVersion = html 89 | 90 | if latestVersion == "-1": 91 | print("[-] There's something wrong with the server.") 92 | print(" You can report this issue to cssa@korea.ac.kr, with your version info.") 93 | print(" To bypass update checking, run with [--no-updatecheck] option.") 94 | #raw_input("Press Enter to continue...") 95 | input("Press Enter to continue...") 96 | sys.exit() 97 | 98 | if len(latestVersion.split('.')) < 3: 99 | latestVersion += '.0' 100 | 101 | print("Latest server version: " + latestVersion) 102 | print("Current local version: " + localVersion), 103 | 104 | if LooseVersion(localVersion) < LooseVersion(latestVersion): 105 | print("(out-of-date)") 106 | print("[-] Your hmark is not up-to-date.") 107 | print(" Please download and run the latest version.") 108 | print(" Proceeding to the download page.") 109 | print(" To bypass update checking, run with [--no-updatecheck] option.") 110 | 111 | webbrowser.open(urlDownload) 112 | #raw_input("Press Enter to continue...") 113 | input("Press Enter to continue...") 114 | sys.exit() 115 | else: 116 | print("(up-to-date)") 117 | 118 | 119 | def parseFile_shallow_multi(f): 120 | functionInstanceList = pu.parseFile_shallow(f, "GUI") 121 | return (f, functionInstanceList) 122 | 123 | 124 | def parseFile_deep_multi(f): 125 | functionInstanceList 
= pu.parseFile_deep(f, "GUI") 126 | return (f, functionInstanceList) 127 | 128 | 129 | class App: 130 | def __init__(self, master): 131 | self.master = master 132 | self.defaultbg = master.cget('bg') 133 | 134 | self.mainWidth = 900 # width for the Tk root (root == master of this class) 135 | if osName == "osx": 136 | self.mainHeight = 700 # height for the Tk root 137 | else: 138 | self.mainHeight = 650 139 | 140 | self.screenWidth = master.winfo_screenwidth() # width of the screen 141 | self.screenHeight = master.winfo_screenheight() # height of the screen 142 | 143 | #self.x = (self.screenWidth / 2) - (self.mainWidth / 2) 144 | #self.y = (self.screenHeight / 2) - (self.mainHeight / 2) 145 | self.x = (self.screenWidth // 2) - (self.mainWidth // 2) 146 | self.y = (self.screenHeight // 2) - (self.mainHeight // 2) 147 | 148 | 149 | master.geometry("%dx%d+%d+%d" % (self.mainWidth, self.mainHeight, self.x, self.y)) 150 | master.resizable(width=False, height=False) 151 | 152 | """ MENU """ 153 | self.menubar = Tkinter.Menu(master, tearoff=1) 154 | self.menubar.add_command(label="HELP", command=self.show_help) 155 | self.menubar.add_command(label="ABOUT", command=self.show_about) 156 | master.config(menu=self.menubar) 157 | 158 | """ BROWSE DIRECTORY """ 159 | frmDirectory = Tkinter.Frame(master) 160 | frmDirectory.pack(fill=Tkinter.BOTH, padx=50, pady=(20, 0)) 161 | 162 | self.directory = Tkinter.StringVar() 163 | self.directory.set('Choose the root directory of your program.') 164 | self.btnDirectory = Tkinter.Button(frmDirectory, text="Browse directory", command=self.askDirectory) 165 | self.btnDirectory.pack(side=Tkinter.LEFT) 166 | 167 | self.lblSelected = Tkinter.Label(frmDirectory, fg=self.defaultbg, text="Selected: ") 168 | self.lblSelected.pack(side=Tkinter.LEFT, padx=(10, 0)) 169 | self.lblDirectory = Tkinter.Label(frmDirectory, fg="Red", textvariable=self.directory) 170 | self.lblDirectory.pack(side=Tkinter.LEFT) 171 | 172 | """ ABSTRACTION """ 173 | frmAbstraction = Tkinter.Frame(master) 174 | frmAbstraction.pack(fill=Tkinter.BOTH) 175 | 176 | lblfrmAbstraction = Tkinter.LabelFrame(frmAbstraction, text="Select abstraction mode") 177 | lblfrmAbstraction.pack(fill=Tkinter.BOTH, expand="yes", padx=50, pady=10) 178 | 179 | self.absLevel = Tkinter.IntVar() 180 | R1 = Tkinter.Radiobutton( 181 | lblfrmAbstraction, 182 | text="Abstraction OFF: Detect exact clones only", 183 | variable=self.absLevel, 184 | value=0, 185 | command=self.selectAbst 186 | ) 187 | R2 = Tkinter.Radiobutton( 188 | lblfrmAbstraction, 189 | text="Abstraction ON: Detect near-miss (similar) clones, as well as exact clones", 190 | variable=self.absLevel, 191 | value=4, 192 | command=self.selectAbst 193 | ) 194 | R1.pack(side=Tkinter.LEFT, anchor=Tkinter.W) 195 | R2.pack(side=Tkinter.RIGHT, anchor=Tkinter.W) 196 | 197 | """ GENERATE """ 198 | frmGenerate = Tkinter.Frame(master) 199 | frmGenerate.pack(fill=Tkinter.BOTH, padx=50, pady=5) 200 | self.btnGenerate = Tkinter.Button( 201 | frmGenerate, 202 | width=10000, 203 | text="----- Generate hashmark -----", 204 | state="disabled", 205 | # command=lambda:self.generate("GUI", "", "") 206 | command=self.generate 207 | ) 208 | self.btnGenerate.pack(side=Tkinter.BOTTOM) 209 | 210 | """ PROCESS """ 211 | frmProcess = Tkinter.Frame(master) 212 | frmProcess.pack(fill=Tkinter.X) 213 | 214 | scrollbar = Tkinter.Scrollbar(frmProcess) 215 | scrollbar.pack(side=Tkinter.RIGHT, fill=Tkinter.Y) 216 | self.listProcess = Tkinter.Listbox(frmProcess, state="disabled", width=600, height=26, 217 
| yscrollcommand=scrollbar.set, selectmode=Tkinter.SINGLE) 218 | # self.listProcess.insert(END, "") 219 | self.listProcess.pack(side=Tkinter.LEFT, fill=Tkinter.BOTH) 220 | scrollbar.config(command=self.listProcess.yview) 221 | 222 | """ PROGRESSBAR """ 223 | frmPgbar = ttk.Frame(master) 224 | frmPgbar.pack(expand=True, fill=Tkinter.BOTH, side=Tkinter.TOP) 225 | 226 | self.progress = 0 227 | self.progressbar = ttk.Progressbar( 228 | frmPgbar, 229 | orient="horizontal", 230 | mode="determinate", 231 | value=self.progress, 232 | maximum=1 233 | ) 234 | self.progressbar.pack(expand=True, fill=Tkinter.BOTH, side=Tkinter.TOP) 235 | 236 | """ QUIT """ 237 | frmBottom = Tkinter.Frame(master) # , bd=20) 238 | frmBottom.pack(fill=Tkinter.BOTH) 239 | 240 | self.btnOpenFolder = Tkinter.Button( 241 | frmBottom, 242 | width=15, 243 | text="Open hidx folder", 244 | state="disabled", 245 | command=self.openFolder 246 | ) 247 | self.btnQuit = Tkinter.Button( 248 | frmBottom, 249 | width=15, 250 | text="QUIT", 251 | command=frmBottom.quit 252 | ) 253 | self.btnOpenFolder.pack(side=Tkinter.LEFT, padx=50) 254 | self.btnQuit.pack(side=Tkinter.RIGHT, padx=50, pady=15) 255 | 256 | def openFolder(self): 257 | path = os.path.join(os.getcwd(), "hidx") 258 | if osName == "win": 259 | subprocess.Popen( 260 | ["explorer", "/select,", path], 261 | stdout=subprocess.PIPE, 262 | stderr=subprocess.PIPE 263 | ) 264 | elif osName == "linux": 265 | subprocess.Popen( 266 | ["xdg-open", path], 267 | stdout=subprocess.PIPE, 268 | stderr=subprocess.PIPE 269 | ) 270 | elif osName == "osx": 271 | subprocess.Popen( 272 | ["open", "-R", path], 273 | stdout=subprocess.PIPE, 274 | stderr=subprocess.PIPE 275 | ) 276 | 277 | def generate(self): 278 | directory = self.directory.get() 279 | absLevel = int(self.absLevel.get()) 280 | self.progress = 0 281 | 282 | proj = directory.replace('\\', '/').split('/')[-1] 283 | timeIn = time.time() 284 | numFile = 0 285 | numFunc = 0 286 | numLine = 0 287 | 288 | projDic = {} 289 | hashFileMap = {} 290 | 291 | self.listProcess.config(state="normal") 292 | self.listProcess.insert(Tkinter.END, 293 | "Loading source files... This may take a few minutes." 294 | ) 295 | self.listProcess.update() 296 | 297 | fileList = pu.loadSource(directory) 298 | numFile = len(fileList) 299 | 300 | if numFile == 0: 301 | self.listProcess.insert(Tkinter.END, 302 | "Error: Failed loading source files." 303 | ) 304 | self.listProcess.insert(Tkinter.END, 305 | "- Check if you selected proper directory, or if your project contains .c or .cpp files." 306 | ) 307 | else: 308 | # self.listProcess.insert(END, "") 309 | self.listProcess.insert(Tkinter.END, 310 | "Load complete. Generating hashmark..." 
311 | ) 312 | # self.listProcess.insert(END, "") 313 | # self.listProcess.insert(END, "") 314 | 315 | if absLevel == 0: 316 | func = parseFile_shallow_multi 317 | else: 318 | func = parseFile_deep_multi 319 | 320 | cpu_count = get_cpu_count.get_cpu_count() 321 | if cpu_count != 1: 322 | cpu_count -= 1 323 | 324 | pool = multiprocessing.Pool(processes=cpu_count) 325 | for idx, tup in enumerate(pool.imap_unordered(func, fileList)): 326 | f = tup[0] 327 | 328 | functionInstanceList = tup[1] 329 | pathOnly = f.split(proj, 1)[1][1:] 330 | progress = float(idx + 1) / numFile 331 | 332 | self.progressbar["value"] = progress 333 | self.progressbar.update() 334 | self.listProcess.insert(Tkinter.END, "[+] " + f) 335 | self.listProcess.see("end") 336 | 337 | numFunc += len(functionInstanceList) 338 | 339 | if len(functionInstanceList) > 0: 340 | numLine += functionInstanceList[0].parentNumLoc 341 | 342 | for f in functionInstanceList: 343 | f.removeListDup() 344 | path = f.parentFile 345 | absBody = pu.abstract(f, absLevel)[1] 346 | # self.listProcess.insert(Tkinter.END, absBody) 347 | absBody = pu.normalize(absBody) 348 | funcLen = len(absBody) 349 | 350 | if funcLen > 50: 351 | hashValue = md5(absBody).hexdigest() 352 | 353 | try: 354 | projDic[funcLen].append(hashValue) 355 | except KeyError: 356 | projDic[funcLen] = [hashValue] 357 | try: 358 | hashFileMap[hashValue].extend([pathOnly, f.funcId]) 359 | except KeyError: 360 | hashFileMap[hashValue] = [pathOnly, f.funcId] 361 | else: 362 | numFunc -= 1 # decrement numFunc by 1 if funclen is under threshold 363 | 364 | self.listProcess.insert(Tkinter.END, "") 365 | self.listProcess.insert(Tkinter.END, "Hash index successfully generated.") 366 | self.listProcess.see("end") 367 | self.listProcess.insert(Tkinter.END, "") 368 | self.listProcess.see("end") 369 | self.listProcess.insert(Tkinter.END, "Saving hash index to file...") 370 | self.listProcess.see("end") 371 | 372 | try: 373 | os.mkdir("hidx") 374 | except: 375 | pass 376 | packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(numFile) + ' ' + str(numFunc) + ' ' + str( 377 | numLine) + '\n' 378 | with open("hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx", 'w') as fp: 379 | fp.write(packageInfo) 380 | 381 | for key in sorted(projDic): 382 | fp.write(str(key) + '\t') 383 | for h in list(set(projDic[key])): 384 | fp.write(h + '\t') 385 | fp.write('\n') 386 | 387 | fp.write('\n=====\n') 388 | 389 | for key in sorted(hashFileMap): 390 | fp.write(str(key) + '\t') 391 | for f in hashFileMap[key]: 392 | fp.write(str(f) + '\t') 393 | fp.write('\n') 394 | 395 | timeOut = time.time() 396 | 397 | self.listProcess.insert(Tkinter.END, "Done.") 398 | self.listProcess.see("end") 399 | self.listProcess.insert(Tkinter.END, "") 400 | self.listProcess.insert(Tkinter.END, "Elapsed time: %.02f sec." 
% (timeOut - timeIn)) 401 | self.listProcess.see("end") 402 | 403 | self.listProcess.insert(Tkinter.END, "Program statistics:") 404 | self.listProcess.insert(Tkinter.END, " - " + str(numFile) + ' files;') 405 | self.listProcess.insert(Tkinter.END, " - " + str(numFunc) + ' functions;') 406 | self.listProcess.insert(Tkinter.END, " - " + str(numLine) + ' lines of code.') 407 | self.listProcess.see("end") 408 | 409 | self.listProcess.insert(Tkinter.END, "") 410 | self.listProcess.insert(Tkinter.END, 411 | "Hash index saved to: " + os.getcwd().replace("\\", "/") + "/hidx/hashmark_" + str( 412 | absLevel) + "_" + proj + ".hidx") 413 | self.listProcess.see("end") 414 | self.btnOpenFolder.config(state="normal") 415 | 416 | return 0 417 | 418 | def selectAbst(self): 419 | selection = str(self.absLevel.get()) 420 | 421 | def askDirectory(self): 422 | selectedDirectory = tkFileDialog.askdirectory() 423 | if len(selectedDirectory) > 1: 424 | self.lblSelected.config(fg="Black") 425 | self.lblDirectory.config(fg="Black") 426 | self.directory.set(selectedDirectory) 427 | self.btnGenerate.config(state="normal") 428 | 429 | def show_about(self): 430 | top = Tkinter.Toplevel(padx=20, pady=10) 431 | if osName == "win": # this only works for windows. 432 | top.withdraw() # temporarily hide widget for better UI 433 | aboutMessage = """ 434 | hmark is an hash index generator for vulnerable code clone detection. 435 | 436 | Developed by Seulbae Kim @CSSA. 437 | https://iotcube.net 438 | cssa@korea.ac.kr 439 | 440 | """ 441 | msg = Tkinter.Message(top, text=aboutMessage) 442 | msg.pack() 443 | btnOkay = Tkinter.Button(top, text="Okay", command=top.destroy) 444 | btnOkay.pack() 445 | 446 | top.update() 447 | # self.master.update_idletasks() 448 | topw = top.winfo_reqwidth() # width of this widget 449 | toph = top.winfo_reqheight() # height of this widget 450 | parentGeo = self.master.geometry().split('+') 451 | parentX = int(parentGeo[1]) # X coordinate of parent (the main window) 452 | parentY = int(parentGeo[2]) # Y coordinate of parent 453 | 454 | #top.geometry("+%d+%d" % (parentX + self.mainWidth / 2 - topw / 2, parentY + self.mainHeight / 2 - toph / 2)) 455 | top.geometry("+%d+%d" % (parentX + self.mainWidth // 2 - topw // 2, parentY + self.mainHeight // 2 - toph / 2)) 456 | top.resizable(width=False, height=False) 457 | top.grab_set_global() 458 | top.title("About hmark...") 459 | if osName == "win": 460 | top.deiconify() # show widget, as its position is set 461 | 462 | def show_help(self): 463 | top = Tkinter.Toplevel(padx=20, pady=10) 464 | if osName == "win": # this only works for windows. 465 | top.withdraw() # temporarily hide widget 466 | 467 | helpMessage = """ 468 | 1. Select the root directory of your package under which source code is located.\n 469 | 2. Choose the abstraction mode. 470 | - OFF: hmark detects only exact clones. 471 | - ON: hmark detects near-miss clones along with exact clones, by tolerating changes in parameter, variable names, types, and names of the called functions.\n 472 | 3. Generate Hashmark. 
473 | """ 474 | msg = Tkinter.Message(top, text=helpMessage) 475 | btnOkay = Tkinter.Button(top, text="Okay", command=top.destroy) 476 | self.master.update_idletasks() 477 | 478 | msg.pack() 479 | btnOkay.pack() 480 | 481 | top.update() 482 | topw = top.winfo_reqwidth() # width of this widget 483 | toph = top.winfo_reqheight() # height of this widget 484 | 485 | parentGeo = self.master.geometry().split('+') 486 | parentX = int(parentGeo[1]) # width of parent (the main window) 487 | parentY = int(parentGeo[2]) # height of parent 488 | 489 | #top.geometry("+%d+%d" % (parentX + self.mainWidth / 2 - topw / 2, parentY + self.mainHeight / 2 - toph / 2)) 490 | top.geometry("+%d+%d" % (parentX + self.mainWidth // 2 - topw // 2, parentY + self.mainHeight // 2 - toph // 2)) 491 | top.resizable(width=False, height=False) 492 | top.grab_set_global() 493 | top.title("Help") 494 | if osName == "win": 495 | top.deiconify() # show widget, as its position is set 496 | 497 | 498 | def run_gui(): 499 | global localVersion 500 | global icon 501 | global Tkinter 502 | global tkFileDialog 503 | global ttk 504 | 505 | #import Tkinter 506 | #import tkFileDialog 507 | #import ttk 508 | import tkinter as Tkinter 509 | from tkinter import filedialog as tkFileDialog 510 | from tkinter import ttk 511 | 512 | 513 | root = Tkinter.Tk() 514 | app = App(root) 515 | root.title("hmark ver " + str(localVersion)) 516 | 517 | try: # if icon is available 518 | icon = resource_path("icon.gif") 519 | img = Tkinter.PhotoImage(file=icon) 520 | root.tk.call('wm', 'iconphoto', root._w, img) 521 | except Tkinter.TclError: # if, for some reason, icon isn't available 522 | pass 523 | 524 | root.mainloop() 525 | 526 | try: 527 | root.destroy() 528 | print("Farewell!") 529 | except Tkinter.TclError: 530 | print("GUI process terminated.") 531 | 532 | 533 | def generate_cli(targetPath, isAbstraction): 534 | import subprocess 535 | directory = targetPath.rstrip('/').rstrip("\\") 536 | 537 | if isAbstraction.lower() == "on": 538 | absLevel = 4 539 | else: 540 | absLevel = 0 541 | 542 | proj = directory.replace('\\', '/').split('/')[-1] 543 | print("PROJ:", proj) 544 | timeIn = time.time() 545 | numFile = 0 546 | numFunc = 0 547 | numLine = 0 548 | 549 | projDic = {} 550 | hashFileMap = {} 551 | 552 | print("[+] Loading source files... This may take a few minutes.") 553 | 554 | fileList = pu.loadSource(directory) 555 | numFile = len(fileList) 556 | 557 | if numFile == 0: 558 | print("[-] Error: Failed loading source files.") 559 | print(" Check if you selected proper directory, or if your project contains .c or .cpp files.") 560 | sys.exit() 561 | else: 562 | print ("[+] Load complete. 
Generating hashmark...") 563 | 564 | if absLevel == 0: 565 | func = parseFile_shallow_multi 566 | else: 567 | func = parseFile_deep_multi 568 | 569 | cpu_count = get_cpu_count.get_cpu_count() 570 | if cpu_count != 1: 571 | cpu_count -= 1 572 | 573 | pool = multiprocessing.Pool(processes=cpu_count) 574 | for idx, tup in enumerate(pool.imap_unordered(func, fileList)): 575 | f = tup[0] 576 | functionInstanceList = tup[1] 577 | 578 | fullName = proj + f.split(proj, 1)[1] 579 | pathOnly = f.split(proj, 1)[1][1:] 580 | 581 | if osName == "win": 582 | columns = 80 583 | else: 584 | try: 585 | # http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python 586 | #rows, columns = subprocess.check_output(['stty', 'size']).split() 587 | rows, columns = subprocess.check_output(['stty', 'size']).decode().split() 588 | except: 589 | columns = 80 590 | 591 | #progress = 100 * float(idx + 1) / numFile 592 | progress = 100 * (idx + 1) / float(numFile) 593 | buf = "\r%.2f%% %s" % (progress, fullName) 594 | buf += " " * (int(columns) - len(buf)) 595 | sys.stdout.write(buf) 596 | sys.stdout.flush() 597 | 598 | numFunc += len(functionInstanceList) 599 | 600 | if len(functionInstanceList) > 0: 601 | numLine += functionInstanceList[0].parentNumLoc 602 | 603 | for f in functionInstanceList: 604 | f.removeListDup() 605 | path = f.parentFile 606 | # print "\nORIGINALLY:", f.funcBody 607 | absBody = pu.abstract(f, absLevel)[1] 608 | absBody = pu.normalize(absBody) 609 | funcLen = len(absBody) 610 | # print "\n", funcLen, absBody 611 | 612 | if funcLen > 50: 613 | #hashValue = md5(absBody).hexdigest() 614 | hashValue = md5(absBody.encode('utf-8')).hexdigest() 615 | 616 | try: 617 | projDic[funcLen].append(hashValue) 618 | except KeyError: 619 | projDic[funcLen] = [hashValue] 620 | try: 621 | hashFileMap[hashValue].extend([pathOnly, f.funcId]) 622 | except KeyError: 623 | hashFileMap[hashValue] = [pathOnly, f.funcId] 624 | else: 625 | numFunc -= 1 # decrement numFunc by 1 if funclen is under threshold 626 | 627 | print("") 628 | print("[+] Hash index successfully generated.") 629 | print("[+] Saving hash index to file..."), 630 | 631 | packageInfo = str(localVersion) + ' ' + str(proj) + ' ' + str(numFile) + ' ' + str(numFunc) + ' ' + str( 632 | numLine) + '\n' 633 | with open("hidx/hashmark_" + str(absLevel) + "_" + proj + ".hidx", 'w') as fp: 634 | fp.write(packageInfo) 635 | 636 | for key in sorted(projDic): 637 | fp.write(str(key) + '\t') 638 | for h in list(set(projDic[key])): 639 | fp.write(h + '\t') 640 | fp.write('\n') 641 | 642 | fp.write('\n=====\n') 643 | 644 | for key in sorted(hashFileMap): 645 | fp.write(str(key) + '\t') 646 | for f in hashFileMap[key]: 647 | fp.write(str(f) + '\t') 648 | fp.write('\n') 649 | 650 | timeOut = time.time() 651 | 652 | print("(Done)") 653 | print("") 654 | print("[+] Elapsed time: %.02f sec." 
% (timeOut - timeIn)) 655 | print("Program statistics:") 656 | print(" - " + str(numFile) + ' files;') 657 | print(" - " + str(numFunc) + ' functions;') 658 | print(" - " + str(numLine) + ' lines of code.') 659 | print("") 660 | print("[+] Hash index saved to: " + os.getcwd().replace("\\", "/") + "/hidx/hashmark_" + str( 661 | absLevel) + "_" + proj + ".hidx") 662 | 663 | 664 | def run_cli(targetPath, isAbstraction): 665 | generate_cli(targetPath, isAbstraction) 666 | print("Farewell!") 667 | 668 | 669 | def main(): 670 | try: 671 | os.mkdir("hidx") 672 | except: 673 | pass 674 | 675 | get_platform() 676 | 677 | progStr = "hmark_" + localVersion + "_" + osName 678 | if osName == "win": 679 | progStr += "_x" + bits + ".exe" 680 | elif osName == "linux": 681 | progStr = "./" + progStr + "_x" + bits 682 | elif osName == "osx": 683 | progStr = "./" + progStr 684 | 685 | ap = argparse.ArgumentParser( 686 | prog=progStr 687 | ) 688 | 689 | ap.add_argument( 690 | "-c", 691 | "--cli-mode", 692 | dest="cli_mode", 693 | nargs=2, 694 | metavar=("path", "ON/OFF"), 695 | required=False, 696 | help="run hmark without GUI by specifying the path to the target directory, and the abstraction mode" 697 | ) 698 | 699 | ap.add_argument( 700 | "-n", 701 | "--no-updatecheck", 702 | dest="no_update_check", 703 | action="store_true", 704 | required=False, 705 | help="bypass update checking (not recommended)" 706 | ) 707 | ap.add_argument( 708 | "-V", 709 | "--version", 710 | dest="version", 711 | action="store_true", 712 | required=False, 713 | help="print hmark version and exit" 714 | ) 715 | args = ap.parse_args() 716 | 717 | if args.version: 718 | versionString = "hmark" + localVersion + " for " + osName 719 | if osName == "linux" or osName == "win": 720 | versionString = versionString + " (x" + bits + ")" 721 | print(versionString) 722 | sys.exit() 723 | 724 | if args.no_update_check: 725 | print("Bypassed the update checker.") 726 | else: 727 | check_update() 728 | 729 | if osName == "linux" or osName == "osx": 730 | try: 731 | msg = subprocess.check_output("java -version", stderr=subprocess.STDOUT, shell=True) 732 | except subprocess.CalledProcessError as e: 733 | print("Java error:", e) 734 | print("Please try again after installing JDK.") 735 | sys.exit() 736 | 737 | if args.cli_mode: 738 | if os.path.isdir(args.cli_mode[0]) is False: 739 | print("[-] Directory does not exist:", args.cli_mode[0]) 740 | print(" Please specify the right directory to your target.") 741 | sys.exit() 742 | 743 | if args.cli_mode[1].isalpha(): 744 | if args.cli_mode[1].lower() == "on" or args.cli_mode[1].lower() == "off": 745 | print("Running in CLI mode") 746 | print("TARGET: " + args.cli_mode[0]) 747 | print("ABSTRACTION: " + args.cli_mode[1]) 748 | run_cli(args.cli_mode[0], args.cli_mode[1]) 749 | else: 750 | print("[-] Bad parameter: " + args.cli_mode[1]) 751 | print(" Accepted values are ON or OFF.") 752 | sys.exit() 753 | else: 754 | print("[-] Bad parameter: " + args.cli_mode[1]) 755 | print(" Accepted values are ON or OFF.") 756 | sys.exit() 757 | 758 | else: 759 | print("Running GUI") 760 | run_gui() 761 | 762 | 763 | def resource_path(relative_path): 764 | """ Get absolute path to resource, works for dev and for PyInstaller """ 765 | try: 766 | base_path = sys._MEIPASS 767 | except Exception: 768 | base_path = os.path.abspath(".") 769 | 770 | return os.path.join(base_path, relative_path) 771 | 772 | 773 | try: 774 | # Python 3.4+ 775 | if sys.platform.startswith("win"): 776 | import multiprocessing.popen_spawn_win32 as 
forking 777 | else: 778 | import multiprocessing.popen_fork as forking 779 | except ImportError: 780 | import multiprocessing.forking as forking 781 | 782 | if sys.platform.startswith("win"): 783 | # First define a modified version of Popen. 784 | class _Popen(forking.Popen): 785 | def __init__(self, *args, **kw): 786 | if hasattr(sys, 'frozen'): 787 | # We have to set original _MEIPASS2 value from sys._MEIPASS 788 | # to get --onefile mode working. 789 | os.putenv('_MEIPASS2', sys._MEIPASS) 790 | try: 791 | super(_Popen, self).__init__(*args, **kw) 792 | finally: 793 | if hasattr(sys, 'frozen'): 794 | # On some platforms (e.g. AIX) 'os.unsetenv()' is not 795 | # available. In those cases we cannot delete the variable 796 | # but only set it to the empty string. The bootloader 797 | # can handle this case. 798 | if hasattr(os, 'unsetenv'): 799 | os.unsetenv('_MEIPASS2') 800 | else: 801 | os.putenv('_MEIPASS2', '') 802 | 803 | 804 | # Second override 'Popen' class with our modified version. 805 | forking.Popen = _Popen 806 | 807 | """ EXECUTE """ 808 | if __name__ == "__main__": 809 | multiprocessing.freeze_support() 810 | main() 811 | -------------------------------------------------------------------------------- /hmark/icon.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/hmark/icon.gif -------------------------------------------------------------------------------- /hmark/icon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/hmark/icon.ico -------------------------------------------------------------------------------- /hmark/parseutility2.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | """ 3 | Parser utility. 
4 | Author: Seulbae Kim 5 | Created: August 03, 2016 6 | """ 7 | 8 | import os 9 | import sys 10 | import subprocess 11 | import re 12 | import platform 13 | 14 | 15 | def get_platform(): 16 | global osName 17 | global bits 18 | 19 | pf = platform.platform() 20 | bits, _ = platform.architecture() 21 | if "Windows" in pf: 22 | osName = "win" 23 | bits = "" 24 | elif "Linux" in pf: 25 | osName = "linux" 26 | if "64" in bits: 27 | bits = "64" 28 | else: 29 | bits = "86" 30 | else: 31 | osName = "osx" 32 | bits = "" 33 | 34 | 35 | def setEnvironment(caller): 36 | get_platform() 37 | global javaCallCommand 38 | if caller == "GUI": 39 | # try: 40 | # base_path = sys._MEIPASS 41 | # except: 42 | # base_path = os.path.abspath(".") 43 | cwd = os.getcwd() 44 | if osName == "win": 45 | # full_path = os.path.join(base_path, "FuncParser.exe") 46 | # javaCallCommand = os.path.join(cwd, "FuncParser-opt.exe ") 47 | base_path = os.path.dirname(os.path.abspath(__file__)) # vuddy/hmark root directory 48 | javaCallCommand = "\"{0}\" -Xmx1024m -jar \"{1}\" ".format("java", os.path.join(base_path, "FuncParser-opt.jar")) 49 | 50 | elif osName == "linux" or osName == "osx": 51 | # full_path = os.path.join(base_path, "FuncParser.jar") 52 | # javaCallCommand = "java -Xmx1024m -jar " + full_path + " " 53 | javaCallCommand = "\"{0}\" -Xmx1024m -jar \"{1}\" ".format("java", os.path.join(cwd, "FuncParser-opt.jar")) 54 | 55 | else: 56 | if osName == "win": 57 | base_path = os.path.dirname(os.path.abspath(__file__)) # vuddy/hmark root directory 58 | # javaCallCommand = os.path.join(base_path, "FuncParser-opt.exe ") 59 | javaCallCommand = "\"{0}\" -Xmx1024m -jar \"{1}\" ".format("java", os.path.join(base_path, "FuncParser-opt.jar")) 60 | elif osName == "linux" or osName == "osx": 61 | base_path = os.path.dirname(os.path.abspath(__file__)) # vuddy/hmark root directory 62 | javaCallCommand = "\"{0}\" -Xmx1024m -jar \"{1}\" ".format("java", os.path.join(base_path, "FuncParser-opt.jar")) 63 | 64 | 65 | class function: 66 | parentFile = None # Absolute file which has the function 67 | parentNumLoc = None # Number of LoC of the parent file 68 | name = None # Name of the function 69 | lines = None # Tuple (lineFrom, lineTo) that indicates the LoC of function 70 | funcId = None # n, indicating n-th function in the file 71 | parameterList = [] # list of parameter variables 72 | variableList = [] # list of local variables 73 | dataTypeList = [] # list of data types, including user-defined types 74 | funcCalleeList = [] # list of called functions' names 75 | funcBody = None 76 | 77 | def __init__(self, fileName): 78 | self.parentFile = fileName 79 | self.parameterList = [] 80 | self.variableList = [] 81 | self.dataTypeList = [] 82 | self.funcCalleeList = [] 83 | 84 | def removeListDup(self): 85 | # for best performance, must execute this method 86 | # for every instance before applying the abstraction. 87 | self.parameterList = list(set(self.parameterList)) 88 | self.variableList = list(set(self.variableList)) 89 | self.dataTypeList = list(set(self.dataTypeList)) 90 | self.funcCalleeList = list(set(self.funcCalleeList)) 91 | 92 | # def getOriginalFunction(self): 93 | # # returns the original function back from the instance. 94 | # fp = open(self.parentFile, 'r') 95 | # srcFileRaw = fp.readlines() 96 | # fp.close() 97 | # return ''.join(srcFileRaw[self.lines[0]-1:self.lines[1]]) 98 | 99 | 100 | def loadSource(rootDirectory): 101 | # returns the list of .src files under the specified root directory. 
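    # Note: despite the comment above, what is actually collected below are C/C++
    # sources (.c, .cpp, .cc, .c++, .cxx); files larger than maxFileSizeInBytes
    # (2 MiB by default, set just below) are skipped when that limit is in effect.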
102 | maxFileSizeInBytes = None 103 | maxFileSizeInBytes = 2*1024*1024 # remove this line if you don't want to restrict 104 | # the maximum file size that you process. 105 | walkList = os.walk(rootDirectory) 106 | srcFileList = [] 107 | for path, dirs, files in walkList: 108 | for fileName in files: 109 | ext = fileName.lower() 110 | if ext.endswith('.c') or ext.endswith('.cpp') or ext.endswith('.cc') or ext.endswith('.c++') or ext.endswith('.cxx'): 111 | absPathWithFileName = path.replace('\\', '/') + '/' + fileName 112 | if maxFileSizeInBytes is not None: 113 | if os.path.getsize(absPathWithFileName) < maxFileSizeInBytes: 114 | srcFileList.append(absPathWithFileName) 115 | else: 116 | srcFileList.append(absPathWithFileName) 117 | return srcFileList 118 | 119 | 120 | def loadVul(rootDirectory): 121 | # returns the list of .vul files under the specified root directory. 122 | maxFileSizeInBytes = None 123 | # maxFileSizeInBytes = 2097152 # remove this line if you don't want to restrict 124 | # the maximum file size that you process. 125 | walkList = os.walk(rootDirectory) 126 | srcFileList = [] 127 | for path, dirs, files in walkList: 128 | for fileName in files: 129 | if fileName.endswith('OLD.vul'): 130 | absPathWithFileName = path.replace('\\', '/') + '/' + fileName 131 | if maxFileSizeInBytes is not None: 132 | if os.path.getsize(absPathWithFileName) < maxFileSizeInBytes: 133 | srcFileList.append(absPathWithFileName) 134 | else: 135 | srcFileList.append(absPathWithFileName) 136 | return srcFileList 137 | 138 | 139 | def removeComment(string): 140 | # Code for removing C/C++ style comments. (Imported from ReDeBug.) 141 | c_regex = re.compile( 142 | r'(?P//.*?$|[{}]+)|(?P/\*.*?\*/)|(?P\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"]*)', 143 | re.DOTALL | re.MULTILINE) 144 | #return ''.join([c.group('noncomment') for c in c_regex.finditer(string) if c.group('noncomment')]) 145 | return ''.join([c.group('noncomment') for c in c_regex.finditer(string.decode('latin-1')) if c.group('noncomment')]) 146 | 147 | # def getBody(originalFunction): 148 | # # returns the function's body as a string. 149 | # return originalFunction[originalFunction.find('{')+1:originalFunction.rfind('}')] 150 | 151 | 152 | def normalize(string): 153 | # Code for normalizing the input string. 154 | # LF and TAB literals, curly braces, and spaces are removed, 155 | # and all characters are lowercased. 156 | return ''.join(string.replace('\n', '').replace('\r', '').replace('\t', '').replace('{', '').replace('}', '').split( 157 | ' ')).lower() 158 | 159 | 160 | def abstract(instance, level): 161 | # Applies abstraction on the function instance, 162 | # and then returns a tuple consisting of the original body and abstracted body. 163 | originalFunctionBody = instance.funcBody 164 | # print "===================" 165 | originalFunctionBody = removeComment(originalFunctionBody) 166 | # print originalFunctionBody 167 | # print '====================================================' 168 | if int(level) >= 0: # No abstraction. 
169 | abstractBody = originalFunctionBody 170 | 171 | if int(level) >= 1: # PARAM 172 | parameterList = instance.parameterList 173 | for param in parameterList: 174 | if len(param) == 0: 175 | continue 176 | try: 177 | paramPattern = re.compile("(^|\\W)" + param + "(\\W)") 178 | abstractBody = paramPattern.sub("\\g<1>FPARAM\\g<2>", abstractBody) 179 | except: 180 | pass 181 | 182 | if int(level) >= 2: # DTYPE 183 | dataTypeList = instance.dataTypeList 184 | for dtype in dataTypeList: 185 | if len(dtype) == 0: 186 | continue 187 | try: 188 | dtypePattern = re.compile("(^|\\W)" + dtype + "(\\W)") 189 | abstractBody = dtypePattern.sub("\\g<1>DTYPE\\g<2>", abstractBody) 190 | except: 191 | pass 192 | 193 | if int(level) >= 3: # LVAR 194 | variableList = instance.variableList 195 | for lvar in variableList: 196 | if len(lvar) == 0: 197 | continue 198 | try: 199 | lvarPattern = re.compile("(^|\\W)" + lvar + "(\\W)") 200 | abstractBody = lvarPattern.sub("\\g<1>LVAR\\g<2>", abstractBody) 201 | except: 202 | pass 203 | 204 | if int(level) >= 4: # FUNCCALL 205 | funcCalleeList = instance.funcCalleeList 206 | for fcall in funcCalleeList: 207 | if len(fcall) == 0: 208 | continue 209 | try: 210 | fcallPattern = re.compile("(^|\\W)" + fcall + "(\\W)") 211 | abstractBody = fcallPattern.sub("\\g<1>FUNCCALL\\g<2>", abstractBody) 212 | except: 213 | pass 214 | 215 | return (originalFunctionBody, abstractBody) 216 | 217 | 218 | delimiter = "\r\0?\r?\0\r" 219 | 220 | 221 | def parseFile_shallow(srcFileName, caller): 222 | # this does not parse body. 223 | global javaCallCommand 224 | global delimiter 225 | setEnvironment(caller) 226 | javaCallCommand += "\"" + srcFileName + "\" 0" 227 | functionInstanceList = [] 228 | try: 229 | astString = subprocess.check_output(javaCallCommand, stderr=subprocess.STDOUT, shell=True) 230 | except subprocess.CalledProcessError as e: 231 | #print "Parser Error:", e 232 | print("Parser Error:", e) 233 | astString = "" 234 | #funcList = astString.split(delimiter) 235 | funcList = astString.split(delimiter.encode('utf-8')) 236 | for func in funcList[1:]: 237 | functionInstance = function(srcFileName) 238 | #elemsList = func.split('\n')[1:-1] 239 | elemsList = func.split(b'\n')[1:-1] 240 | if len(elemsList) > 9: 241 | functionInstance.parentNumLoc = int(elemsList[1]) 242 | functionInstance.name = elemsList[2] 243 | #functionInstance.lines = (int(elemsList[3].split('\t')[0]), int(elemsList[3].split('\t')[1])) 244 | functionInstance.lines = (int(elemsList[3].split(b'\t')[0]), int(elemsList[3].split(b'\t')[1])) 245 | functionInstance.funcId = int(elemsList[4]) 246 | #functionInstance.funcBody = '\n'.join(elemsList[9:]) 247 | functionInstance.funcBody = b'\n'.join(elemsList[9:]) 248 | # print functionInstance.funcBody 249 | # print "-------------------" 250 | 251 | functionInstanceList.append(functionInstance) 252 | 253 | return functionInstanceList 254 | 255 | 256 | def parseFile_deep(srcFileName, caller): 257 | global javaCallCommand 258 | global delimiter 259 | setEnvironment(caller) 260 | # this parses function definition plus body. 
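    # Unlike parseFile_shallow (which passes mode 0 to FuncParser-opt and only fills
    # in the function body and location), this passes mode 1 so that the parameter,
    # local variable, data type, and callee lists are populated for use by abstract().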
261 | javaCallCommand += "\"" + srcFileName + "\" 1" 262 | functionInstanceList = [] 263 | 264 | try: 265 | astString = subprocess.check_output(javaCallCommand, stderr=subprocess.STDOUT, shell=True) 266 | except subprocess.CalledProcessError as e: 267 | print("Parser Error:", e) 268 | astString = "" 269 | 270 | funcList = astString.split(delimiter) 271 | for func in funcList[1:]: 272 | functionInstance = function(srcFileName) 273 | 274 | elemsList = func.split('\n')[1:-1] 275 | # print elemsList 276 | if len(elemsList) > 9: 277 | functionInstance.parentNumLoc = int(elemsList[1]) 278 | functionInstance.name = elemsList[2] 279 | functionInstance.lines = (int(elemsList[3].split('\t')[0]), int(elemsList[3].split('\t')[1])) 280 | functionInstance.funcId = int(elemsList[4]) 281 | functionInstance.parameterList = elemsList[5].rstrip().split('\t') 282 | functionInstance.variableList = elemsList[6].rstrip().split('\t') 283 | functionInstance.dataTypeList = elemsList[7].rstrip().split('\t') 284 | functionInstance.funcCalleeList = elemsList[8].rstrip().split('\t') 285 | functionInstance.funcBody = '\n'.join(elemsList[9:]) 286 | # print '\n'.join(elemsList[9:]) 287 | functionInstanceList.append(functionInstance) 288 | 289 | return functionInstanceList 290 | -------------------------------------------------------------------------------- /hmark/spec_generator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import version 4 | 5 | pf = platform.platform() 6 | bits, _ = platform.architecture() 7 | 8 | if 'Windows' in pf: 9 | osName = "win" 10 | if "64" in bits: 11 | bits = "_x64" 12 | else: 13 | bits = "_x86" 14 | elif 'Linux' in pf: 15 | osName = 'linux' 16 | if "64" in bits: 17 | bits = "_x64" 18 | else: 19 | bits = "_x86" 20 | else: 21 | osName = "osx" 22 | bits = "" 23 | 24 | # if '64' in bits: 25 | # bits = 'x64' 26 | # else: 27 | # bits = 'x86' 28 | 29 | # if osName == 'OSX': 30 | # bits = '' 31 | 32 | version = version.version 33 | 34 | fp = open("hmark_" + version + '_' + osName + bits + ".spec", "w") 35 | cwd = os.getcwd() 36 | if osName == "linux": 37 | fp.write("\ 38 | # -*- mode: python -*-\n\n\ 39 | block_cipher = None\n\n\n\ 40 | a = Analysis(['hmark.py'],\n\ 41 | pathex=[r'" + cwd + "'],\n\ 42 | binaries=None,\n\ 43 | datas=None,\n\ 44 | hiddenimports=[],\n\ 45 | hookspath=[],\n\ 46 | runtime_hooks=[],\n\ 47 | excludes=[],\n\ 48 | win_no_prefer_redirects=False,\n\ 49 | win_private_assemblies=False,\n\ 50 | cipher=block_cipher)\n\ 51 | a.datas += [('icon.gif', r'" + os.path.join(cwd, 'icon.gif') + "', 'DATA')]\n\ 52 | pyz = PYZ(a.pure, a.zipped_data,\n\ 53 | cipher=block_cipher)\n\ 54 | exe = EXE(pyz,\n\ 55 | a.scripts,\n\ 56 | a.binaries,\n\ 57 | a.zipfiles,\n\ 58 | a.datas,\n\ 59 | name='hmark_" + version + "_" + osName + bits + "',\n\ 60 | debug=False,\n\ 61 | strip=False,\n\ 62 | upx=True,\n\ 63 | console=True )\n\ 64 | """) 65 | 66 | elif osName == "osx": 67 | fp.write("\ 68 | # -*- mode: python -*-\n\n\ 69 | block_cipher = None\n\n\n\ 70 | a = Analysis(['hmark.py'],\n\ 71 | pathex=[r'" + cwd + "'],\n\ 72 | binaries=None,\n\ 73 | datas=None,\n\ 74 | hiddenimports=[],\n\ 75 | hookspath=[],\n\ 76 | runtime_hooks=[],\n\ 77 | excludes=[],\n\ 78 | win_no_prefer_redirects=False,\n\ 79 | win_private_assemblies=False,\n\ 80 | cipher=block_cipher)\n\ 81 | a.datas += [('icon.gif', r'" + os.path.join(cwd, 'icon.gif') + "', 'DATA')]\n\ 82 | pyz = PYZ(a.pure, a.zipped_data,\n\ 83 | cipher=block_cipher)\n\ 84 | exe = 
EXE(pyz,\n\ 85 | a.scripts,\n\ 86 | a.binaries,\n\ 87 | a.zipfiles,\n\ 88 | a.datas,\n\ 89 | name='hmark_" + version + "_" + osName + "',\n\ 90 | debug=False,\n\ 91 | strip=False,\n\ 92 | upx=True,\n\ 93 | console=True )\n\ 94 | """) 95 | 96 | elif osName == "win": 97 | fp.write("\ 98 | # -*- mode: python -*-\n\n\ 99 | block_cipher = None\n\n\n\ 100 | a = Analysis(['hmark.py'],\n\ 101 | pathex=[r'" + cwd + "'],\n\ 102 | binaries=None,\n\ 103 | datas=None,\n\ 104 | hiddenimports=[],\n\ 105 | hookspath=[],\n\ 106 | runtime_hooks=[],\n\ 107 | excludes=[],\n\ 108 | win_no_prefer_redirects=False,\n\ 109 | win_private_assemblies=False,\n\ 110 | cipher=block_cipher)\n\ 111 | a.datas += [('icon.gif', r'" + os.path.join(cwd, 'icon.gif') + "', 'DATA')]\n\ 112 | pyz = PYZ(a.pure, a.zipped_data,\n\ 113 | cipher=block_cipher)\n\ 114 | exe = EXE(pyz,\n\ 115 | a.scripts,\n\ 116 | a.binaries,\n\ 117 | a.zipfiles,\n\ 118 | a.datas,\n\ 119 | name='hmark_" + version + "_" + osName + bits + "',\n\ 120 | debug=False,\n\ 121 | strip=False,\n\ 122 | upx=True,\n\ 123 | console=True,\n\ 124 | icon='icon.ico')\ 125 | """) 126 | 127 | fp.close() 128 | print "Pyinstaller spec file generated: " + "hmark_" + version + '_' + osName + bits + ".spec" 129 | -------------------------------------------------------------------------------- /hmark/version.py: -------------------------------------------------------------------------------- 1 | version = "3.1.0" 2 | -------------------------------------------------------------------------------- /initialize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import platform 4 | # cveXmlDownloader.py 파일이 있는 경로를 추가 5 | sys.path.append(r'E:\jnu\vuddy-demo\vulnDBGen\tools\cvedatagen') 6 | 7 | originalDir = os.path.dirname(os.path.abspath(__file__)) # vuddy root directory 8 | pf = platform.platform() 9 | 10 | try: 11 | import tools.cvedatagen.cveXmlDownloader as Downloader 12 | except ImportError: 13 | import cveXmlDownloader as Downloader 14 | try: 15 | import tools.cvedatagen.cveXmlParser as Parser 16 | except ImportError: 17 | import cveXmlParser as Parser 18 | try: 19 | import tools.cvedatagen.cveXmlUpdater as Updater 20 | except ImportError: 21 | import cveXmlUpdater as Updater 22 | 23 | import tools.cvedatagen.common as common 24 | 25 | 26 | def main(): 27 | print("Making directories...") 28 | dataDir = os.path.join(originalDir, "data", "repolists") 29 | if os.path.exists(dataDir) is False: 30 | os.makedirs(dataDir) 31 | diffDir = os.path.join(originalDir, "diff") 32 | if os.path.exists(diffDir) is False: 33 | os.makedirs(diffDir) 34 | vulDir = os.path.join(originalDir, "vul") 35 | if os.path.exists(vulDir) is False: 36 | os.makedirs(vulDir) 37 | hidxDir = os.path.join(originalDir, "hidx") 38 | if os.path.exists(hidxDir) is False: 39 | os.makedirs(hidxDir) 40 | 41 | 42 | print("Running CVE data generator...") 43 | 44 | os.chdir(os.path.join(originalDir, "data")) 45 | if "cvedata.pkl" not in os.listdir("./"): 46 | print("cvedata.pkl not found. Proceeding to download..") 47 | print("[+] cveXmlDownloader") 48 | Downloader.process() 49 | 50 | print("[+] cveXmlParser") 51 | Parser.process() 52 | else: 53 | print("cvedata.pkl found. 
Omitting download..") 54 | 55 | print("[+] cveXmlUpdater") 56 | Updater.process() 57 | 58 | os.chdir(originalDir) 59 | print("cvedata.pkl is now up-to-date.\n") 60 | 61 | 62 | if "Windows" in pf: # Windows 63 | if os.path.exists(os.path.join(originalDir, "tools", "FuncParser-opt.exe")) is False: 64 | print("Downloading function parser for Windows...") 65 | os.chdir(os.path.join(originalDir, "tools")) 66 | url = "https://github.com/squizz617/FuncParser-opt/raw/master/FuncParser-opt.zip" 67 | fileName = "FuncParser-opt.zip" 68 | common.download_url(url, fileName) 69 | common.unzip(fileName) 70 | os.remove(fileName) 71 | 72 | print("*** Please modify config.py before running scripts in src/ ***") 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /paper/SNP17.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/paper/SNP17.pdf -------------------------------------------------------------------------------- /src/get_cvepatch_from_git.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import subprocess 5 | import re 6 | import time 7 | import argparse 8 | import sys 9 | import platform 10 | import multiprocessing as mp 11 | from functools import partial 12 | 13 | try: 14 | import cPickle as pickle 15 | except ImportError: 16 | import pickle 17 | 18 | # Import from parent directory 19 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 20 | import config 21 | 22 | 23 | class InfoStruct: 24 | RepoName = '' # repository name 25 | OriginalDir = '' # vuddy root directory 26 | DiffDir = '' 27 | MultimodeFlag = 0 28 | MultiRepoList = [] 29 | GitBinary = config.gitBinary 30 | GitStoragePath = config.gitStoragePath 31 | CveDict = {} 32 | keyword = "CVE-20" 33 | cveID = None 34 | DebugMode = False 35 | 36 | def __init__(self, originalDir, CveDataPath): 37 | self.OriginalDir = originalDir 38 | self.DiffDir = os.path.join(originalDir, 'diff') 39 | with open(CveDataPath, "rb") as f: 40 | self.CveDict = pickle.load(f) 41 | 42 | 43 | """ GLOBALS """ 44 | originalDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # vuddy root directory 45 | cveDataPath = os.path.join(originalDir, "data", "cvedata.pkl") 46 | info = InfoStruct(originalDir, cveDataPath) # first three arg is dummy for now 47 | printLock = mp.Lock() 48 | 49 | 50 | """ FUNCTIONS """ 51 | def parse_argument(): 52 | global info 53 | 54 | parser = argparse.ArgumentParser(prog='get_cvepatch_from_git.py') 55 | parser.add_argument('REPO', 56 | help='''Repository name''') 57 | parser.add_argument('-m', '--multimode', action="store_true", 58 | help='''Turn on Multimode''') 59 | parser.add_argument('-k', '--keyword', 60 | help="Keyword to GREP, default: CVE-20", default="CVE-20") 61 | parser.add_argument('-c', '--cveid', help="CVE id to assign (Only when doing manual keyword search)") 62 | parser.add_argument('-d', '--debug', action="store_true", help=argparse.SUPPRESS) # Hidden Debug Mode 63 | 64 | args = parser.parse_args() 65 | 66 | info.RepoName = args.REPO 67 | info.keyword = args.keyword 68 | info.cveID = args.cveid 69 | info.MultimodeFlag = 0 70 | info.MultiRepoList = [] 71 | if args.multimode: 72 | info.MultimodeFlag = 1 73 | if "Windows" in platform.platform(): 74 | with open(os.path.join(originalDir, 'data', 'repolists', 
'list_' + info.RepoName)) as fp: 75 | for repoLine in fp.readlines(): 76 | if len(repoLine) > 2: 77 | info.MultiRepoList.append(repoLine.rstrip()) 78 | else: 79 | repoBaseDir = os.path.join(info.GitStoragePath, info.RepoName) 80 | command_find = "find " + repoBaseDir + " -type d -exec test -e '{}/.git' ';' -print -prune" 81 | findOutput = subprocess.check_output(command_find, shell=True) 82 | info.MultiRepoList = findOutput.replace(repoBaseDir + "/", "").rstrip().split("\n") 83 | if args.debug: 84 | info.DebugMode = True 85 | 86 | 87 | def init(): 88 | global info 89 | 90 | parse_argument() 91 | 92 | print("Retrieving CVE patch from", info.RepoName) 93 | print("Multi-repo mode:"), 94 | if info.MultimodeFlag: 95 | print("ON.") 96 | else: 97 | print("OFF.") 98 | 99 | print("Initializing..."), 100 | 101 | try: 102 | os.makedirs(os.path.join(info.DiffDir, info.RepoName)) 103 | except OSError: 104 | pass 105 | 106 | print("Done.") 107 | 108 | 109 | def callGitLog(gitDir): 110 | global info 111 | """ 112 | Collect CVE commit log from repository 113 | :param gitDir: repository path 114 | :return: 115 | """ 116 | # print "Calling git log...", 117 | commitsList = [] 118 | gitLogOutput = "" 119 | command_log = "\"{0}\" --no-pager log --all --pretty=fuller --grep=\"{1}\"".format(info.GitBinary, info.keyword) 120 | print(gitDir) 121 | os.chdir(gitDir) 122 | try: 123 | try: 124 | gitLogOutput = subprocess.check_output(command_log, shell=True) 125 | #commitsList = re.split('[\n](?=commit\s\w{40}\nAuthor:\s)|[\n](?=commit\s\w{40}\nMerge:\s)', gitLogOutput) 126 | commitsList = re.split(b'[\n](?=commit\s\w{40}\nAuthor:\s)|[\n](?=commit\s\w{40}\nMerge:\s)', gitLogOutput) 127 | except subprocess.CalledProcessError as e: 128 | print("[-] Git log error:", e) 129 | except UnicodeDecodeError as err: 130 | print("[-] Unicode error:", err) 131 | 132 | # print "Done." 133 | return commitsList 134 | 135 | 136 | def filterCommitMessage(commitMessage): 137 | #추가 138 | commitMessage = commitMessage.decode('utf-8') 139 | """ 140 | Filter false positive commits 141 | Will remove 'Merge', 'Revert', 'Upgrade' commit log 142 | :param commitMessage: commit message 143 | :return: 144 | """ 145 | filterKeywordList = ["merge", "revert", "upgrade"] 146 | matchCnt = 0 147 | for kwd in filterKeywordList: 148 | keywordPattern = r"\W" + kwd + r"\W|\W" + kwd + r"s\W" 149 | compiledKeyworddPattern = re.compile(keywordPattern) 150 | match = compiledKeyworddPattern.search(commitMessage.lower()) 151 | 152 | # bug fixed.. now revert and upgrade commits will be filtered out. 153 | if match: 154 | matchCnt += 1 155 | 156 | if matchCnt > 0: 157 | return 1 158 | else: 159 | return 0 160 | 161 | 162 | def callGitShow(gitBinary, commitHashValue): 163 | """ 164 | Grep data of git show 165 | :param commitHashValue: 166 | :return: 167 | """ 168 | # print "Calling git show...", 169 | command_show = "\"{0}\" show --pretty=fuller {1}".format(gitBinary, commitHashValue) 170 | 171 | gitShowOutput = '' 172 | try: 173 | gitShowOutput = subprocess.check_output(command_show, shell=True) 174 | except subprocess.CalledProcessError as e: 175 | print("error:", e) 176 | 177 | # print "Done." 
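    # Note: under Python 3, subprocess.check_output() returns bytes here;
    # the caller (parallel_process) decodes this output before writing it to a .diff file.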
178 |     return gitShowOutput
179 | 
180 | 
181 | def updateCveInfo(cveDict, cveId):
182 |     """
183 |     Get CVSS score and CWE id from CVE id
184 |     :param cveId:
185 |     :return:
186 |     """
187 |     # print "Updating CVE metadata...",
188 |     cvss = "0.0"
189 |     try:
190 |         cvss = str(cveDict[cveId][0])
191 |     except:
192 |         cvss = "0.0"
193 | 
194 |     cwe = "CWE-000"
195 |     try:
196 |         cwe = cveDict[cveId][1]
197 |     except:
198 |         cwe = "CWE-000"
199 |     if "NVD-CWE-noinfo" in cwe:
200 |         cwe = "CWE-000"
201 |     else:
202 |         cweNum = cwe.split('-')[1].zfill(3)
203 |         cwe = "CWE-" + str(cweNum)
204 | 
205 |     # print "Done."
206 |     return cveId + '_' + cvss + '_' + cwe + '_'
207 | 
208 | 
209 | def process(commitsList, subRepoName):
210 |     global info
211 | 
212 |     flag = 0
213 |     if len(commitsList) > 0 and commitsList[0] == '':
214 |         flag = 1
215 |         print("No commit in", info.RepoName),
216 |     else:
217 |         print(len(commitsList), "commits in", info.RepoName),
218 |         if subRepoName is None:
219 |             print("\n")
220 |         else:
221 |             print(subRepoName)
222 |             os.chdir(os.path.join(info.GitStoragePath, info.RepoName, subRepoName))
223 | 
224 |     if flag:
225 |         return
226 | 
227 |     if info.DebugMode or "Windows" in platform.platform():
228 |         # Windows - do not use multiprocessing
229 |         # Using multiprocessing will lower performance
230 |         for commitMessage in commitsList:
231 |             parallel_process(subRepoName, commitMessage)
232 |     else: # POSIX - use multiprocessing
233 |         pool = mp.Pool()
234 |         parallel_partial = partial(parallel_process, subRepoName)
235 |         pool.map(parallel_partial, commitsList)
236 |         pool.close()
237 |         pool.join()
238 | 
239 | 
240 | def parallel_process(subRepoName, commitMessage):
241 |     global info
242 |     global printLock
243 | 
244 |     if filterCommitMessage(commitMessage):
245 |         return
246 |     else:
247 |         commitHashValue = commitMessage[7:47]
248 |         # convert to UTF-8 if this is a byte string
249 |         if isinstance(commitHashValue, bytes):
250 |             commitHashValue = commitHashValue.decode('utf-8')
251 |         # added: decode the full commit message
252 |         commitMessage = commitMessage.decode('utf-8')
253 |         cvePattern = re.compile('CVE-20\d{2}-\d{4,7}') # note: CVE id can now be 7 digit numbers
254 |         cveIdList = list(set(cvePattern.findall(commitMessage)))
255 | 
256 |         """
257 |         Note: Aug 5, 2016
258 |         If multiple CVE ids are assigned to one commit,
259 |         store the dependency in a file which is named after
260 |         the repo, (e.g., ~/diff/dependency_ubuntu) and use
261 |         one representative CVE that has the smallest ID number
262 |         for filename. 
263 |         A sample:
264 |         CVE-2014-6416_2e9466c84e5beee964e1898dd1f37c3509fa8853 CVE-2014-6418_CVE-2014-6417_CVE-2014-6416_
265 |         """
266 | 
267 |         if len(cveIdList) > 1: # do this only if multiple CVEs are assigned to a commit
268 |             dependency = os.path.join(info.DiffDir, "dependency_" + info.RepoName)
269 |             with open(dependency, "a") as fp:
270 |                 cveIdFull = ""
271 |                 minCve = ""
272 |                 minimum = 9999999
273 |                 for cveId in cveIdList:
274 |                     idDigits = int(cveId.split('-')[2])
275 |                     cveIdFull += cveId + '_'
276 |                     if minimum > idDigits:
277 |                         minimum = idDigits
278 |                         minCve = cveId
279 |                 # three lines added: ensure str before writing
280 |                 minCve = minCve.decode('utf-8') if isinstance(minCve, bytes) else minCve
281 |                 cveIdFull = cveIdFull.decode('utf-8') if isinstance(cveIdFull, bytes) else cveIdFull
282 |                 fp.write(str(minCve + '_' + commitHashValue + '\t' + cveIdFull + '\n'))
283 |         elif len(cveIdList) == 0:
284 |             if info.cveID is None:
285 |                 return
286 |             else:
287 |                 minCve = info.cveID # when CVE ID is given manually through command line argument
288 |         else:
289 |             minCve = cveIdList[0]
290 | 
291 |         # added
292 |         #git_command = f"git show --pretty=fuller {commitHashValue}"
293 |         gitShowOutput = callGitShow(info.GitBinary, commitHashValue)
294 |         # convert gitShowOutput to str if it is a byte string
295 |         if isinstance(gitShowOutput, bytes):
296 |             gitShowOutput = gitShowOutput.decode('latin-1') # only latin-1 works here
297 | 
298 |         finalFileName = updateCveInfo(info.CveDict, minCve)
299 | 
300 |         diffFileName = "{0}{1}.diff".format(finalFileName, commitHashValue)
301 |         try:
302 |             #with open(os.path.join(info.DiffDir, info.RepoName, diffFileName), "w") as fp:
303 |             with open(os.path.join(info.DiffDir, info.RepoName, diffFileName), "w", encoding="utf-8") as fp:
304 |                 if subRepoName is None:
305 |                     fp.write(gitShowOutput)
306 |                 else: # multi-repo mode
307 |                     fp.write(subRepoName + '\n' + gitShowOutput)
308 |             with printLock:
309 |                 print("[+] Writing {0} Done.".format(diffFileName))
310 |         except IOError as e:
311 |             with printLock:
312 |                 print("[+] Writing {0} Error:".format(diffFileName), e)
313 | 
314 | 
315 | def main():
316 |     global info
317 | 
318 |     t1 = time.time()
319 |     init()
320 |     if info.MultimodeFlag:
321 |         for sidx, subRepoName in enumerate(info.MultiRepoList):
322 |             gitDir = os.path.join(info.GitStoragePath, info.RepoName, subRepoName) # where .git exists
323 |             commitsList = callGitLog(gitDir)
324 |             print(os.path.join(str(sidx + 1), str(len(info.MultiRepoList))))
325 |             if 0 < len(commitsList):
326 |                 process(commitsList, subRepoName)
327 |     else:
328 |         gitDir = os.path.join(info.GitStoragePath, info.RepoName) # where .git exists
329 |         commitsList = callGitLog(gitDir)
330 |         process(commitsList, None)
331 | 
332 |     repoDiffDir = os.path.join(info.DiffDir, info.RepoName)
333 |     print(str(len(os.listdir(repoDiffDir))) + " patches saved in", repoDiffDir)
334 |     print("Done. 
(" + str(time.time() - t1) + " sec)") 335 | 336 | 337 | if __name__ == '__main__': 338 | mp.freeze_support() 339 | main() 340 | -------------------------------------------------------------------------------- /src/get_source_from_cvepatch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | import re 6 | import glob 7 | import argparse 8 | import multiprocessing as mp 9 | from functools import partial 10 | import platform 11 | import time 12 | # Import from parent directory 13 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 14 | try: # for backward-compatibility (in the main repository) 15 | import hmark.parseutility as parseutility 16 | except ImportError: # for subrepo 17 | import tools.parseutility as parseutility 18 | 19 | import config 20 | 21 | # GLOBALS 22 | originalDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # vuddy root directory 23 | diffDir = os.path.join(originalDir, "diff") 24 | # resultList = [] 25 | dummyFunction = parseutility.function(None) 26 | multimodeFlag = 0 27 | debugMode = False 28 | 29 | parseutility.setEnvironment("") 30 | 31 | t1 = time.time() 32 | 33 | """ re patterns """ 34 | pat_src = '[\n](?=diff --git a/)' 35 | pat_chunk = '[\n](?=@@\s[^a-zA-Z]*\s[^a-zA-Z]*\s@@)' 36 | pat_linenum = r"-(\d+,\d+) \+(\d+,\d+) " 37 | pat_linenum = re.compile(pat_linenum) 38 | 39 | 40 | def init(): 41 | # ARGUMENTS 42 | global repoName 43 | global multimodeFlag 44 | global total 45 | global debugMode 46 | 47 | parser = argparse.ArgumentParser() 48 | parser.add_argument('REPO', 49 | help='''Repository name''') 50 | parser.add_argument('-m', '--multimode', action="store_true", 51 | help='''Turn on Multimode''') 52 | parser.add_argument('-d', '--debug', action="store_true", help=argparse.SUPPRESS) # Hidden Debug Mode 53 | 54 | args = parser.parse_args() 55 | 56 | if args.REPO is None: 57 | parser.print_help() 58 | exit() 59 | repoName = args.REPO # name of the directory that holds DIFF patches 60 | if args.multimode: 61 | multimodeFlag = 1 62 | if args.debug: 63 | debugMode = True 64 | 65 | msg = "Retrieve vulnerable functions from {0}\nMulti-repo mode: ".format(repoName) 66 | if multimodeFlag: 67 | print(msg + "On") 68 | else: 69 | print(msg + "Off") 70 | 71 | # try making missing directories 72 | try: 73 | os.makedirs(os.path.join(originalDir, 'tmp')) 74 | except OSError as e: 75 | pass 76 | try: 77 | os.makedirs(os.path.join(originalDir, 'vul', repoName)) 78 | except OSError as e: 79 | pass 80 | 81 | total = len(os.listdir(os.path.join(diffDir, repoName))) 82 | 83 | 84 | def source_from_cvepatch(ctr, diffFileName): # diffFileName holds the filename of each DIFF patch 85 | # diffFileName looks like: CVE-2012-2372_7a9bc620049fed37a798f478c5699a11726b3d33.diff 86 | global repoName 87 | global debugMode 88 | global total 89 | global multimodeFlag 90 | global dummyFunction 91 | global diffDir 92 | global originalDir 93 | 94 | chunksCnt = 0 # number of DIFF patches 95 | currentCounter = 0 96 | 97 | with ctr.diffFileCntLock: 98 | currentCounter = ctr.diffFileCnt.value 99 | print(str(ctr.diffFileCnt.value + 1) + '/' + str(total)), 100 | ctr.diffFileCnt.value += 1 101 | 102 | if os.path.getsize(os.path.join(diffDir, repoName, diffFileName)) > 1000000: 103 | # don't do anything with big DIFFs (merges, upgrades, ...). 
104 | print("[-]", diffFileName, "\t(file too large)") 105 | else: 106 | diffFileNameSplitted = diffFileName.split('_') 107 | cveId = diffFileNameSplitted[0] # use only one CVEid 108 | commitHashValue = diffFileNameSplitted[-1].split('.')[0] 109 | 110 | print("[+]", diffFileName, "\t(proceed)") 111 | with open(os.path.join(diffDir, repoName, diffFileName), 'r') as fp: 112 | patchLines = ''.join(fp.readlines()) 113 | patchLinesSplitted = re.split(pat_src, patchLines) 114 | commitLog = patchLinesSplitted[0] 115 | affectedFilesList = patchLinesSplitted[1:] 116 | 117 | repoPath = '' 118 | if multimodeFlag: # multimode DIFFs have repoPath at the beginning. 119 | repoPath = commitLog.split('\n')[0].rstrip().lstrip("\xef\xbb\xbf") 120 | 121 | numAffectedFiles = len(affectedFilesList) 122 | for aidx, affectedFile in enumerate(affectedFilesList): 123 | if debugMode: 124 | print("\tFile # " + str(aidx + 1) + '/' + str(numAffectedFiles)), 125 | firstLine = affectedFile.split('\n')[0] # git --diff a/path/filename.ext b/path/filename.ext 126 | affectedFileName = firstLine.split("--git ")[1].split(" ")[0].split("/")[-1] 127 | codePath = firstLine.split(' b')[1].strip() # path/filename.ext 128 | 129 | if not codePath.endswith(".c") and not codePath.endswith(".cpp") and not codePath.endswith(".cc") and not codePath.endswith(".c++") and not codePath.endswith(".cxx"): 130 | if debugMode: 131 | print("\t[-]", codePath, "(wrong extension)") 132 | else: 133 | secondLine = affectedFile.split('\n')[1] 134 | 135 | if secondLine.startswith("index") == 0: # or secondLine.endswith("100644") == 0: 136 | if debugMode: 137 | print("\t[-]", codePath, "(invalid metadata)") # we are looking for "index" only. 138 | else: 139 | if debugMode: 140 | print("\t[+]", codePath) 141 | indexHashOld = secondLine.split(' ')[1].split('..')[0] 142 | indexHashNew = secondLine.split(' ')[1].split('..')[1] 143 | 144 | chunksList = re.split(pat_chunk, affectedFile)[1:] # diff file per chunk (in list) 145 | chunksCnt += len(chunksList) 146 | 147 | if multimodeFlag: 148 | os.chdir(os.path.join(config.gitStoragePath, repoName, repoPath)) 149 | else: 150 | os.chdir(os.path.join(config.gitStoragePath, repoName)) 151 | 152 | tmpOldFileName = os.path.join(originalDir, "tmp", "{0}_{1}_old".format(repoName, currentCounter)) 153 | command_show = "\"{0}\" show {1} > {2}".format(config.gitBinary, indexHashOld, tmpOldFileName) 154 | os.system(command_show) 155 | 156 | tmpNewFileName = os.path.join(originalDir, "tmp", "{0}_{1}_new".format(repoName, currentCounter)) 157 | command_show = "\"{0}\" show {1} > {2}".format(config.gitBinary, indexHashNew, tmpNewFileName) 158 | os.system(command_show) 159 | 160 | os.chdir(originalDir) 161 | oldFunctionInstanceList = parseutility.parseFile_shallow(tmpOldFileName, "") 162 | newFunctionInstanceList = parseutility.parseFile_shallow(tmpNewFileName, "") 163 | 164 | finalOldFunctionList = [] 165 | 166 | numChunks = len(chunksList) 167 | for ci, chunk in enumerate(chunksList): 168 | if debugMode: 169 | print("\t\tChunk # " + str(ci + 1) + "/" + str(numChunks)), 170 | 171 | chunkSplitted = chunk.split('\n') 172 | chunkFirstLine = chunkSplitted[0] 173 | chunkLines = chunkSplitted[1:] 174 | 175 | if debugMode: 176 | print(chunkFirstLine) 177 | lineNums = pat_linenum.search(chunkFirstLine) 178 | oldLines = lineNums.group(1).split(',') 179 | newLines = lineNums.group(2).split(',') 180 | 181 | offset = int(oldLines[0]) 182 | pmList = [] 183 | lnList = [] 184 | for chunkLine in chunkSplitted[1:]: 185 | if len(chunkLine) != 0: 
186 | pmList.append(chunkLine[0]) 187 | 188 | for i, pm in enumerate(pmList): 189 | if pm == ' ' or pm == '-': 190 | lnList.append(offset + i) 191 | elif pm == '+': 192 | lnList.append(offset + i - 1) 193 | offset -= 1 194 | 195 | """ HERE, ADD CHECK FOR NEW FUNCTIONS """ 196 | hitOldFunctionList = [] 197 | for f in oldFunctionInstanceList: 198 | # print f.lines[0], f.lines[1] 199 | 200 | for num in range(f.lines[0], f.lines[1] + 1): 201 | if num in lnList: 202 | # print "Hit at", num 203 | 204 | hitOldFunctionList.append(f) 205 | break # found the function to be patched 206 | 207 | # if f.lines[0] <= offset <= f.lines[1]: 208 | # print "\t\t\tOffset HIT!!", f.name 209 | # elif f.lines[0] <= bound <= f.lines[1]: 210 | # print "\t\t\tBound HIT!!", f.name 211 | 212 | for f in hitOldFunctionList: 213 | # print "Verify hitFunction", f.name 214 | # print "ln", 215 | for num in range(f.lines[0], f.lines[1] + 1): 216 | # print num, 217 | try: 218 | listIndex = lnList.index(num) 219 | except ValueError: 220 | pass 221 | else: 222 | if lnList.count(num) > 1: 223 | listIndex += 1 224 | # print "\nmatch:", num 225 | # print "value\t", chunkSplitted[1:][lnList.index(num)] 226 | # print "pm \t", pmList[lnList.index(num)] 227 | if pmList[listIndex] == '+' or pmList[listIndex] == '-': 228 | # print "Maybe meaningful", 229 | flag = 0 230 | for commentKeyword in ["/*", "*/", "//", "*"]: 231 | if chunkLines[listIndex][1:].lstrip().startswith(commentKeyword): 232 | flag = 1 233 | break 234 | if flag: 235 | pass 236 | # print "but not." 237 | else: 238 | # print "MEANINGFUL!!" 239 | finalOldFunctionList.append(f) 240 | break 241 | else: 242 | pass 243 | # print "Not meaningful" 244 | # print "============\n" 245 | 246 | finalOldFunctionList = list(set(finalOldFunctionList)) # sometimes list has dups 247 | 248 | finalNewFunctionList = [] 249 | for fold in finalOldFunctionList: 250 | flag = 0 251 | for fnew in newFunctionInstanceList: 252 | if fold.name == fnew.name: 253 | finalNewFunctionList.append(fnew) 254 | flag = 1 255 | break 256 | if not flag: 257 | finalNewFunctionList.append(dummyFunction) 258 | 259 | if debugMode: 260 | print("\t\t\t", len(finalNewFunctionList), "functions found.") 261 | vulFileNameBase = diffFileName.split('.diff')[0] + '_' + affectedFileName 262 | 263 | # os.chdir(os.path.join(originalDir, "vul", repoName)) 264 | 265 | for index, f in enumerate(finalOldFunctionList): 266 | os.chdir(originalDir) 267 | oldFuncInstance = finalOldFunctionList[index] 268 | 269 | fp = open(oldFuncInstance.parentFile, 'r') 270 | srcFileRaw = fp.readlines() 271 | fp.close() 272 | finalOldFunction = ''.join(srcFileRaw[oldFuncInstance.lines[0]-1:oldFuncInstance.lines[1]]) 273 | 274 | # oldFuncArgs = '' 275 | # for ai, funcArg in enumerate(oldFuncInstance.parameterList): 276 | # oldFuncArgs += "DTYPE " + funcArg 277 | # if ai + 1 != len(oldFuncInstance.parameterList): 278 | # oldFuncArgs += ', ' 279 | # finalOldFunction = "DTYPE {0} ({1})\n{{ {2}\n}}"\ 280 | # .format(oldFuncInstance.name, oldFuncArgs, oldFuncInstance.funcBody) 281 | 282 | finalOldFuncId = str(oldFuncInstance.funcId) 283 | 284 | newFuncInstance = finalNewFunctionList[index] 285 | 286 | if newFuncInstance.name is None: 287 | finalNewFunction = "" 288 | else: 289 | fp = open(newFuncInstance.parentFile, 'r') 290 | srcFileRaw = fp.readlines() 291 | fp.close() 292 | finalNewFunction = ''.join(srcFileRaw[newFuncInstance.lines[0]-1:newFuncInstance.lines[1]]) 293 | 294 | # finalNewFunction = finalNewFunctionList[index].funcBody 295 | 296 | 
finalOldBody = finalOldFunction[finalOldFunction.find('{')+1:finalOldFunction.rfind('}')] 297 | finalNewBody = finalNewFunction[finalNewFunction.find('{')+1:finalNewFunction.rfind('}')] 298 | tmpold = parseutility.normalize(parseutility.removeComment(finalOldBody)) 299 | tmpnew = parseutility.normalize(parseutility.removeComment(finalNewBody)) 300 | 301 | if tmpold != tmpnew and len(tmpnew) > 0: 302 | # if two are same, it means nothing but comment is patched. 303 | with ctr.functionCntLock: 304 | ctr.functionCnt.value += 1 305 | os.chdir(os.path.join(originalDir, "vul", repoName)) 306 | vulOldFileName = vulFileNameBase + '_' + finalOldFuncId + "_OLD.vul" 307 | vulNewFileName = vulFileNameBase + '_' + finalOldFuncId + "_NEW.vul" 308 | with open(vulOldFileName, 'w') as fp: 309 | fp.write(finalOldFunction) 310 | with open(vulNewFileName, 'w') as fp: 311 | if finalNewFunctionList[index].name is not None: 312 | fp.write(finalNewFunction) 313 | else: 314 | fp.write("") 315 | diffCommand = "\"{0}\" -u {1} {2} > {3}_{4}.patch".format(config.diffBinary, 316 | vulOldFileName, 317 | vulNewFileName, 318 | vulFileNameBase, 319 | finalOldFuncId) 320 | os.system(diffCommand) 321 | 322 | 323 | def main(): 324 | 325 | ctr = Counter() 326 | diffList = os.listdir(os.path.join(diffDir, repoName)) 327 | if debugMode or "Windows" in platform.platform(): 328 | # Windows - do not use multiprocessing 329 | # Using multiprocessing will lower performance 330 | for diffFile in diffList: 331 | source_from_cvepatch(ctr, diffFile) 332 | else: # POSIX - use multiprocessing 333 | pool = mp.Pool() 334 | parallel_partial = partial(source_from_cvepatch, ctr) 335 | pool.map(parallel_partial, diffList) 336 | pool.close() 337 | pool.join() 338 | 339 | # delete temp source files 340 | wildcard_temp = os.path.join(originalDir, "tmp", repoName + "_*") 341 | for f in glob.glob(wildcard_temp): 342 | os.remove(f) 343 | 344 | print("") 345 | print("Done getting vulnerable functions from", repoName) 346 | #print "Reconstructed", len( 347 | # os.listdir(os.path.join(originalDir, 'vul', repoName))), "vulnerable functions from", diffFileCnt.value, "patches." 
348 | print("Reconstructed", ctr.functionCnt.value, "vulnerable functions from", ctr.diffFileCnt.value, "patches.") 349 | print("Elapsed: %.2f sec" % (time.time()-t1)) 350 | 351 | 352 | if __name__ == "__main__": 353 | mp.freeze_support() 354 | class Counter: 355 | diffFileCnt = mp.Value('i', 0) 356 | diffFileCntLock = mp.Manager().Lock() 357 | functionCnt = mp.Value('i', 0) 358 | functionCntLock = mp.Manager().Lock() 359 | init() 360 | main() 361 | -------------------------------------------------------------------------------- /src/repo_updater.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import urllib2 5 | import git 6 | import json 7 | import logging 8 | 9 | 10 | def is_git_repo(path): 11 | try: 12 | _ = git.Repo(path).git_dir 13 | return True 14 | except git.exc.InvalidGitRepositoryError: 15 | return False 16 | 17 | logger = logging.getLogger(__name__) 18 | logger.setLevel(logging.DEBUG) 19 | formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") 20 | file_handler = logging.FileHandler("repo_updater.log") 21 | file_handler.setFormatter(formatter) 22 | stream_handler = logging.StreamHandler() 23 | logger.addHandler(file_handler) 24 | logger.addHandler(stream_handler) 25 | 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('REPO', help='''Repository name''') 28 | 29 | args = parser.parse_args() 30 | 31 | if args.REPO is None: 32 | parser.print_help() 33 | exit() 34 | 35 | cwd = os.getcwd() 36 | repo_name = args.REPO 37 | git_dir = "/home/whiteboxDB/gitrepos" 38 | 39 | if repo_name.lower() == "android": 40 | url_base = "https://android.googlesource.com/" 41 | url_list = url_base + "?format=TEXT" 42 | response = urllib2.urlopen(url_list) 43 | repo_list = response.read().rstrip().split("\n") 44 | repo_base = os.path.join(git_dir, repo_name) 45 | elif repo_name.lower() == "chromium": 46 | url_base = "https://chromium.googlesource.com/" 47 | url_list = url_base + "?format=TEXT" 48 | response = urllib2.urlopen(url_list) 49 | repo_list = response.read().rstrip().split("\n") 50 | repo_base = os.path.join(git_dir, repo_name) 51 | 52 | if not os.path.isdir(repo_base): 53 | os.mkdir(repo_base) 54 | 55 | for ri, repo in enumerate(repo_list): 56 | target_dir = os.path.join(repo_base, repo) 57 | infostr = str(ri+1) + "/" + str(len(repo_list)) + "\t" + repo 58 | if os.path.isdir(target_dir) and is_git_repo(target_dir): 59 | infostr += " EXISTS (PULL)" 60 | logger.info(infostr) 61 | os.chdir(target_dir) 62 | os.system("git pull") 63 | os.chdir(cwd) 64 | else: 65 | infostr += " DOESN'T EXIST (CLONE)" 66 | logger.info(infostr) 67 | os.system("git clone {0}{1} {2}".format(url_base, repo, target_dir)) 68 | 69 | 70 | -------------------------------------------------------------------------------- /src/vul_dup_remover.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | import hashlib 6 | 7 | # Import from parent directory 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | try: 10 | from hmark.parseutility import normalize 11 | except ImportError: 12 | from tools.parseutility import normalize 13 | 14 | hashdict = {} 15 | cntdict = {} 16 | vulcntlist = [] 17 | repolist = [] 18 | 19 | originalDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # vuddy root directory 20 | vulsDir = os.path.join(originalDir, "vul") 21 | dirs = os.listdir(vulsDir) 
22 | dirs.sort() 23 | os.chdir(vulsDir) 24 | for d in dirs: 25 | if os.path.isdir(d): 26 | repolist.append(d) 27 | cntdict[d] = 0 28 | # print d 29 | # print repolist 30 | vulcntlist.append(len(os.listdir(d))) 31 | # print vulcntlist 32 | for vul in os.listdir(d): 33 | if vul.endswith("OLD.vul"): 34 | with open(os.path.join(d, vul), "r") as fp: 35 | text = '\n'.join(fp.readlines()) 36 | #text = normalize(text) 37 | text = normalize(text).encode('utf-8') 38 | checksum = hashlib.md5(text).hexdigest() 39 | try: 40 | hashdict[checksum].append(d + ' ' + vul) 41 | except: 42 | hashdict[checksum] = [d + ' ' + vul] 43 | 44 | cnt = 0 45 | 46 | for key in hashdict: 47 | if len(hashdict[key]) > 1: 48 | for vul in hashdict[key][1:]: 49 | cnt += 1 50 | repo = vul.split(' ')[0] 51 | rest = vul.split(' ')[1] 52 | base = rest[:-8] 53 | cntdict[repo] += 1 54 | os.remove(os.path.join(repo, rest)) 55 | try: 56 | os.remove(os.path.join(repo, base + "_NEW.vul")) 57 | os.remove(os.path.join(repo, base + ".patch")) 58 | except: 59 | pass 60 | 61 | print("[RESULT]") 62 | for idx, r in enumerate(repolist): 63 | print('\t' + r + ":\tdeleted " + str(cntdict[r]) + " duplicate files from " + str(vulcntlist[idx]) + " files.") 64 | 65 | print("Total:", cnt, "duplicate files.") 66 | -------------------------------------------------------------------------------- /src/vul_hidx_generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | import hashlib 6 | import time 7 | import argparse 8 | import multiprocessing as mp 9 | from functools import partial 10 | # Import from parent directory 11 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 12 | try: 13 | import hmark.parseutility as parser 14 | except ImportError: 15 | import tools.parseutility as parser 16 | import config 17 | 18 | 19 | def parse_function(absLvl, srcFile): 20 | if absLvl == 0: 21 | functionInstanceList = parser.parseFile_shallow(srcFile, "") 22 | return (srcFile, functionInstanceList, None) 23 | elif absLvl == 4: 24 | functionInstanceList = parser.parseFile_deep(srcFile, "") 25 | # Some lines below are added by Squizz on Jan 16, for FP reduction! 
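        # Parse the patched (NEW.vul) counterpart as well; functions whose abstracted bodies are
        # identical in the OLD and NEW versions are skipped later (the hashValue == hashValueNew
        # check), which removes false-positive hash entries.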
26 | functionInstanceList_New = parser.parseFile_deep(srcFile.replace("OLD.vul", "NEW.vul"), "") 27 | return (srcFile, functionInstanceList, functionInstanceList_New) 28 | 29 | 30 | def main(): 31 | originalDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # vuddy root directory 32 | vulsDir = os.path.join(originalDir, "vul") 33 | 34 | arg_parser = argparse.ArgumentParser() 35 | arg_parser.add_argument('REPO', 36 | help='''Repository name''') 37 | arg_parser.add_argument('-a', '--abstract-level', required=True, type=int, nargs=1, choices=[0, 4], 38 | help='''Abstract Level''') 39 | 40 | args = arg_parser.parse_args() 41 | 42 | projName = args.REPO 43 | intendedAbsLvl = 4 44 | if args.abstract_level: 45 | intendedAbsLvl = args.abstract_level[0] 46 | 47 | projDictList = [] 48 | hashFileMapList = [] 49 | for i in range(0, 5): 50 | projDictList.append({}) 51 | hashFileMapList.append({}) 52 | 53 | print("loading source"), 54 | srcFileList = parser.loadVul(os.path.join(vulsDir, projName)) 55 | print("(done)") 56 | 57 | time0 = time.time() 58 | 59 | numFiles = len(srcFileList) 60 | numFuncs = 0 61 | numLines = 0 62 | 63 | pool = mp.Pool() 64 | func = partial(parse_function, intendedAbsLvl) 65 | for srcFileIdx, returnTuple in enumerate(pool.imap(func, srcFileList)): 66 | srcFile = returnTuple[0] 67 | functionInstanceList = returnTuple[1] 68 | functionInstanceList_New = returnTuple[2] 69 | 70 | print(srcFileIdx + 1, '/', len(srcFileList), srcFile) 71 | numFuncs += len(functionInstanceList) 72 | if len(functionInstanceList) > 0: 73 | numLines += functionInstanceList[0].parentNumLoc 74 | 75 | for fi, f in enumerate(functionInstanceList): 76 | f.removeListDup() 77 | path = f.parentFile 78 | path = "." + path[f.parentFile.find("/vul/"):] 79 | absBody = parser.abstract(f, intendedAbsLvl)[1] 80 | #absBody = parser.normalize(absBody).encode('utf-8') 81 | absBody = parser.normalize(absBody) 82 | # print absBody 83 | funcLen = len(absBody) 84 | # print funcLen, absBody 85 | # print len(absBody) 86 | hashValue = hashlib.md5(absBody).hexdigest() 87 | 88 | if intendedAbsLvl == 4 and len(functionInstanceList_New) > 0: 89 | fnew = functionInstanceList_New[fi] 90 | fnew.removeListDup() 91 | absBodyNew = parser.abstract(fnew, intendedAbsLvl)[1] 92 | absBodyNew = parser.normalize(absBodyNew) 93 | hashValueNew = hashlib.md5(absBodyNew).hexdigest() 94 | 95 | if hashValue == hashValueNew: 96 | # if abstract bodies of old and new func are identical, 97 | # don't create hash index 98 | continue 99 | 100 | try: 101 | projDictList[intendedAbsLvl][funcLen].append(hashValue) 102 | except KeyError: 103 | projDictList[intendedAbsLvl][funcLen] = [hashValue] 104 | 105 | try: 106 | hashFileMapList[intendedAbsLvl][hashValue].extend([path, f.funcId]) 107 | except KeyError: 108 | hashFileMapList[intendedAbsLvl][hashValue] = [path, f.funcId] 109 | 110 | pool.close() 111 | pool.join() 112 | 113 | packageInfo = config.version + ' ' + str(projName) + ' ' + str(numFiles) + ' ' + str(numFuncs) + ' ' + str( 114 | numLines) + '\n' 115 | hidxDir = os.path.join(originalDir, "hidx") 116 | if os.path.exists(hidxDir) is False: 117 | os.makedirs(hidxDir) 118 | hidxFile = os.path.join(hidxDir, "hashmark_{0}_{1}.hidx".format(intendedAbsLvl, projName)) 119 | with open(hidxFile, 'w') as fp: 120 | fp.write(packageInfo) 121 | for key in sorted(projDictList[intendedAbsLvl]): 122 | fp.write(str(key) + '\t') 123 | for h in list(set(projDictList[intendedAbsLvl][key])): 124 | fp.write(h + '\t') 125 | fp.write('\n') 126 | 127 | 
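        # A '=====' separator splits the .hidx file: the length -> hash table written above,
        # and the hash -> [path, funcId] map written below.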
fp.write('\n=====\n') 128 | 129 | for key in sorted(hashFileMapList[intendedAbsLvl]): 130 | fp.write(str(key) + '\t') 131 | for f in hashFileMapList[intendedAbsLvl][key]: 132 | fp.write(str(f) + '\t') 133 | fp.write('\n') 134 | 135 | print("Hash index saved to:", os.path.join(originalDir, "hidx", "hashmark_{0}_{1}.hidx".format(intendedAbsLvl, projName))) 136 | time1 = time.time() 137 | print("Elapsed time:", time1 - time0) 138 | 139 | 140 | if __name__ == "__main__": 141 | mp.freeze_support() 142 | main() 143 | -------------------------------------------------------------------------------- /src/vul_verifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | # Import from parent directory 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 7 | try: 8 | import hmark.parseutility as pu 9 | except ImportError: 10 | import tools.parseutility as pu 11 | 12 | 13 | def getBody(original): 14 | return original[original.find('{')+1:original.rfind('}')] 15 | 16 | originalDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # vuddy root directory 17 | vulsDir = os.path.join(originalDir, "vul") 18 | dirs = os.listdir(vulsDir) 19 | rmcntDict = {} 20 | for dir in dirs: 21 | # print dir 22 | if dir != "chromium": 23 | continue 24 | for vul in os.listdir(os.path.join(vulsDir, dir)): 25 | if vul.endswith("OLD.vul"): 26 | with open(os.path.join(vulsDir, dir, vul), "r") as fp: 27 | raw = ''.join(fp.readlines()) 28 | body = getBody(pu.removeComment(raw)) 29 | 30 | if body.count(";") == 1: 31 | kill = 1 # this function must be single-line 32 | else: 33 | kill = 0 34 | 35 | cnt = 0 36 | for line in body.split('\n'): 37 | if len(line.strip()) > 0: 38 | cnt += 1 # cnt will be 1 for single lined functions 39 | 40 | with open(os.path.join(vulsDir, dir, vul[:-8] + "_NEW.vul"), 'r') as fp: 41 | newraw = ''.join(fp.readlines()) 42 | newbody = getBody(pu.removeComment(newraw)) 43 | 44 | if kill == 1 or cnt == 1 or pu.normalize(body) == pu.normalize(newbody) or len(newraw) == 0: 45 | vulBase = vul[:-8] 46 | os.remove(os.path.join(vulsDir, dir, vulBase + "_OLD.vul")) 47 | os.remove(os.path.join(vulsDir, dir, vulBase + "_NEW.vul")) 48 | os.remove(os.path.join(vulsDir, dir, vulBase + ".patch")) 49 | try: 50 | rmcntDict[dir] += 1 51 | except: 52 | rmcntDict[dir] = 1 53 | 54 | for dir in rmcntDict: 55 | print("removed", rmcntDict[dir], "FP records from", dir) 56 | -------------------------------------------------------------------------------- /testcode/async.c: -------------------------------------------------------------------------------- 1 | /* 2 | * async.c: Asynchronous function calls for boot performance 3 | * 4 | * (C) Copyright 2009 Intel Corporation 5 | * Author: Arjan van de Ven 6 | * 7 | * This program is free software; you can redistribute it and/or 8 | * modify it under the terms of the GNU General Public License 9 | * as published by the Free Software Foundation; version 2 10 | * of the License. 11 | */ 12 | 13 | 14 | /* 15 | 16 | Goals and Theory of Operation 17 | 18 | The primary goal of this feature is to reduce the kernel boot time, 19 | by doing various independent hardware delays and discovery operations 20 | decoupled and not strictly serialized. 
21 | 22 | More specifically, the asynchronous function call concept allows 23 | certain operations (primarily during system boot) to happen 24 | asynchronously, out of order, while these operations still 25 | have their externally visible parts happen sequentially and in-order. 26 | (not unlike how out-of-order CPUs retire their instructions in order) 27 | 28 | Key to the asynchronous function call implementation is the concept of 29 | a "sequence cookie" (which, although it has an abstracted type, can be 30 | thought of as a monotonically incrementing number). 31 | 32 | The async core will assign each scheduled event such a sequence cookie and 33 | pass this to the called functions. 34 | 35 | The asynchronously called function should before doing a globally visible 36 | operation, such as registering device numbers, call the 37 | async_synchronize_cookie() function and pass in its own cookie. The 38 | async_synchronize_cookie() function will make sure that all asynchronous 39 | operations that were scheduled prior to the operation corresponding with the 40 | cookie have completed. 41 | 42 | Subsystem/driver initialization code that scheduled asynchronous probe 43 | functions, but which shares global resources with other drivers/subsystems 44 | that do not use the asynchronous call feature, need to do a full 45 | synchronization with the async_synchronize_full() function, before returning 46 | from their init function. This is to maintain strict ordering between the 47 | asynchronous and synchronous parts of the kernel. 48 | 49 | */ 50 | 51 | #include 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | 60 | #include "workqueue_internal.h" 61 | 62 | static async_cookie_t next_cookie = 1; 63 | 64 | #define MAX_WORK 32768 65 | #define ASYNC_COOKIE_MAX ULLONG_MAX /* infinity cookie */ 66 | 67 | static LIST_HEAD(async_global_pending); /* pending from all registered doms */ 68 | static ASYNC_DOMAIN(async_dfl_domain); 69 | static DEFINE_SPINLOCK(async_lock); 70 | 71 | struct async_entry { 72 | struct list_head domain_list; 73 | struct list_head global_list; 74 | struct work_struct work; 75 | async_cookie_t cookie; 76 | async_func_t func; 77 | void *data; 78 | struct async_domain *domain; 79 | }; 80 | 81 | static DECLARE_WAIT_QUEUE_HEAD(async_done); 82 | 83 | static atomic_t entry_count; 84 | 85 | static async_cookie_t lowest_in_progress(struct async_domain *domain) 86 | { 87 | struct list_head *pending; 88 | async_cookie_t ret = ASYNC_COOKIE_MAX; 89 | unsigned long flags; 90 | 91 | spin_lock_irqsave(&async_lock, flags); 92 | 93 | if (domain) 94 | pending = &domain->pending; 95 | else 96 | pending = &async_global_pending; 97 | 98 | if (!list_empty(pending)) 99 | ret = list_first_entry(pending, struct async_entry, 100 | domain_list)->cookie; 101 | 102 | spin_unlock_irqrestore(&async_lock, flags); 103 | return ret; 104 | } 105 | 106 | /* 107 | * pick the first pending entry and run it 108 | */ 109 | static void async_run_entry_fn(struct work_struct *work) 110 | { 111 | struct async_entry *entry = 112 | container_of(work, struct async_entry, work); 113 | unsigned long flags; 114 | ktime_t uninitialized_var(calltime), delta, rettime; 115 | 116 | /* 1) run (and print duration) */ 117 | if (initcall_debug && system_state == SYSTEM_BOOTING) { 118 | pr_debug("calling %lli_%pF @ %i\n", 119 | (long long)entry->cookie, 120 | entry->func, task_pid_nr(current)); 121 | calltime = ktime_get(); 122 | } 123 | entry->func(entry->data, entry->cookie); 124 | if 
(initcall_debug && system_state == SYSTEM_BOOTING) { 125 | rettime = ktime_get(); 126 | delta = ktime_sub(rettime, calltime); 127 | pr_debug("initcall %lli_%pF returned 0 after %lld usecs\n", 128 | (long long)entry->cookie, 129 | entry->func, 130 | (long long)ktime_to_ns(delta) >> 10); 131 | } 132 | 133 | /* 2) remove self from the pending queues */ 134 | spin_lock_irqsave(&async_lock, flags); 135 | list_del_init(&entry->domain_list); 136 | list_del_init(&entry->global_list); 137 | 138 | /* 3) free the entry */ 139 | kfree(entry); 140 | atomic_dec(&entry_count); 141 | 142 | spin_unlock_irqrestore(&async_lock, flags); 143 | 144 | /* 4) wake up any waiters */ 145 | wake_up(&async_done); 146 | } 147 | 148 | static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain) 149 | { 150 | struct async_entry *entry; 151 | unsigned long flags; 152 | async_cookie_t newcookie; 153 | 154 | /* allow irq-off callers */ 155 | entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC); 156 | 157 | /* 158 | * If we're out of memory or if there's too much work 159 | * pending already, we execute synchronously. 160 | */ 161 | if (!entry || atomic_read(&entry_count) > MAX_WORK) { 162 | kfree(entry); 163 | spin_lock_irqsave(&async_lock, flags); 164 | newcookie = next_cookie++; 165 | spin_unlock_irqrestore(&async_lock, flags); 166 | 167 | /* low on memory.. run synchronously */ 168 | func(data, newcookie); 169 | return newcookie; 170 | } 171 | INIT_LIST_HEAD(&entry->domain_list); 172 | INIT_LIST_HEAD(&entry->global_list); 173 | INIT_WORK(&entry->work, async_run_entry_fn); 174 | entry->func = func; 175 | entry->data = data; 176 | entry->domain = domain; 177 | 178 | spin_lock_irqsave(&async_lock, flags); 179 | 180 | /* allocate cookie and queue */ 181 | newcookie = entry->cookie = next_cookie++; 182 | 183 | list_add_tail(&entry->domain_list, &domain->pending); 184 | if (domain->registered) 185 | list_add_tail(&entry->global_list, &async_global_pending); 186 | 187 | atomic_inc(&entry_count); 188 | spin_unlock_irqrestore(&async_lock, flags); 189 | 190 | /* mark that this task has queued an async job, used by module init */ 191 | current->flags |= PF_USED_ASYNC; 192 | 193 | /* schedule for execution */ 194 | queue_work(system_unbound_wq, &entry->work); 195 | 196 | return newcookie; 197 | } 198 | 199 | /** 200 | * async_schedule - schedule a function for asynchronous execution 201 | * @func: function to execute asynchronously 202 | * @data: data pointer to pass to the function 203 | * 204 | * Returns an async_cookie_t that may be used for checkpointing later. 205 | * Note: This function may be called from atomic or non-atomic contexts. 206 | */ 207 | async_cookie_t async_schedule(async_func_t func, void *data) 208 | { 209 | return __async_schedule(func, data, &async_dfl_domain); 210 | } 211 | EXPORT_SYMBOL_GPL(async_schedule); 212 | 213 | /** 214 | * async_schedule_domain - schedule a function for asynchronous execution within a certain domain 215 | * @func: function to execute asynchronously 216 | * @data: data pointer to pass to the function 217 | * @domain: the domain 218 | * 219 | * Returns an async_cookie_t that may be used for checkpointing later. 220 | * @domain may be used in the async_synchronize_*_domain() functions to 221 | * wait within a certain synchronization domain rather than globally. A 222 | * synchronization domain is specified via @domain. Note: This function 223 | * may be called from atomic or non-atomic contexts. 
224 | */ 225 | async_cookie_t async_schedule_domain(async_func_t func, void *data, 226 | struct async_domain *domain) 227 | { 228 | return __async_schedule(func, data, domain); 229 | } 230 | EXPORT_SYMBOL_GPL(async_schedule_domain); 231 | 232 | /** 233 | * async_synchronize_full - synchronize all asynchronous function calls 234 | * 235 | * This function waits until all asynchronous function calls have been done. 236 | */ 237 | void async_synchronize_full(void) 238 | { 239 | async_synchronize_full_domain(NULL); 240 | } 241 | EXPORT_SYMBOL_GPL(async_synchronize_full); 242 | 243 | /** 244 | * async_unregister_domain - ensure no more anonymous waiters on this domain 245 | * @domain: idle domain to flush out of any async_synchronize_full instances 246 | * 247 | * async_synchronize_{cookie|full}_domain() are not flushed since callers 248 | * of these routines should know the lifetime of @domain 249 | * 250 | * Prefer ASYNC_DOMAIN_EXCLUSIVE() declarations over flushing 251 | */ 252 | void async_unregister_domain(struct async_domain *domain) 253 | { 254 | spin_lock_irq(&async_lock); 255 | WARN_ON(!domain->registered || !list_empty(&domain->pending)); 256 | domain->registered = 0; 257 | spin_unlock_irq(&async_lock); 258 | } 259 | EXPORT_SYMBOL_GPL(async_unregister_domain); 260 | 261 | /** 262 | * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain 263 | * @domain: the domain to synchronize 264 | * 265 | * This function waits until all asynchronous function calls for the 266 | * synchronization domain specified by @domain have been done. 267 | */ 268 | void async_synchronize_full_domain(struct async_domain *domain) 269 | { 270 | async_synchronize_cookie_domain(ASYNC_COOKIE_MAX, domain); 271 | } 272 | EXPORT_SYMBOL_GPL(async_synchronize_full_domain); 273 | 274 | /** 275 | * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing 276 | * @cookie: async_cookie_t to use as checkpoint 277 | * @domain: the domain to synchronize (%NULL for all registered domains) 278 | * 279 | * This function waits until all asynchronous function calls for the 280 | * synchronization domain specified by @domain submitted prior to @cookie 281 | * have been done. 282 | */ 283 | void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain) 284 | { 285 | ktime_t uninitialized_var(starttime), delta, endtime; 286 | 287 | if (initcall_debug && system_state == SYSTEM_BOOTING) { 288 | pr_debug("async_waiting @ %i\n", task_pid_nr(current)); 289 | starttime = ktime_get(); 290 | } 291 | 292 | wait_event(async_done, lowest_in_progress(domain) >= cookie); 293 | 294 | if (initcall_debug && system_state == SYSTEM_BOOTING) { 295 | endtime = ktime_get(); 296 | delta = ktime_sub(endtime, starttime); 297 | 298 | pr_debug("async_continuing @ %i after %lli usec\n", 299 | task_pid_nr(current), 300 | (long long)ktime_to_ns(delta) >> 10); 301 | } 302 | } 303 | EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain); 304 | 305 | /** 306 | * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing 307 | * @cookie: async_cookie_t to use as checkpoint 308 | * 309 | * This function waits until all asynchronous function calls prior to @cookie 310 | * have been done. 
311 | */ 312 | void async_synchronize_cookie(async_cookie_t cookie) 313 | { 314 | async_synchronize_cookie_domain(cookie, &async_dfl_domain); 315 | } 316 | EXPORT_SYMBOL_GPL(async_synchronize_cookie); 317 | 318 | /** 319 | * current_is_async - is %current an async worker task? 320 | * 321 | * Returns %true if %current is an async worker task. 322 | */ 323 | bool current_is_async(void) 324 | { 325 | struct worker *worker = current_wq_worker(); 326 | 327 | return worker && worker->current_func == async_run_entry_fn; 328 | } 329 | -------------------------------------------------------------------------------- /testcode/configs.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kernel/configs.c 3 | * Echo the kernel .config file used to build the kernel 4 | * 5 | * Copyright (C) 2002 Khalid Aziz 6 | * Copyright (C) 2002 Randy Dunlap 7 | * Copyright (C) 2002 Al Stone 8 | * Copyright (C) 2002 Hewlett-Packard Company 9 | * 10 | * This program is free software; you can redistribute it and/or modify 11 | * it under the terms of the GNU General Public License as published by 12 | * the Free Software Foundation; either version 2 of the License, or (at 13 | * your option) any later version. 14 | * 15 | * This program is distributed in the hope that it will be useful, but 16 | * WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 18 | * NON INFRINGEMENT. See the GNU General Public License for more 19 | * details. 20 | * 21 | * You should have received a copy of the GNU General Public License 22 | * along with this program; if not, write to the Free Software 23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | /**************************************************/ 34 | /* the actual current config file */ 35 | 36 | /* 37 | * Define kernel_config_data and kernel_config_data_size, which contains the 38 | * wrapped and compressed configuration file. 
The file is first compressed 39 | * with gzip and then bounded by two eight byte magic numbers to allow 40 | * extraction from a binary kernel image: 41 | * 42 | * IKCFG_ST 43 | * 44 | * IKCFG_ED 45 | */ 46 | #define MAGIC_START "IKCFG_ST" 47 | #define MAGIC_END "IKCFG_ED" 48 | #include "config_data.h" 49 | 50 | 51 | #define MAGIC_SIZE (sizeof(MAGIC_START) - 1) 52 | #define kernel_config_data_size \ 53 | (sizeof(kernel_config_data) - 1 - MAGIC_SIZE * 2) 54 | 55 | #ifdef CONFIG_IKCONFIG_PROC 56 | 57 | static ssize_t 58 | ikconfig_read_current(struct file *file, char __user *buf, 59 | size_t len, loff_t * offset) 60 | { 61 | return simple_read_from_buffer(buf, len, offset, 62 | kernel_config_data + MAGIC_SIZE, 63 | kernel_config_data_size); 64 | } 65 | 66 | static const struct file_operations ikconfig_file_ops = { 67 | .owner = THIS_MODULE, 68 | .read = ikconfig_read_current, 69 | .llseek = default_llseek, 70 | }; 71 | 72 | static int __init ikconfig_init(void) 73 | { 74 | struct proc_dir_entry *entry; 75 | 76 | /* create the current config file */ 77 | entry = proc_create("config.gz", S_IFREG | S_IRUGO, NULL, 78 | &ikconfig_file_ops); 79 | if (!entry) 80 | return -ENOMEM; 81 | 82 | proc_set_size(entry, kernel_config_data_size); 83 | 84 | return 0; 85 | } 86 | 87 | static void __exit ikconfig_cleanup(void) 88 | { 89 | remove_proc_entry("config.gz", NULL); 90 | } 91 | 92 | module_init(ikconfig_init); 93 | module_exit(ikconfig_cleanup); 94 | 95 | #endif /* CONFIG_IKCONFIG_PROC */ 96 | 97 | MODULE_LICENSE("GPL"); 98 | MODULE_AUTHOR("Randy Dunlap"); 99 | MODULE_DESCRIPTION("Echo the kernel .config file used to build the kernel"); 100 | -------------------------------------------------------------------------------- /testcode/wrong_case.c: -------------------------------------------------------------------------------- 1 | void module_layout(struct module *mod, 2 | struct modversion_info *ver, 3 | struct kernel_param *kp, 4 | struct kernel_symbol *ks, 5 | struct tracepoint * const *tp) // cannot identify "struct tracepoint * const *tp" as function argument 6 | { 7 | } 8 | 9 | void func(void) 10 | { 11 | int a0[10]; 12 | int a1[10][10]; // cannot identify two-dimensional array (multi-dimensional array?) 13 | return; 14 | } 15 | -------------------------------------------------------------------------------- /tools/FuncParser-opt.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/tools/FuncParser-opt.jar -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/tools/__init__.py -------------------------------------------------------------------------------- /tools/cvedatagen/README.md: -------------------------------------------------------------------------------- 1 | # NVD CVE crawler 2 | 3 | ## Modules 4 | File Name | Description 5 | --------------------|------------ 6 | cveXmlDownloader.py | Downloads XML files from NVD 7 | cveXmlParser.py | Parses and generates cvedata.pkl 8 | cveXmlUpdater.py | Downloads updated records from NVD and updates cvedata.pkl 9 | 10 | ## How to use 11 | 1. Running for the first time 12 | * Run `cveXmlDownloader.py`, `cveXmlParser.py`, and `cveXmlUpdater.py` in a row. 13 | 14 | 2. 
Later use 15 | * If you have already generated cvedata.pkl through past runs, run cveXmlUpdater.py for updates. 16 | 17 | -------------------------------------------------------------------------------- /tools/cvedatagen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/squizz617/vuddy/33cdab1ad04a6dcc76011b92821dbeb055c6691e/tools/cvedatagen/__init__.py -------------------------------------------------------------------------------- /tools/cvedatagen/common.py: -------------------------------------------------------------------------------- 1 | #import urllib2 2 | from urllib import request 3 | import sys 4 | from zipfile import ZipFile 5 | from xml.etree.ElementTree import parse 6 | import json 7 | import os 8 | 9 | 10 | def download_url(url, fileName): 11 | #u = urllib2.urlopen(url) 12 | u = request.urlopen(url) 13 | f = open(fileName, "wb") 14 | meta = u.info() 15 | #fileSize = int(meta.getheaders("Content-Length")[0]) 16 | fileSize = int(meta.get_all("Content-Length")[0]) 17 | print("Downloading: %s (%s bytes)" % (fileName, fileSize)) 18 | 19 | downloadedSize = 0 20 | blockSize = 8192 21 | barSize = 30 22 | while True: 23 | buffer = u.read(blockSize) 24 | if not buffer: 25 | break 26 | 27 | downloadedSize += len(buffer) 28 | f.write(buffer) 29 | status = "\r" 30 | #status += "#" * (downloadedSize * barSize / fileSize) 31 | #status += " " * (barSize - downloadedSize * barSize / fileSize) 32 | #status += "%10d [%3.2f%%]" % (downloadedSize, downloadedSize * 100. / fileSize) 33 | status += "#" * (downloadedSize * barSize // fileSize) 34 | status += " " * (barSize - downloadedSize * barSize // fileSize) 35 | status += "%10d [%3.2f%%]" % (downloadedSize, downloadedSize * 100. // fileSize) 36 | # status += chr(8)*(len(status)+1) 37 | sys.stdout.write(status) 38 | sys.stdout.flush() 39 | 40 | sys.stdout.write("\n") 41 | f.close() 42 | 43 | 44 | def unzip(fileName): 45 | print("Extracting: " + fileName), 46 | zip = ZipFile(fileName) 47 | zip.extractall() 48 | zip.close() 49 | print(" [DONE]") 50 | 51 | 52 | def parse_xml(xmlFile): 53 | print("Processing: " + xmlFile), 54 | if not xmlFile.endswith(".json"): 55 | return {} 56 | 57 | update_count = 0 58 | new_count = 0 59 | subDict = {} 60 | cveid = "" 61 | cvss = "" 62 | cweid = "" 63 | reference = [] 64 | summary = "" 65 | 66 | #with open(xmlFile) as f: 67 | with open(xmlFile, 'r', encoding='utf-8') as f: 68 | json_obj = json.load(f) 69 | 70 | cve_dict = json_obj["CVE_Items"] 71 | for cve in cve_dict: 72 | cveid = cve["cve"]["CVE_data_meta"]["ID"] 73 | try: 74 | cweid = cve["cve"]["problemtype"]["problemtype_data"][0]["description"][0]["value"] 75 | except: 76 | cweid = "CWE-000" 77 | 78 | try: 79 | cvss = cve["impact"]["baseMetricV2"]["cvssV2"]["baseScore"] 80 | except: 81 | cvss = "0.0" 82 | 83 | if cveid in subDict: 84 | update_count += 1 85 | else: 86 | new_count += 1 87 | 88 | subDict[cveid] = [cvss, cweid, reference, summary] 89 | 90 | print("[Updated %s records, added %s new records]" % (update_count, new_count)) 91 | return subDict 92 | -------------------------------------------------------------------------------- /tools/cvedatagen/cveXmlDownloader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Download and store NVD's CVE data in XML. 4 | See https://nvd.nist.gov/vuln/data-feeds#CVE_FEED for information. 
5 | """ 6 | 7 | import os 8 | import datetime 9 | import common 10 | originalDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 11 | 12 | 13 | def process(): 14 | DLDir = os.path.join(originalDir, "data", "CVEXML") 15 | 16 | try: 17 | os.makedirs(DLDir) 18 | except OSError: 19 | pass 20 | 21 | # NVD's XML Vulnerability Feeds have been deprecated. Use JSON instead.. 22 | # https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2002.json.zip 23 | urlBase = "https://nvd.nist.gov/feeds/json/cve/1.1/" 24 | 25 | os.chdir(DLDir) 26 | 27 | for year in range(2002, datetime.datetime.now().year + 1): 28 | fileName = "nvdcve-1.1-{0}.json.zip".format(year) 29 | url = urlBase + fileName 30 | 31 | common.download_url(url, fileName) 32 | common.unzip(fileName) 33 | os.remove(fileName) 34 | 35 | 36 | if __name__ == '__main__': 37 | process() 38 | -------------------------------------------------------------------------------- /tools/cvedatagen/cveXmlParser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | NVD's CVE xml data processor. 4 | xml data is downloaded from https://nvd.nist.gov/download.cfm 5 | This module should be run only once. 6 | or, if the pickle file has been corrupted, run this module again. 7 | Updates of the database is done in cvexmlupdater.py 8 | """ 9 | 10 | import os 11 | import common 12 | try: 13 | import cPickle as pickle 14 | except ImportError: 15 | import pickle 16 | originalDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 17 | 18 | 19 | def process(): 20 | DLDir = os.path.join(originalDir, "data", "CVEXML") 21 | cveDict = {} 22 | 23 | for xml in os.listdir(DLDir): 24 | subDict = common.parse_xml(os.path.join(DLDir, xml)) 25 | cveDict.update(subDict) 26 | 27 | pickle.dump(cveDict, open(os.path.join(originalDir, "data", "cvedata.pkl"), "wb")) 28 | 29 | print("Stored " + str(len(cveDict)) + " CVE data in file 'cvedata.pkl'.") 30 | 31 | 32 | if __name__ == '__main__': 33 | process() 34 | -------------------------------------------------------------------------------- /tools/cvedatagen/cveXmlUpdater.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | CVE data updater. 4 | Run cveXmlDownloader.py and cveXmlParser.py before running this module. 5 | This module downloads "modified" data from NVD, uncompress and update the database. 
6 | """ 7 | 8 | import os 9 | import pickle 10 | import common 11 | 12 | originalDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 13 | 14 | 15 | def process(): 16 | # first download the modified cve data from NVD 17 | fileName = "nvdcve-1.1-modified.json.zip" 18 | url = "https://nvd.nist.gov/feeds/json/cve/1.1/" + fileName 19 | 20 | common.download_url(url, fileName) 21 | common.unzip(fileName) 22 | os.remove(fileName) 23 | 24 | # load the pickled cve data 25 | print("Reading pickled data..."), 26 | cveDict = pickle.load(open(os.path.join(originalDir, "data", "cvedata.pkl"), "rb")) 27 | print("[DONE]") 28 | 29 | subDict = common.parse_xml(fileName.replace(".zip", "")) 30 | cveDict.update(subDict) 31 | 32 | os.remove(fileName.replace(".zip", "")) 33 | 34 | print("Dumping updated pickle..."), 35 | pickle.dump(cveDict, open(os.path.join(originalDir, "data", "cvedata.pkl"), "wb")) 36 | print("[DONE]") 37 | 38 | 39 | if __name__ == '__main__': 40 | process() 41 | -------------------------------------------------------------------------------- /tools/parseutility.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | import re 5 | import platform 6 | 7 | # Import from parent directory 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | import config 10 | 11 | 12 | def get_platform(): 13 | global osName 14 | global bits 15 | 16 | pf = platform.platform() 17 | bits, _ = platform.architecture() 18 | if "Windows" in pf: 19 | osName = "win" 20 | bits = "" 21 | elif "Linux" in pf: 22 | osName = "linux" 23 | if "64" in bits: 24 | bits = "64" 25 | else: 26 | bits = "86" 27 | else: 28 | osName = "osx" 29 | bits = "" 30 | 31 | 32 | def setEnvironment(caller): 33 | get_platform() 34 | global javaCallCommand 35 | if caller == "GUI": 36 | # try: 37 | # base_path = sys._MEIPASS 38 | # except: 39 | # base_path = os.path.abspath(".") 40 | cwd = os.getcwd() 41 | if osName == "win": 42 | # full_path = os.path.join(base_path, "FuncParser.exe") 43 | javaCallCommand = os.path.join(cwd, "FuncParser-opt.exe ") 44 | 45 | elif osName == "linux" or osName == "osx": 46 | # full_path = os.path.join(base_path, "FuncParser.jar") 47 | # javaCallCommand = "java -Xmx1024m -jar " + full_path + " " 48 | javaCallCommand = "\"{0}\" -Xmx1024m -jar \"{1}\" ".format(config.javaBinary, os.path.join(cwd, "FuncParser-opt.jar")) 49 | 50 | else: 51 | if osName == "win": 52 | base_path = os.path.dirname(os.path.abspath(__file__)) # vuddy/hmark root directory 53 | javaCallCommand = os.path.join(base_path, "FuncParser-opt.exe ") 54 | elif osName == "linux" or osName == "osx": 55 | base_path = os.path.dirname(os.path.abspath(__file__)) # vuddy/hmark root directory 56 | javaCallCommand = "\"{0}\" -Xmx1024m -jar \"{1}\" ".format(config.javaBinary, os.path.join(base_path, "FuncParser-opt.jar")) 57 | 58 | 59 | class function: 60 | parentFile = None # Absolute file which has the function 61 | parentNumLoc = None # Number of LoC of the parent file 62 | name = None # Name of the function 63 | lines = None # Tuple (lineFrom, lineTo) that indicates the LoC of function 64 | funcId = None # n, indicating n-th function in the file 65 | parameterList = [] # list of parameter variables 66 | variableList = [] # list of local variables 67 | dataTypeList = [] # list of data types, including user-defined types 68 | funcCalleeList = [] # list of called functions' names 69 | funcBody = None 70 | 71 | def 
__init__(self, fileName): 72 | self.parentFile = fileName 73 | self.parameterList = [] 74 | self.variableList = [] 75 | self.dataTypeList = [] 76 | self.funcCalleeList = [] 77 | 78 | def removeListDup(self): 79 | # for best performance, this method must be executed 80 | # for every instance before applying the abstraction. 81 | self.parameterList = list(set(self.parameterList)) 82 | self.variableList = list(set(self.variableList)) 83 | self.dataTypeList = list(set(self.dataTypeList)) 84 | self.funcCalleeList = list(set(self.funcCalleeList)) 85 | 86 | # def getOriginalFunction(self): 87 | # # returns the original function back from the instance. 88 | # fp = open(self.parentFile, 'r') 89 | # srcFileRaw = fp.readlines() 90 | # fp.close() 91 | # return ''.join(srcFileRaw[self.lines[0]-1:self.lines[1]]) 92 | 93 | 94 | def loadSource(rootDirectory): 95 | # returns the list of C/C++ source files under the specified root directory. 96 | maxFileSizeInBytes = None 97 | maxFileSizeInBytes = 2097152 # remove this line if you don't want to restrict 98 | # the maximum file size that you process. 99 | walkList = os.walk(rootDirectory) 100 | srcFileList = [] 101 | for path, dirs, files in walkList: 102 | for fileName in files: 103 | ext = fileName.lower() 104 | if ext.endswith('.c') or ext.endswith('.cpp') or ext.endswith('.cc') or ext.endswith('.c++') or ext.endswith('.cxx'): 105 | absPathWithFileName = path.replace('\\', '/') + '/' + fileName 106 | if maxFileSizeInBytes is not None: 107 | if os.path.getsize(absPathWithFileName) < maxFileSizeInBytes: 108 | srcFileList.append(absPathWithFileName) 109 | else: 110 | srcFileList.append(absPathWithFileName) 111 | return srcFileList 112 | 113 | 114 | def loadVul(rootDirectory): 115 | # returns the list of .vul files under the specified root directory. 116 | maxFileSizeInBytes = None 117 | # maxFileSizeInBytes = 2097152 # remove this line if you don't want to restrict 118 | # the maximum file size that you process. 119 | walkList = os.walk(rootDirectory) 120 | srcFileList = [] 121 | for path, dirs, files in walkList: 122 | for fileName in files: 123 | if fileName.endswith('OLD.vul'): 124 | absPathWithFileName = path.replace('\\', '/') + '/' + fileName 125 | if maxFileSizeInBytes is not None: 126 | if os.path.getsize(absPathWithFileName) < maxFileSizeInBytes: 127 | srcFileList.append(absPathWithFileName) 128 | else: 129 | srcFileList.append(absPathWithFileName) 130 | return srcFileList 131 | 132 | 133 | def removeComment(string): 134 | # Code for removing C/C++ style comments. (Imported from ReDeBug.) 135 | c_regex = re.compile( 136 | r'(?P<comment>//.*?$|[{}]+)|(?P<multilinecomment>/\*.*?\*/)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"]*)', 137 | re.DOTALL | re.MULTILINE) 138 | return ''.join([c.group('noncomment') for c in c_regex.finditer(string) if c.group('noncomment')]) 139 | 140 | 141 | # def getBody(originalFunction): 142 | # # returns the function's body as a string. 143 | # return originalFunction[originalFunction.find('{')+1:originalFunction.rfind('}')] 144 | 145 | 146 | def normalize(string): 147 | # Code for normalizing the input string. 148 | # LF and TAB literals, curly braces, and spaces are removed, 149 | # and all characters are lowercased. 150 | return ''.join(string.replace('\n', '').replace('\r', '').replace('\t', '').replace('{', '').replace('}', '').split( 151 | ' ')).lower() 152 | 153 | 154 | def abstract(instance, level): 155 | # Applies abstraction on the function instance, 156 | # and then returns a tuple consisting of the original body and abstracted body.
157 | originalFunctionBody = instance.funcBody 158 | # print "===================" 159 | originalFunctionBody = removeComment(originalFunctionBody) 160 | # print originalFunctionBody 161 | # print '====================================================' 162 | if int(level) >= 0: # No abstraction. 163 | abstractBody = originalFunctionBody 164 | 165 | if int(level) >= 1: # PARAM 166 | parameterList = instance.parameterList 167 | for param in parameterList: 168 | if len(param) == 0: 169 | continue 170 | try: 171 | paramPattern = re.compile("(^|\W)" + param + "(\W)") 172 | abstractBody = paramPattern.sub("\g<1>FPARAM\g<2>", abstractBody) 173 | except: 174 | pass 175 | 176 | if int(level) >= 2: # DTYPE 177 | dataTypeList = instance.dataTypeList 178 | for dtype in dataTypeList: 179 | if len(dtype) == 0: 180 | continue 181 | try: 182 | dtypePattern = re.compile("(^|\W)" + dtype + "(\W)") 183 | abstractBody = dtypePattern.sub("\g<1>DTYPE\g<2>", abstractBody) 184 | except: 185 | pass 186 | 187 | if int(level) >= 3: # LVAR 188 | variableList = instance.variableList 189 | for lvar in variableList: 190 | if len(lvar) == 0: 191 | continue 192 | try: 193 | lvarPattern = re.compile("(^|\W)" + lvar + "(\W)") 194 | abstractBody = lvarPattern.sub("\g<1>LVAR\g<2>", abstractBody) 195 | except: 196 | pass 197 | 198 | if int(level) >= 4: # FUNCCALL 199 | funcCalleeList = instance.funcCalleeList 200 | for fcall in funcCalleeList: 201 | if len(fcall) == 0: 202 | continue 203 | try: 204 | fcallPattern = re.compile("(^|\W)" + fcall + "(\W)") 205 | abstractBody = fcallPattern.sub("\g<1>FUNCCALL\g<2>", abstractBody) 206 | except: 207 | pass 208 | 209 | return (originalFunctionBody, abstractBody) 210 | 211 | 212 | delimiter = "\r\0?\r?\0\r" 213 | 214 | 215 | def parseFile_shallow(srcFileName, caller): 216 | # this does not parse body. 217 | global javaCallCommand 218 | global delimiter 219 | setEnvironment(caller) 220 | javaCallCommand += "\"" + srcFileName + "\" 0" 221 | functionInstanceList = [] 222 | try: 223 | astString = subprocess.check_output(javaCallCommand, stderr=subprocess.STDOUT, shell=True) 224 | except subprocess.CalledProcessError as e: 225 | print("Parser Error:", e) 226 | astString = b"" 227 | # decode the parser's byte output into a string 228 | astString = astString.decode('latin-1') 229 | funcList = astString.split(delimiter) 230 | for func in funcList[1:]: 231 | functionInstance = function(srcFileName) 232 | elemsList = func.split('\n')[1:-1] 233 | # print elemsList 234 | if len(elemsList) > 9: 235 | functionInstance.parentNumLoc = int(elemsList[1]) 236 | functionInstance.name = elemsList[2] 237 | functionInstance.lines = (int(elemsList[3].split('\t')[0]), int(elemsList[3].split('\t')[1])) 238 | functionInstance.funcId = int(elemsList[4]) 239 | functionInstance.funcBody = '\n'.join(elemsList[9:]) 240 | # print functionInstance.funcBody 241 | # print "-------------------" 242 | 243 | functionInstanceList.append(functionInstance) 244 | 245 | return functionInstanceList 246 | 247 | 248 | # def parseFile_semiDeep(srcFileName, caller): 249 | # # this does not parse body.
250 | # global javaCallCommand 251 | # global delimiter 252 | # setEnvironment(caller) 253 | # javaCallCommand += "\"" + srcFileName + "\" 0" 254 | # functionInstanceList = [] 255 | # try: 256 | # astString = subprocess.check_output(javaCallCommand, stderr=subprocess.STDOUT, shell=True) 257 | # except subprocess.CalledProcessError as e: 258 | # print "Parser Error:", e 259 | # astString = "" 260 | 261 | # funcList = astString.split(delimiter) 262 | # for func in funcList[1:]: 263 | # functionInstance = function(srcFileName) 264 | # elemsList = func.split('\n')[1:-1] 265 | # # print elemsList 266 | # if len(elemsList) > 9: 267 | # functionInstance.parentNumLoc = int(elemsList[1]) 268 | # functionInstance.name = elemsList[2] 269 | # functionInstance.lines = (int(elemsList[3].split('\t')[0]), int(elemsList[3].split('\t')[1])) 270 | # functionInstance.funcId = int(elemsList[4]) 271 | # functionInstance.parameterList = elemsList[5].rstrip().split('\t') 272 | # functionInstance.funcBody = '\n'.join(elemsList[9:]) 273 | # # print functionInstance.funcBody 274 | # # print "-------------------" 275 | 276 | # functionInstanceList.append(functionInstance) 277 | 278 | # return functionInstanceList 279 | 280 | 281 | def parseFile_deep(srcFileName, caller): 282 | global javaCallCommand 283 | global delimiter 284 | setEnvironment(caller) 285 | # this parses function definition plus body. 286 | javaCallCommand += "\"" + srcFileName + "\" 1" 287 | functionInstanceList = [] 288 | 289 | try: 290 | astString = subprocess.check_output(javaCallCommand, stderr=subprocess.STDOUT, shell=True).decode('latin-1') # decode the parser's byte output, as in parseFile_shallow 291 | except subprocess.CalledProcessError as e: 292 | print("Parser Error:", e) 293 | astString = "" 294 | 295 | funcList = astString.split(delimiter) 296 | for func in funcList[1:]: 297 | functionInstance = function(srcFileName) 298 | 299 | elemsList = func.split('\n')[1:-1] 300 | # print elemsList 301 | if len(elemsList) > 9: 302 | functionInstance.parentNumLoc = int(elemsList[1]) 303 | functionInstance.name = elemsList[2] 304 | functionInstance.lines = (int(elemsList[3].split('\t')[0]), int(elemsList[3].split('\t')[1])) 305 | functionInstance.funcId = int(elemsList[4]) 306 | functionInstance.parameterList = elemsList[5].rstrip().split('\t') 307 | functionInstance.variableList = elemsList[6].rstrip().split('\t') 308 | functionInstance.dataTypeList = elemsList[7].rstrip().split('\t') 309 | functionInstance.funcCalleeList = elemsList[8].rstrip().split('\t') 310 | functionInstance.funcBody = '\n'.join(elemsList[9:]) 311 | # print '\n'.join(elemsList[9:]) 312 | functionInstanceList.append(functionInstance) 313 | 314 | return functionInstanceList 315 | --------------------------------------------------------------------------------
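Usage note (illustrative, not part of the repository): the minimal sketch below shows one way the utilities in tools/parseutility.py can be combined: load a C/C++ tree, run the deep parser, abstract each function at level 4, normalize the body, and derive a fingerprint. It assumes config.javaBinary points to a working Java runtime and that FuncParser-opt.jar sits next to parseutility.py; the "CLI" caller string simply selects the non-GUI branch of setEnvironment(), and the MD5-over-normalized-body fingerprint at the end is an example choice, not an API of this module.

#!/usr/bin/env python
# sketch only -- assumes it is run from the tools/ directory so that
# "import parseutility" and its "import config" both resolve.
import hashlib
import parseutility

def fingerprint_tree(rootDirectory):
    fingerprints = []
    for srcFile in parseutility.loadSource(rootDirectory):
        # any caller string other than "GUI" takes the command-line path in setEnvironment()
        for func in parseutility.parseFile_deep(srcFile, "CLI"):
            func.removeListDup()  # required before abstraction
            _, abstractBody = parseutility.abstract(func, 4)  # level 4: FPARAM/DTYPE/LVAR/FUNCCALL
            body = parseutility.normalize(abstractBody)
            # (length, MD5 digest) is used here purely as an example fingerprint
            fingerprints.append((len(body), hashlib.md5(body.encode()).hexdigest(), func.name))
    return fingerprints

if __name__ == '__main__':
    for length, digest, name in fingerprint_tree("../testcode"):
        print(length, digest, name)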