├── ReadMe.md ├── extraTools ├── __init__.py └── vuldetect │ ├── __init__.py │ ├── deepwukong.py │ ├── ivdetect.py │ ├── sysevr.py │ └── utils │ ├── __init__.py │ ├── environments.py │ ├── sinkPoint.py │ └── symbolized.py ├── main.py ├── mainTool ├── CPG.py ├── __init__.py ├── antlr │ ├── CPP14.interp │ ├── CPP14.tokens │ ├── CPP14Lexer.interp │ ├── CPP14Lexer.py │ ├── CPP14Lexer.tokens │ ├── CPP14Listener.py │ ├── CPP14Parser.py │ ├── CPP14Visitor.py │ └── __init__.py ├── ast │ ├── ParsingUtils.py │ ├── __init__.py │ ├── astNode.py │ ├── builders.py │ ├── declarations │ │ ├── __init__.py │ │ ├── complexDecls.py │ │ └── simpleDecls.py │ ├── expressions │ │ ├── __init__.py │ │ ├── binaryExpressions.py │ │ ├── expression.py │ │ ├── expressionHolders.py │ │ ├── postfixExpressions.py │ │ └── primaryExpressions.py │ ├── statements │ │ ├── __init__.py │ │ ├── blockStarters.py │ │ ├── jumps.py │ │ └── statements.py │ └── walking │ │ ├── __init__.py │ │ └── visitor.py ├── cdg │ ├── CDG.py │ ├── DominatorTree.py │ └── __init__.py ├── cfg │ ├── CCFG.py │ ├── CFG.py │ ├── __init__.py │ └── nodes.py ├── ddg │ ├── DDGCreator.py │ ├── DefUseGraph.py │ └── __init__.py ├── udg │ ├── __init__.py │ ├── astAnalyzers.py │ ├── astProvider.py │ ├── environments.py │ └── useDefGraph.py └── utils │ ├── __init__.py │ ├── graphUtils.py │ └── types.py ├── resources ├── CPP14.g4 └── calleeInfos.json └── test ├── extraToolTests ├── __init__.py ├── deepwukongTest.py ├── ivdetectTest.py └── sysevrTest.py ├── mainToolTests ├── ASTBuildTest.py ├── CDGBuildTest.py ├── CFGBuildTest.py ├── CPGBuildTest.py ├── DDGBuildTest.py ├── FileParsingTest.py ├── UDGBuildTest.py ├── jsonTestData.py └── parsingCode.py └── testfiles ├── ComplexStruct.c ├── IdentifierDeclTest.c ├── inputcases └── sard_test_cases ├── CWE119_1.c ├── CWE121_new_goto.c ├── CWE123_Write_What_Where_Condition__connect_socket_53a.c ├── CWE123_Write_What_Where_Condition__connect_socket_53b.c ├── CWE123_Write_What_Where_Condition__connect_socket_53c.c ├── CWE123_Write_What_Where_Condition__connect_socket_53d.c ├── CWE_119_122_Struct.c ├── CWE_119_122_switch.c ├── CWE_119_124_class_decl.c ├── CWE_119_124_class_method_decl.c ├── CWE_119_124_fscanf.c ├── CWE_119_fget.c └── io.c /ReadMe.md: -------------------------------------------------------------------------------- 1 | # About CppCodeAnalyzer 2 | 3 | It is a parsing tool based on python for C/C++ to construct code property graph, which is the python version of [CppCodeAnalyzerJava](https://github.com/for-just-we/CppCodeAnalyzerJava), most of functions of CppCodeAnalyzer are similar to Joern, the differences are that: 4 | 5 | - The grammar we utilize here is from the repo of [grammars-v4](https://github.com/antlr/grammars-v4) Antlr official, which means the input of module ast (Antlr AST) is quite different from Joern, but the output customized AST is the same, so the parsing module in ast package is different from Joern. 6 | 7 | - When constructing CFG, CppCodeAnalyzer takes `for-range` and `try-catch` into consideration. 8 | 9 | * when parsing code such as `for (auto p: vec){ xxx }`, the CFG is like in graph 1 10 | 11 | * when parsing `try-catch`, we simple ignore statements in catch block because in normal states they are not going to be executed, and the control flow in `try-catch` is quite hard to compute. 12 | 13 | * when parsing use-def information by udg package, we take the information of pointer uses. For example, `memcpy(dest, src, 100);` defines symbol `* dest` and uses symbol `* src`, Joern considered pointer define with variable `Tainted` but did not consider pointer uses. 14 | 15 | Graph 1 16 | ```mermaid 17 | graph LR 18 | EmptyCondition --> A[auto p: vec] 19 | A --> B[xxx] 20 | B --> EmptyCondition 21 | EmptyCondition --> Exit 22 | ``` 23 | 24 | The pipeline of CppCodeAnalyzer is similar to Joern, which could be illustrated as: 25 | 26 | ```mermaid 27 | graph LR 28 | AntlrAST --Transform --> AST -- control flow analysis --> CFG 29 | CFG -- dominate analysis --> CDG 30 | CFG -- symbol def use analysis --> UDG 31 | UDG -- data dependence analysis --> DDG 32 | ``` 33 | 34 | If you want more details, coule refer to [Joern工具工作流程分析](https://blog.csdn.net/qq_44370676/article/details/125089161) 35 | 36 | - package ast transform Antlr AST to customized AST. 37 | 38 | - package cfg conduct control flow analysis and convert customized AST into CFG. 39 | 40 | - package cdg conduct statement dominate analysis and construct control dependence relations between statements. 41 | 42 | - package udg analyze the symbols defined and used in each statement independently. 43 | 44 | - package ddg construct data dependence relations between statements with def-use information computed in udg package. 45 | 46 | 47 | # Usage 48 | 49 | The testfile in directionary `test/mainToolTests` illustrated the progress of each module, you could refer to those test cases to learn how to use API in CppCodeAnalyzer. 50 | 51 | Environment: 52 | 53 | - python 3.8 54 | 55 | - antlr4-python3-runtime 4.9.2 56 | 57 | Used as python package: 58 | 59 | - Download release first and unzip 60 | 61 | - Run `python setup.py bdist_wheel` and `pip install dist/CppCodeAnalyzer-1.0-py3-none-any.whl` 62 | 63 | - After installing, when import APIs from CppCodeAnalyzer, you just need to add prefix `CppCodeAnalyzer` to the package name, for example, the import statement `from mainTool.udg.astAnalyzers import ASTDefUseAnalyzer, CalleeInfos, CFGToUDGConverter` in [CPGBuildTest.py](https://github.com/for-just-we/CppCodeAnalyzer/blob/master/test/mainToolTests/CPGBuildTest.py), you just need to modify to `from CppCodeAnalyzer.mainTool.udg.astAnalyzers import ASTDefUseAnalyzer, CalleeInfos, CFGToUDGConverter`. 64 | 65 | 66 | # Our motivations 67 | 68 | - When we conduct experiments with Joern tool parsing SARD datasets, we find some error.The statement `wchar_t data[50] = L'A';` should be in a single CFG node, but each token in the statement is assigned to a CFG node, after we check the source code, we believe the root cause is the grammar used by [Joern](https://github.com/octopus-platform/joern/blob/dev/projects/extensions/joern-fuzzyc/src/main/java/antlr/Function.g4#L13). 69 | 70 | - Also, most researches utilize python to write deep-learning programs, it could be more convenient to parse code with python because the parsing module could directly connect to deep-learning module, there would be no need to write scripts to parse output of Joern. 71 | 72 | # Challenges 73 | 74 | - Parsing control-flow in `for-range` and `try-catch` is difficult, there are no materials depicting CFG in `for-range` and `try-catch`. 75 | 76 | - Parsing def-use information of pointer variable is difficult. For example, in `*(p+i+1) = a[i][j];`, symbols defined include `* p` and used include `p, i, j, a, * a`. However, this is not very accurate, but computing the location of memory staticlly is difficult. This could brings following problems. 77 | 78 | ```cpp 79 | s1: memset(source, 100, 'A'); 80 | s2: source[99] = '\0'; 81 | s3: memcpy(data, source, 100); 82 | ``` 83 | 84 | - In results of CppCodeAnalyzer, s1 and s2 define symbol `* source` , but the later kills the front. So, there is only DDG edge `s2 -> s3` in DDG. 85 | 86 | - However, s1 defines `* source`, s2 defines `* ( source + 99)`, a precise DDG should contains edge `s1 -> s3, s2 -> s3` 87 | 88 | Also, our tool is much more slower than Joern, normally parsing a file in SARD dataset needs 20 - 30 seconds, so we recommand dump output CPG into json format first if you need to train a model. The Java version [CppCodeAnalyzerJava](https://github.com/for-just-we/CppCodeAnalyzerJava) is much more faster, if you prefer fast analysis you could use Java version. 89 | 90 | 91 | 92 | # configuration 93 | 94 | [calleeInfos.json](https://github.com/for-just-we/CppCodeAnalyzer/blob/master/resources/calleeInfos.json) stores APIs which define or use variable of pointer type, you can use `json` package to load these callee infos and set [ASTDefUseAnalyzer](https://github.com/for-just-we/CppCodeAnalyzer/blob/master/mainTool/udg/astAnalyzers.py).calleeInfos according to your own preference when analysing use-def information of each code line. 95 | 96 | Note that calleeInfos.json is important to parse data dependence, or you would lose data dependence of pointer variable generated by API (such as `memcpy`), you can load like 97 | 98 | ```python 99 | import json 100 | from CppCodeAnalyzer.mainTool.CPG import initialCalleeInfos, CFGToUDGConverter, ASTDefUseAnalyzer 101 | 102 | calleeInfs = json.load(open("path to calleeInfos.json", 'r', encoding='utf-8')) 103 | calleeInfos = initialCalleeInfos(calleeInfs) 104 | 105 | converter: CFGToUDGConverter = CFGToUDGConverter() 106 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 107 | astAnalyzer.calleeInfos = calleeInfos 108 | converter.astAnalyzer = astAnalyzer 109 | ``` 110 | remember set `astAnalyzer.calleeInfos = calleeInfos` and `converter.astAnalyzer = astAnalyzer` to load calleeInfos 111 | 112 | 113 | # Extra Tools 114 | 115 | The package `extraTools` contains some preprocess code for vulnerability detectors IVDetect, SySeVR and DeepWuKong. The usage could refer to file in `test/extraToolTests` 116 | 117 | 118 | # References 119 | 120 | 121 | > [Yamaguchi, F. , Golde, N. , Arp, D. , & Rieck, K. . (2014). Modeling and Discovering Vulnerabilities with Code Property Graphs. IEEE Symposium on Security and Privacy. IEEE.](https://ieeexplore.ieee.org/document/6956589) 122 | 123 | > [Li Y , Wang S , Nguyen T N . Vulnerability Detection with Fine-grained Interpretations. 2021.](https://arxiv.org/abs/2106.10478) 124 | 125 | > [SySeVR: A Framework for Using Deep Learning to Detect Software Vulnerabilities\[J\]. IEEE Transactions on Dependable and Secure Computing, 2021, PP(99):1-1.](https://arxiv.org/abs/1807.06756) 126 | 127 | > [Cheng X , Wang H , Hua J , et al. DeepWukong[J]. ACM Transactions on Software Engineering and Methodology (TOSEM), 2021.](https://dl.acm.org/doi/10.1145/3436877) 128 | -------------------------------------------------------------------------------- /extraTools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/extraTools/__init__.py -------------------------------------------------------------------------------- /extraTools/vuldetect/__init__.py: -------------------------------------------------------------------------------- 1 | # This package is used to support DL-based vulnerability detectors like SySeVR -------------------------------------------------------------------------------- /extraTools/vuldetect/ivdetect.py: -------------------------------------------------------------------------------- 1 | from extraTools.vuldetect.utils.symbolized import ASTVarAnalyzer 2 | from mainTool.CPG import * 3 | import re 4 | 5 | # supproting IVDetect to extract variables and their types 6 | # paper: Vulnerability Detection with Fine-Grained Interpretations 7 | 8 | # In mainTool, CppCodeAnalyzer is able to produce AST, CDG and DDG, so there is no need here 9 | # to write script to 10 | 11 | # parsing the contents in a AST node into its sub token sequence, generating feature 1 12 | # code in https://github.com/vulnerabilitydetection/VulnerabilityDetectionResearch/blob/new_implementation/IVDetect/utils/process.py#L138 could 13 | # produce errors when parsing identifiers like TASK_SIZE_MAX with all upper case 14 | def lexical_parse(line: str) -> List[str]: 15 | tokens = line.split(" ") 16 | filtered_set = ['', ' ', ' ', ',', '\n', ';', '(', ')', '<', '>', '{', '}', '[', ']', '``', '\'\'', '\"', "'"] 17 | 18 | tokens = list(filter(lambda t: t not in filtered_set, tokens)) 19 | new_tokens = list() 20 | for token in tokens: 21 | if token.isalpha(): 22 | new_tokens.extend(re.findall("[a-zA-Z][^A-Z]*", token)) 23 | else: 24 | # 按下划线分割 25 | new_tokens.extend([t for t in token.split('_') if t != '']) 26 | return new_tokens 27 | 28 | 29 | # 返回每个statement对应的astnode及其type token序列 30 | def generate_feature3(statements: List[ASTNode]): 31 | astVarAnalyzer: ASTVarAnalyzer = ASTVarAnalyzer() 32 | varLists: List[list] = list() 33 | for statement in statements: 34 | provider: ASTNodeASTProvider = ASTNodeASTProvider() 35 | provider.node = statement 36 | astVarAnalyzer.analyzeAST(provider) 37 | 38 | vars = list() 39 | for variable in astVarAnalyzer.variables: 40 | vars.extend(lexical_parse(variable)) 41 | if variable in astVarAnalyzer.var2type.keys(): 42 | vars.extend(lexical_parse(astVarAnalyzer.var2type[variable])) 43 | varLists.append(vars) 44 | 45 | return varLists 46 | 47 | 48 | # 返回和该statement有控制依赖的结点组成的token sequence,为了不让深度太大,我们暂时限制depth = 1 49 | def find_control(cur_stmt_idx: int, cdg_edge_idxs: Dict[int, int], seq: List[int], depth: int, limit: int): 50 | record = [] 51 | if cur_stmt_idx in cdg_edge_idxs.keys(): 52 | control_stmt = cdg_edge_idxs[cur_stmt_idx] 53 | seq.append(control_stmt) 54 | record.append(control_stmt) 55 | if depth < limit: 56 | for stmt in record: 57 | find_control(stmt, cdg_edge_idxs, seq, depth + 1, limit) 58 | 59 | 60 | def generate_feature4(cpg: CPG, limit: int = 1) -> List[List[List[str]]]: 61 | cdg_edge_idxs: Dict[int, int] = { edge.destination: edge.source for edge in cpg.CDGEdges } 62 | # 每个statement的控制依赖结点 63 | cd_idxs_for_stmt: List[List[int]] = list() 64 | for stmt_idx in range(len(cpg.statements)): 65 | seq: List[int] = list() 66 | find_control(stmt_idx, cdg_edge_idxs, seq, 1, limit) 67 | cd_idxs_for_stmt.append(seq) 68 | 69 | feature4_for_stmts: List[List[List[str]]] = list() 70 | for cd_idxs in cd_idxs_for_stmt: 71 | sub_tokens_in_stmts: List[List[str]] = [lexical_parse(cpg.statements[idx].getEscapedCodeStr()) 72 | for idx in cd_idxs] 73 | feature4_for_stmts.append(sub_tokens_in_stmts) 74 | 75 | return feature4_for_stmts 76 | 77 | 78 | def find_data(cur_stmt_idx: int, ddg_edge_idxs: Dict[int, Set[int]], seq: List[int], depth: int, limit: int): 79 | record = [] 80 | if cur_stmt_idx in ddg_edge_idxs.keys(): 81 | for data_stmt in ddg_edge_idxs[cur_stmt_idx]: 82 | seq.append(data_stmt) 83 | record.append(data_stmt) 84 | if depth < limit: 85 | for stmt in record: 86 | find_data(stmt, ddg_edge_idxs, seq, depth + 1, limit) 87 | 88 | 89 | # feature5, data dependence 90 | def generate_feature5(cpg: CPG, limit: int = 1) -> List[List[List[str]]]: 91 | ddg_edge_idxs: Dict[int, Set[int]] = dict() 92 | for edge in cpg.DDGEdges: 93 | if edge.destination not in ddg_edge_idxs.keys(): 94 | ddg_edge_idxs[edge.destination] = {edge.source} 95 | else: 96 | ddg_edge_idxs[edge.destination].add(edge.source) 97 | # 每个statement的控制依赖结点 98 | dd_idxs_for_stmt: List[List[int]] = list() 99 | for stmt_idx in range(len(cpg.statements)): 100 | seq: List[int] = list() 101 | find_data(stmt_idx, ddg_edge_idxs, seq, 1, limit) 102 | dd_idxs_for_stmt.append(seq) 103 | 104 | feature5_for_stmts: List[List[List[str]]] = list() 105 | for dd_idxs in dd_idxs_for_stmt: 106 | sub_tokens_in_stmts: List[List[str]] = [lexical_parse(cpg.statements[idx].getEscapedCodeStr()) 107 | for idx in dd_idxs] 108 | feature5_for_stmts.append(sub_tokens_in_stmts) 109 | 110 | return feature5_for_stmts -------------------------------------------------------------------------------- /extraTools/vuldetect/sysevr.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.statements.jumps import ReturnStatement 2 | from extraTools.vuldetect.utils.sinkPoint import SyVCPoint, CallExprTool 3 | from extraTools.vuldetect.utils.symbolized import SymbolizingTool 4 | from mainTool.CPG import * 5 | from typing import List, Set 6 | import json 7 | 8 | 9 | 10 | class SySeSlice(object): 11 | def __init__(self, keyLine: List[int], keyContent: str): 12 | # slice覆盖到的行号以及每一行所在的文件 13 | self.lineNumbers: List[List[int]] = list() 14 | # slice中每个语句对应的token序列 15 | self.lineContents: List[str] = list() 16 | # key line 行号,文件Id 17 | self.keyLine: List[int] = keyLine 18 | # 内容 19 | self.keyLineContent: str = keyContent 20 | # 文件id对文件名 21 | self.id2file: Dict[int, str] = None 22 | 23 | def __hash__(self): 24 | return hash(json.dumps(self.lineContents)) 25 | 26 | def toJson(self) -> Dict: 27 | return { 28 | "keyline": self.keyLine, 29 | "id2file": self.id2file, 30 | "line-Nos": self.lineNumbers, 31 | "line-contents": self.lineContents 32 | } 33 | 34 | 35 | # 一个程序中所有function都由一个SliceTool对象处理 36 | # cpgs is all cpgs from functions of a program (could be a file sometimes) 37 | class SySeSliceTool(object): 38 | def __init__(self, cpgs: List[CPG], sensitive_apis: Set[str], symbolizingTool: SymbolizingTool): 39 | self.cpgs: List[CPG] = cpgs 40 | self.funcName2cpg: Dict[str, CPG] = {cpg.name: cpg for cpg in cpgs} 41 | self.sensitive_apis: Set[str] = sensitive_apis # 42 | self.symbolizingTool: SymbolizingTool = symbolizingTool 43 | 44 | self.slices: Set[SySeSlice] = set() # store all code gadgets of a program 45 | # backward information of control- data-deoendence for each statement 46 | self.funcName2backInfo: Dict[str, Dict[int, Set[int]]] = dict() 47 | # forward information of data-deoendence for each statement 48 | self.funcName2forwInfo: Dict[str, Dict[int, Set[int]]] = dict() 49 | # 将文件名映射 50 | self.files: List[str] = list() 51 | for cpg in self.cpgs: 52 | self.generateForAndBackInfos(cpg) 53 | if cpg.file not in self.files: 54 | self.files.append(cpg.file) 55 | 56 | self.file2Id: Dict[str, int] = { file: i for i, file in enumerate(self.files) } 57 | 58 | 59 | 60 | def generateForAndBackInfos(self, cpg: CPG): 61 | # backward 62 | backInfo: Dict[int, Set[int]] = dict() 63 | # forward 64 | forwInfo: Dict[int, Set[int]] = dict() 65 | 66 | # backward for control dependence 67 | for edge in cpg.CDGEdges: 68 | if edge.destination not in backInfo.keys(): 69 | backInfo[edge.destination] = set() 70 | backInfo[edge.destination].add(edge.source) 71 | 72 | # forward and backward for data dependence 73 | for edge in cpg.DDGEdges: 74 | # backward 75 | if edge.destination not in backInfo.keys(): 76 | backInfo[edge.destination] = set() 77 | backInfo[edge.destination].add(edge.source) 78 | # forward 79 | if edge.source not in forwInfo.keys(): 80 | forwInfo[edge.source] = set() 81 | forwInfo[edge.source].add(edge.destination) 82 | 83 | self.funcName2backInfo[cpg.name] = backInfo 84 | self.funcName2forwInfo[cpg.name] = forwInfo 85 | 86 | 87 | def generateSliceForProgram(self): 88 | sinkTool: SyVCPoint = SyVCPoint(self.sensitive_apis) 89 | slicesCpg = list(filter(lambda cpg: cpg.joinSlice, self.cpgs)) 90 | for cpg in slicesCpg: 91 | for i, stmt in enumerate(cpg.statements): 92 | # 是否算SySe 93 | if sinkTool.judgeSink(stmt): 94 | coveredFileIds: Set[int] = set() 95 | lineNumber: int = stmt.location.startLine 96 | slice: SySeSlice = SySeSlice([lineNumber, self.file2Id[cpg.file]], stmt.getEscapedCodeStr()) 97 | 98 | backwardFunctionChain: List[str] = list() # store function call chain in backward slices 99 | backwardLineContents: List[str] = list() 100 | backwardLineInfo: List[List[int]] = list() 101 | backwardIdxs: List[int] = [i] 102 | self.generateSlice(cpg.name, backwardIdxs, backwardLineContents, backwardFunctionChain, 103 | backwardLineInfo, coveredFileIds, True) 104 | 105 | forwardFunctionChain: List[str] = list() # store function call chain in backward slices 106 | forwardLineContents: List[str] = list() 107 | forwardLineInfo: List[List[int]] = list() 108 | forwardIdxs: List[int] = [i] 109 | self.generateSlice(cpg.name, forwardIdxs, forwardLineContents, forwardFunctionChain, 110 | forwardLineInfo, coveredFileIds, False) 111 | 112 | slice.lineNumbers.extend(backwardLineInfo) 113 | slice.lineContents.extend(backwardLineContents) 114 | 115 | idx = forwardLineContents.index(stmt) 116 | forwardLineContents.pop(idx) 117 | forwardLineInfo.pop(idx) 118 | slice.lineNumbers.extend(forwardLineInfo) 119 | slice.lineContents.extend(forwardLineContents) 120 | 121 | slice.id2file = { fileId: self.files[fileId] for fileId in coveredFileIds } 122 | 123 | self.slices.add(slice) 124 | 125 | 126 | def generateSlice(self, functionName: str, sliceIdxs: List[int], slices: List[str], 127 | functionChain: List[str], sliceLines: List[List[int]], 128 | coveredFileIds: Set[int], backward: bool=True ): 129 | if functionName in functionChain: 130 | return 131 | 132 | functionChain.append(functionName) 133 | # computes all nodes with program-dependence with startIdx in a single function first 134 | if backward: 135 | Info: Dict[int, Set[int]] = self.funcName2backInfo[functionName] 136 | else: 137 | Info: Dict[int, Set[int]] = self.funcName2forwInfo[functionName] 138 | workList: List[int] = sliceIdxs.copy() 139 | while len(workList) > 0: 140 | curIdx: int = workList.pop(0) 141 | for o in Info.get(curIdx, set()): 142 | if o not in sliceIdxs: 143 | sliceIdxs.append(o) 144 | workList.append(o) 145 | 146 | # sliceIdxs stores all indexes of nodes of slices 147 | cpg: CPG = self.funcName2cpg[functionName] 148 | coveredFileIds.add(self.file2Id[cpg.file]) 149 | sliceIdxs.sort() 150 | for id in sliceIdxs: 151 | # 添加slice行代码 152 | slices.append(self.symbolizingTool.symbolize(cpg.statements[id].getEscapedCodeStr())) 153 | # 添加slice行行号和文件id 154 | sliceLines.append([cpg.statements[id].location.startLine, self.file2Id[cpg.file]]) 155 | 156 | callTool = CallExprTool() 157 | callTool.judgeCall(cpg.statements[id]) 158 | if callTool.functionName is not None and callTool.functionName in self.funcName2cpg.keys(): 159 | otherCpg: CPG = self.funcName2cpg[callTool.functionName] 160 | # 以前面一行代码的return语句为起点反向遍历 161 | if backward: 162 | assert isinstance(otherCpg.statements[-1], ReturnStatement) 163 | newStartIdxs: List[int] = [len(otherCpg.statements)] 164 | else: 165 | assert callTool.argNum> 0 166 | newStartIdxs: List[int] = list(range(callTool.argNum)) 167 | self.generateSlice(otherCpg.name, newStartIdxs, slices, functionChain, sliceLines, 168 | coveredFileIds, backward) -------------------------------------------------------------------------------- /extraTools/vuldetect/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/extraTools/vuldetect/utils/__init__.py -------------------------------------------------------------------------------- /extraTools/vuldetect/utils/environments.py: -------------------------------------------------------------------------------- 1 | from mainTool.udg.astProvider import ASTProvider 2 | from typing import Set, Dict 3 | 4 | 5 | class VariableEnvironment(object): 6 | def __init__(self, astProvider: ASTProvider): 7 | self.astProvider: ASTProvider = astProvider 8 | # 自己处理的token 9 | self.handledSymbols: Set[str] = set() 10 | # 交由父节点处理的token 11 | self.upStreamSymbols: Set[str] = set() 12 | self.var2type: Dict[str, str] = dict() # 变量名映射为类型名 13 | self.funcNames: Set[str] = set() # 使用过的函数名 14 | 15 | # 处理子结点的symbol,默认自己解决子结点中的symbol 16 | def addChildSymbols(self, childSymbols: Set[str], child: ASTProvider): 17 | self.handledSymbols.update(childSymbols) 18 | 19 | # 交由父节点处理的symbol 20 | def upstreamSymbols(self) -> Set[str]: 21 | return self.upStreamSymbols 22 | 23 | # 自己处理的symbol 24 | def selfHandledSymbols(self) -> Set[str]: 25 | return self.handledSymbols 26 | 27 | # 函数调用环境 28 | class CallVarEnvironment(VariableEnvironment): 29 | def addChildSymbols(self, childSymbols: Set[str], child: ASTProvider): 30 | childNumber: int = child.getChildNumber() 31 | # 函数名不添加 32 | if childNumber != 0: 33 | # 参数中的变量名全都处理了 34 | self.handledSymbols.update(childSymbols) 35 | 36 | # 交由父节点处理的symbol 37 | def upstreamSymbols(self) -> Set[str]: 38 | return set() 39 | 40 | # 自己处理的symbol 41 | def selfHandledSymbols(self) -> Set[str]: 42 | return set() 43 | 44 | class CalleeEnvironment(VariableEnvironment): 45 | def addChildSymbols(self, childSymbols: Set[str], child: ASTProvider): 46 | self.funcNames.update(childSymbols) 47 | 48 | 49 | class ClassStaticIdentifierVarEnvironment(VariableEnvironment): 50 | # Identifier类型直接获取token作为symbol,并返回给父节点处理 51 | def upstreamSymbols(self) -> Set[str]: 52 | code: str = self.astProvider.getChild(1).getEscapedCodeStr() 53 | retval: Set[str] = { code } 54 | return retval 55 | 56 | def selfHandledSymbols(self) -> Set[str]: 57 | return set() 58 | 59 | class IdentifierVarEnvironment(VariableEnvironment): 60 | # Identifier类型直接获取token作为symbol,并返回给父节点处理 61 | def upstreamSymbols(self) -> Set[str]: 62 | code: str = self.astProvider.getEscapedCodeStr() 63 | retval: Set[str] = { code } 64 | return retval 65 | 66 | def selfHandledSymbols(self) -> Set[str]: 67 | return set() 68 | 69 | # 结构体成员访问 70 | # 这里需要注意的是会出现 struct1 -> inner1 这种,我会将struct1 和 struct1 -> inner1 添加到变量列表,但是inner1就不会了 71 | class MemberAccessVarEnvironment(VariableEnvironment): 72 | def upstreamSymbols(self) -> Set[str]: 73 | retval: Set[str] = { self.astProvider.getEscapedCodeStr() } 74 | return retval 75 | 76 | def addChildSymbols(self, childSymbols: Set[str], child: ASTProvider): 77 | # 结构体变量名添加到symbol中但是使用的成员变量名不添加 78 | childNum: int = child.getChildNumber() 79 | if childNum == 0: 80 | self.handledSymbols.update(childSymbols) 81 | 82 | 83 | 84 | # 变量定义 85 | class VarDeclEnvironment(VariableEnvironment): 86 | def __init__(self, astProvider: ASTProvider): 87 | super().__init__(astProvider) 88 | self.type: str = self.astProvider.getChild(0).getEscapedCodeStr() 89 | 90 | def addChildSymbols(self, childSymbols: Set[str], child: ASTProvider): 91 | # 结构体变量名添加到symbol中但是使用的成员变量名不添加 92 | childNum: int = child.getChildNumber() 93 | # 变量名 94 | if childNum == 1: 95 | for symbol in childSymbols: 96 | self.var2type[symbol] = self.type 97 | self.handledSymbols.update(childSymbols) -------------------------------------------------------------------------------- /extraTools/vuldetect/utils/sinkPoint.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.astNode import ASTNode 2 | from mainTool.ast.expressions.postfixExpressions import CallExpression 3 | from mainTool.ast.expressions.expressionHolders import Callee 4 | from mainTool.ast.expressions.expression import ArrayIndexing, UnaryOp 5 | from mainTool.ast.expressions.postfixExpressions import IncDecOp 6 | from mainTool.ast.expressions.binaryExpressions import BinaryExpression, AssignmentExpr 7 | from typing import Set 8 | 9 | # judging whether a statement could be a sink point, following SySeVR 10 | class SyVCPoint(object): 11 | def __init__(self, sensitive_apis: Set[str]): 12 | self.sensitive_apis: Set[str] = sensitive_apis 13 | 14 | def judgeSink(self, astNode: ASTNode): 15 | # Library/API Function Call 16 | if isinstance(astNode, Callee): 17 | if astNode.getEscapedCodeStr() in self.sensitive_apis: 18 | return True 19 | # Array Usage 20 | elif isinstance(astNode, ArrayIndexing): 21 | return True 22 | # Pointer Usage 23 | elif isinstance(astNode, UnaryOp): 24 | if astNode.operator == '*': 25 | return True 26 | # Arithmetic Expression 27 | elif isinstance(astNode, BinaryExpression): 28 | if astNode.operator in { '+', '-', '*', '/' }: 29 | return True 30 | 31 | flag = False 32 | for i in range(astNode.getChildCount()): 33 | flag = flag or self.judgeSink(astNode.getChild(i)) 34 | return flag 35 | 36 | 37 | class XFGPoint(SyVCPoint): 38 | def __init__(self, sensitive_apis: Set[str]): 39 | super(XFGPoint, self).__init__(sensitive_apis) 40 | 41 | def judgeSink(self, astNode: ASTNode): 42 | # Library/API Function Call 43 | if isinstance(astNode, Callee): 44 | if astNode.getEscapedCodeStr() in self.sensitive_apis: 45 | return True 46 | # Array Usage 47 | elif isinstance(astNode, ArrayIndexing): 48 | return True 49 | # Pointer Usage 50 | elif isinstance(astNode, UnaryOp): 51 | if astNode.operator == '*': 52 | return True 53 | # Arithmetic Expression 54 | elif isinstance(astNode, BinaryExpression): 55 | if astNode.operator in { '+', '-', '*', '/', '<<', '>>' }: 56 | return True 57 | # increament assignment 58 | elif isinstance(astNode, AssignmentExpr): 59 | if astNode.operator in { "+=", "-=", "*=", "/=", "%=", ">>=", "<<=" }: 60 | return True 61 | # # x++ / x-- / ++x / --x 62 | elif isinstance(astNode, IncDecOp): 63 | return True 64 | 65 | flag = False 66 | for i in range(astNode.getChildCount()): 67 | flag = flag or self.judgeSink(astNode.getChild(i)) 68 | return flag 69 | 70 | 71 | 72 | # judging whether a statement could be a function call, haven't consider nesting function call 73 | class CallExprTool(object): 74 | def __init__(self): 75 | self.functionName: str = None 76 | self.argNum: int = -1 77 | 78 | def judgeCall(self, astNode: ASTNode): 79 | # Library/API Function Call 80 | if isinstance(astNode, CallExpression): 81 | self.functionName = astNode.getChild(0).getEscapedCodeStr() 82 | self.argNum = astNode.argumentList.getChildCount() 83 | return 84 | for i in range(astNode.getChildCount()): 85 | self.judgeCall(astNode.getChild(i)) -------------------------------------------------------------------------------- /extraTools/vuldetect/utils/symbolized.py: -------------------------------------------------------------------------------- 1 | from extraTools.vuldetect.utils.environments import * 2 | from mainTool.CPG import * 3 | from typing import List, Set, Tuple 4 | 5 | 6 | 7 | # symbolized variable names and function names 8 | # however, here we don't actually symbolize them, 9 | # we only collect the var name and func name to be symbolized 10 | # and their corresponding symbolized name 11 | 12 | symbolic_var_prefix = "VAR" 13 | symbolic_func_prefix = "FUN" 14 | 15 | 16 | class ASTVarAnalyzer(object): 17 | def __init__(self): 18 | self.environmentStack: List[VariableEnvironment] = list() 19 | self.variables: Set[str] = None 20 | self.var2type: Dict[str, str] = dict() 21 | self.funcNames: Set[str] = set() 22 | 23 | def reset(self): 24 | self.environmentStack.clear() 25 | self.variables = set() 26 | self.funcNames = set() 27 | # self.var2type = dict() 28 | 29 | def analyzeAST(self, astProvider: ASTProvider): 30 | self.reset() 31 | self.traverseAST(astProvider) 32 | 33 | 34 | def traverseAST(self, astProvider: ASTProvider): 35 | env: VariableEnvironment = self.createVarEnvironment(astProvider) 36 | self.traverseASTChildren(astProvider, env) 37 | 38 | 39 | def traverseASTChildren(self, astProvider: ASTProvider, env: VariableEnvironment): 40 | numChildren: int = astProvider.getChildCount() 41 | self.environmentStack.append(env) 42 | for i in range(numChildren): 43 | childProvider: ASTProvider = astProvider.getChild(i) 44 | self.traverseAST(childProvider) 45 | self.environmentStack.pop() 46 | self.variables.update(env.selfHandledSymbols()) 47 | self.reportUpstream(env) 48 | self.var2type.update(env.var2type) 49 | self.funcNames.update(env.funcNames) 50 | 51 | 52 | def reportUpstream(self, env: VariableEnvironment): 53 | symbols: Set[str] = env.upstreamSymbols() 54 | astProvider: ASTProvider = env.astProvider 55 | if len(self.environmentStack) > 0: 56 | parentEnv: VariableEnvironment = self.environmentStack[-1] 57 | parentEnv.addChildSymbols(symbols, astProvider) 58 | 59 | 60 | 61 | def createVarEnvironment(self, astProvider: ASTProvider) -> VariableEnvironment: 62 | nodeType: str = astProvider.getTypeAsString() 63 | 64 | if nodeType == "IdentifierDecl" or nodeType == "Parameter": 65 | return VarDeclEnvironment(astProvider) 66 | elif nodeType == "CallExpression": 67 | return CallVarEnvironment(astProvider) 68 | elif nodeType == "ClassStaticIdentifier": 69 | return ClassStaticIdentifierVarEnvironment(astProvider) 70 | elif nodeType == "Identifier": 71 | return IdentifierVarEnvironment(astProvider) 72 | elif nodeType == "MemberAccess" or nodeType == "PtrMemberAccess": 73 | return MemberAccessVarEnvironment(astProvider) 74 | elif nodeType == "Callee": 75 | return CalleeEnvironment(astProvider) 76 | else: 77 | return VariableEnvironment(astProvider) 78 | 79 | 80 | def getVarFuncNamesInFunc(statements: List[ASTNode]) -> Tuple[Set[str], Set[str]]: 81 | astVarAnalyzer: ASTVarAnalyzer = ASTVarAnalyzer() 82 | varSets: Set[str] = set() 83 | funcSets: Set[str] = set() 84 | for statement in statements: 85 | provider: ASTNodeASTProvider = ASTNodeASTProvider() 86 | provider.node = statement 87 | astVarAnalyzer.analyzeAST(provider) 88 | varSets.update(astVarAnalyzer.variables) 89 | funcSets.update(astVarAnalyzer.funcNames) 90 | 91 | return varSets, funcSets 92 | 93 | 94 | class SymbolizingTool(object): 95 | def __init__(self, systemDefinedVars: Set[str], systemDefinedFuncs: Set[str]): 96 | self.systemDefinedVars: Set[str] = systemDefinedVars 97 | self.systemDefinedFuncs: Set[str] = systemDefinedFuncs 98 | self.var2symbol: Dict[str, str] = dict() 99 | self.func2symbol: Dict[str, str] = dict() 100 | 101 | # cpgs is all cpgs from functions of a program 102 | def getVarFuncNamesInFile(self, cpgs: List[CPG]): 103 | for cpg in cpgs: 104 | if cpg.name not in self.systemDefinedFuncs and cpg.name not in self.func2symbol.keys(): 105 | self.func2symbol[cpg.name] = symbolic_func_prefix + str(len(self.func2symbol) + 1) 106 | varSets, funcSets = getVarFuncNamesInFunc(cpg.statements) 107 | for var in varSets: 108 | if var not in self.var2symbol.keys() and var not in self.systemDefinedVars: 109 | self.var2symbol[var] = symbolic_var_prefix + str(len(self.var2symbol) + 1) 110 | for func in funcSets: 111 | if func not in self.systemDefinedFuncs and func not in self.func2symbol.keys(): 112 | self.func2symbol[func] = symbolic_func_prefix + str(len(self.func2symbol) + 1) 113 | 114 | 115 | 116 | def symbolize(self, code: str) -> str: 117 | tokens = code.split(' ') 118 | symbolized_tokens = [] 119 | for token in tokens: 120 | symVarName: str = self.var2symbol.get(token, None) 121 | symFuncName: str = self.func2symbol.get(token, None) 122 | if symVarName is not None: 123 | symbolized_tokens.append(symVarName) 124 | elif symFuncName is not None: 125 | symbolized_tokens.append(symFuncName) 126 | else: 127 | symbolized_tokens.append(token) 128 | return " ".join(symbolized_tokens) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from mainTool.CPG import * 2 | import argparse 3 | import sys 4 | import os 5 | import json 6 | 7 | 8 | suffixs: Set[str] = {'c', 'cxx', 'c++', 'cpp', 'cc', 'cp', 'hxx', 'h', 'hpp'} 9 | 10 | def show_files(base_path, all_files: List[str]): 11 | """ 12 | 遍历当前目录所有py文件及文件夹 13 | :param path: 14 | :param all_files: 15 | """ 16 | file_list = os.listdir(base_path) 17 | # 准备循环判断每个元素是否是文件夹还是文件,是文件的话,把名称传入list,是文件夹的话,递归 18 | for file in file_list: 19 | # 利用os.path.join()方法取得路径全名,并存入cur_path变量,否则每次只能遍历一层目录 20 | cur_path = os.path.join(base_path, file) 21 | # 判断是否是文件夹 22 | if os.path.isdir(cur_path): 23 | show_files(cur_path, all_files) 24 | else: 25 | suffix = file.split('.')[-1] 26 | if suffix in suffixs: 27 | all_files.append(cur_path) 28 | 29 | def _argparse(): 30 | parser = argparse.ArgumentParser(description='user guide for CppCodeAnalyzer') 31 | parser.add_argument('-f', '--file', required=False, type=str, 32 | help='specify c file to be parsed') 33 | parser.add_argument('-d', '--dir', required=False, type=str, 34 | help='specify dir which contains source files to be parsed') 35 | parser.add_argument('-c', '--calleeInfos', required=False, type=str, 36 | help='specify dir which contains source files to be parsed', 37 | default="resources/calleeInfos.json") 38 | parser.add_argument('-o', '--output', required=False, type=str, help='specify dir which store parsing results') 39 | return parser.parse_args() 40 | 41 | if __name__ == '__main__': 42 | parser = _argparse() # main这里引入命令行参数函数 43 | 44 | # 参数解析过程 45 | if parser.file is None and parser.dir is None: 46 | sys.stderr.write("please specify file or dir to parse") 47 | exit(-1) 48 | elif parser.file is not None and parser.dir is not None: 49 | sys.stderr.write("please do not specify file and dir in the same time") 50 | exit(-1) 51 | 52 | potential_output_dir = "" # 默认解析结果存放的文件夹位置 53 | file2parse: List[str] = list() 54 | # 如果指定检测一个文件,那么默认解析结果存放在输入文件同一个文件夹下 55 | if parser.file is not None: 56 | parts: List[str] = parser.file.split(os.path.sep) 57 | potential_output_dir = os.path.sep.join(parts[:-1]) 58 | file2parse.append(parser.file) 59 | # 指定检测文件夹,那么解析结果存放在该文件夹下 60 | elif parser.dir is not None: 61 | potential_output_dir = parser.dir 62 | show_files(potential_output_dir, file2parse) 63 | 64 | if len(file2parse) == 0: 65 | sys.stderr.write("the dir must at least contains one c file") 66 | exit(-1) 67 | 68 | output_dir = "" 69 | # 没有设定解析结果存放位置的话就用默认值 70 | if parser.output is not None: 71 | output_dir = parser.output 72 | else: 73 | output_dir = potential_output_dir 74 | 75 | # 加载callee信息 76 | calleeInfs: Dict[str, Dict] = json.load(open(parser.calleeInfos, 'r', encoding='utf-8')) 77 | calleeInfos: CalleeInfos = initialCalleeInfos(calleeInfs) 78 | 79 | converter: CFGToUDGConverter = CFGToUDGConverter() 80 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 81 | astAnalyzer.calleeInfos = calleeInfos 82 | converter.astAnalyzer = astAnalyzer 83 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 84 | ddgCreator: DDGCreator = DDGCreator() 85 | 86 | # start analysing 87 | result_cpgs: List[CPG] = list() 88 | successful_count = 0 89 | 90 | for i, file_name in enumerate(file2parse): 91 | print(f"{i} / {len(file2parse)} -- {file_name}") 92 | # ddgCreator.clear() 93 | try: 94 | cpgs: List[CPG] = fileParse(file_name, converter, defUseConverter, ddgCreator) 95 | result_cpgs.extend(cpgs) 96 | successful_count += 1 97 | except Exception as e: 98 | print(f"syntax error might appear in {file_name}") 99 | 100 | output_file = output_dir + "result.json" 101 | json_cpgs: List[Dict] = [cpg.toSerializedJson() for cpg in result_cpgs] 102 | json.dump(json_cpgs, open(output_file, 'w', encoding='utf-8'), indent=2) 103 | -------------------------------------------------------------------------------- /mainTool/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/__init__.py -------------------------------------------------------------------------------- /mainTool/antlr/CPP14.tokens: -------------------------------------------------------------------------------- 1 | MultiLineMacro=1 2 | Directive=2 3 | Alignas=3 4 | Alignof=4 5 | Asm=5 6 | Auto=6 7 | Bool=7 8 | Break=8 9 | Case=9 10 | Catch=10 11 | Char=11 12 | Char16=12 13 | Char32=13 14 | Class=14 15 | Const=15 16 | Constexpr=16 17 | Const_cast=17 18 | Continue=18 19 | Decltype=19 20 | Default=20 21 | Delete=21 22 | Do=22 23 | Double=23 24 | Dynamic_cast=24 25 | Else=25 26 | Enum=26 27 | Explicit=27 28 | Export=28 29 | Extern=29 30 | FalseToken=30 31 | Final=31 32 | Float=32 33 | For=33 34 | Friend=34 35 | Goto=35 36 | If=36 37 | Inline=37 38 | Int=38 39 | Long=39 40 | Mutable=40 41 | Namespace=41 42 | New=42 43 | Noexcept=43 44 | Nullptr=44 45 | Operator=45 46 | Override=46 47 | Private=47 48 | Protected=48 49 | Public=49 50 | Register=50 51 | Reinterpret_cast=51 52 | Return=52 53 | Short=53 54 | Signed=54 55 | Sizeof=55 56 | Static=56 57 | Static_assert=57 58 | Static_cast=58 59 | Struct=59 60 | Switch=60 61 | Template=61 62 | This=62 63 | Thread_local=63 64 | Throw=64 65 | TrueToken=65 66 | Try=66 67 | Typedef=67 68 | Typeid=68 69 | Typename=69 70 | Union=70 71 | Unsigned=71 72 | Using=72 73 | Virtual=73 74 | Void=74 75 | Volatile=75 76 | Wchar=76 77 | While=77 78 | LeftParen=78 79 | RightParen=79 80 | LeftBracket=80 81 | RightBracket=81 82 | LeftBrace=82 83 | RightBrace=83 84 | Plus=84 85 | Minus=85 86 | Star=86 87 | Div=87 88 | Mod=88 89 | Caret=89 90 | And=90 91 | Or=91 92 | Tilde=92 93 | Not=93 94 | Assign=94 95 | Less=95 96 | Greater=96 97 | PlusAssign=97 98 | MinusAssign=98 99 | StarAssign=99 100 | DivAssign=100 101 | ModAssign=101 102 | XorAssign=102 103 | AndAssign=103 104 | OrAssign=104 105 | LeftShift=105 106 | LeftShiftAssign=106 107 | Equal=107 108 | NotEqual=108 109 | LessEqual=109 110 | GreaterEqual=110 111 | AndAnd=111 112 | OrOr=112 113 | PlusPlus=113 114 | MinusMinus=114 115 | Comma=115 116 | ArrowStar=116 117 | Arrow=117 118 | Question=118 119 | Colon=119 120 | Doublecolon=120 121 | Semi=121 122 | Dot=122 123 | DotStar=123 124 | Ellipsis=124 125 | Identifier=125 126 | Integerliteral=126 127 | Decimalliteral=127 128 | Octalliteral=128 129 | Hexadecimalliteral=129 130 | Binaryliteral=130 131 | Integersuffix=131 132 | Characterliteral=132 133 | Floatingliteral=133 134 | Stringliteral=134 135 | Userdefinedintegerliteral=135 136 | Userdefinedfloatingliteral=136 137 | Userdefinedstringliteral=137 138 | Userdefinedcharacterliteral=138 139 | Whitespace=139 140 | Newline=140 141 | BlockComment=141 142 | LineComment=142 143 | 'alignas'=3 144 | 'alignof'=4 145 | 'asm'=5 146 | 'auto'=6 147 | 'bool'=7 148 | 'break'=8 149 | 'case'=9 150 | 'catch'=10 151 | 'char'=11 152 | 'char16_t'=12 153 | 'char32_t'=13 154 | 'class'=14 155 | 'const'=15 156 | 'constexpr'=16 157 | 'const_cast'=17 158 | 'continue'=18 159 | 'decltype'=19 160 | 'default'=20 161 | 'delete'=21 162 | 'do'=22 163 | 'double'=23 164 | 'dynamic_cast'=24 165 | 'else'=25 166 | 'enum'=26 167 | 'explicit'=27 168 | 'export'=28 169 | 'extern'=29 170 | 'false'=30 171 | 'final'=31 172 | 'float'=32 173 | 'for'=33 174 | 'friend'=34 175 | 'goto'=35 176 | 'if'=36 177 | 'inline'=37 178 | 'int'=38 179 | 'long'=39 180 | 'mutable'=40 181 | 'namespace'=41 182 | 'new'=42 183 | 'noexcept'=43 184 | 'operator'=45 185 | 'override'=46 186 | 'private'=47 187 | 'protected'=48 188 | 'public'=49 189 | 'register'=50 190 | 'reinterpret_cast'=51 191 | 'return'=52 192 | 'short'=53 193 | 'signed'=54 194 | 'sizeof'=55 195 | 'static'=56 196 | 'static_assert'=57 197 | 'static_cast'=58 198 | 'struct'=59 199 | 'switch'=60 200 | 'template'=61 201 | 'this'=62 202 | 'thread_local'=63 203 | 'throw'=64 204 | 'true'=65 205 | 'try'=66 206 | 'typedef'=67 207 | 'typeid'=68 208 | 'typename'=69 209 | 'union'=70 210 | 'unsigned'=71 211 | 'using'=72 212 | 'virtual'=73 213 | 'void'=74 214 | 'volatile'=75 215 | 'wchar_t'=76 216 | 'while'=77 217 | '('=78 218 | ')'=79 219 | '['=80 220 | ']'=81 221 | '{'=82 222 | '}'=83 223 | '+'=84 224 | '-'=85 225 | '*'=86 226 | '/'=87 227 | '%'=88 228 | '^'=89 229 | '&'=90 230 | '|'=91 231 | '~'=92 232 | '!'=93 233 | '='=94 234 | '<'=95 235 | '>'=96 236 | '+='=97 237 | '-='=98 238 | '*='=99 239 | '/='=100 240 | '%='=101 241 | '^='=102 242 | '&='=103 243 | '|='=104 244 | '<<'=105 245 | '<<='=106 246 | '=='=107 247 | '!='=108 248 | '<='=109 249 | '>='=110 250 | '&&'=111 251 | '||'=112 252 | '++'=113 253 | '--'=114 254 | ','=115 255 | '->*'=116 256 | '->'=117 257 | '?'=118 258 | ':'=119 259 | '::'=120 260 | ';'=121 261 | '.'=122 262 | '.*'=123 263 | '...'=124 264 | -------------------------------------------------------------------------------- /mainTool/antlr/CPP14Lexer.tokens: -------------------------------------------------------------------------------- 1 | MultiLineMacro=1 2 | Directive=2 3 | Alignas=3 4 | Alignof=4 5 | Asm=5 6 | Auto=6 7 | Bool=7 8 | Break=8 9 | Case=9 10 | Catch=10 11 | Char=11 12 | Char16=12 13 | Char32=13 14 | Class=14 15 | Const=15 16 | Constexpr=16 17 | Const_cast=17 18 | Continue=18 19 | Decltype=19 20 | Default=20 21 | Delete=21 22 | Do=22 23 | Double=23 24 | Dynamic_cast=24 25 | Else=25 26 | Enum=26 27 | Explicit=27 28 | Export=28 29 | Extern=29 30 | FalseToken=30 31 | Final=31 32 | Float=32 33 | For=33 34 | Friend=34 35 | Goto=35 36 | If=36 37 | Inline=37 38 | Int=38 39 | Long=39 40 | Mutable=40 41 | Namespace=41 42 | New=42 43 | Noexcept=43 44 | Nullptr=44 45 | Operator=45 46 | Override=46 47 | Private=47 48 | Protected=48 49 | Public=49 50 | Register=50 51 | Reinterpret_cast=51 52 | Return=52 53 | Short=53 54 | Signed=54 55 | Sizeof=55 56 | Static=56 57 | Static_assert=57 58 | Static_cast=58 59 | Struct=59 60 | Switch=60 61 | Template=61 62 | This=62 63 | Thread_local=63 64 | Throw=64 65 | TrueToken=65 66 | Try=66 67 | Typedef=67 68 | Typeid=68 69 | Typename=69 70 | Union=70 71 | Unsigned=71 72 | Using=72 73 | Virtual=73 74 | Void=74 75 | Volatile=75 76 | Wchar=76 77 | While=77 78 | LeftParen=78 79 | RightParen=79 80 | LeftBracket=80 81 | RightBracket=81 82 | LeftBrace=82 83 | RightBrace=83 84 | Plus=84 85 | Minus=85 86 | Star=86 87 | Div=87 88 | Mod=88 89 | Caret=89 90 | And=90 91 | Or=91 92 | Tilde=92 93 | Not=93 94 | Assign=94 95 | Less=95 96 | Greater=96 97 | PlusAssign=97 98 | MinusAssign=98 99 | StarAssign=99 100 | DivAssign=100 101 | ModAssign=101 102 | XorAssign=102 103 | AndAssign=103 104 | OrAssign=104 105 | LeftShift=105 106 | LeftShiftAssign=106 107 | Equal=107 108 | NotEqual=108 109 | LessEqual=109 110 | GreaterEqual=110 111 | AndAnd=111 112 | OrOr=112 113 | PlusPlus=113 114 | MinusMinus=114 115 | Comma=115 116 | ArrowStar=116 117 | Arrow=117 118 | Question=118 119 | Colon=119 120 | Doublecolon=120 121 | Semi=121 122 | Dot=122 123 | DotStar=123 124 | Ellipsis=124 125 | Identifier=125 126 | Integerliteral=126 127 | Decimalliteral=127 128 | Octalliteral=128 129 | Hexadecimalliteral=129 130 | Binaryliteral=130 131 | Integersuffix=131 132 | Characterliteral=132 133 | Floatingliteral=133 134 | Stringliteral=134 135 | Userdefinedintegerliteral=135 136 | Userdefinedfloatingliteral=136 137 | Userdefinedstringliteral=137 138 | Userdefinedcharacterliteral=138 139 | Whitespace=139 140 | Newline=140 141 | BlockComment=141 142 | LineComment=142 143 | 'alignas'=3 144 | 'alignof'=4 145 | 'asm'=5 146 | 'auto'=6 147 | 'bool'=7 148 | 'break'=8 149 | 'case'=9 150 | 'catch'=10 151 | 'char'=11 152 | 'char16_t'=12 153 | 'char32_t'=13 154 | 'class'=14 155 | 'const'=15 156 | 'constexpr'=16 157 | 'const_cast'=17 158 | 'continue'=18 159 | 'decltype'=19 160 | 'default'=20 161 | 'delete'=21 162 | 'do'=22 163 | 'double'=23 164 | 'dynamic_cast'=24 165 | 'else'=25 166 | 'enum'=26 167 | 'explicit'=27 168 | 'export'=28 169 | 'extern'=29 170 | 'false'=30 171 | 'final'=31 172 | 'float'=32 173 | 'for'=33 174 | 'friend'=34 175 | 'goto'=35 176 | 'if'=36 177 | 'inline'=37 178 | 'int'=38 179 | 'long'=39 180 | 'mutable'=40 181 | 'namespace'=41 182 | 'new'=42 183 | 'noexcept'=43 184 | 'operator'=45 185 | 'override'=46 186 | 'private'=47 187 | 'protected'=48 188 | 'public'=49 189 | 'register'=50 190 | 'reinterpret_cast'=51 191 | 'return'=52 192 | 'short'=53 193 | 'signed'=54 194 | 'sizeof'=55 195 | 'static'=56 196 | 'static_assert'=57 197 | 'static_cast'=58 198 | 'struct'=59 199 | 'switch'=60 200 | 'template'=61 201 | 'this'=62 202 | 'thread_local'=63 203 | 'throw'=64 204 | 'true'=65 205 | 'try'=66 206 | 'typedef'=67 207 | 'typeid'=68 208 | 'typename'=69 209 | 'union'=70 210 | 'unsigned'=71 211 | 'using'=72 212 | 'virtual'=73 213 | 'void'=74 214 | 'volatile'=75 215 | 'wchar_t'=76 216 | 'while'=77 217 | '('=78 218 | ')'=79 219 | '['=80 220 | ']'=81 221 | '{'=82 222 | '}'=83 223 | '+'=84 224 | '-'=85 225 | '*'=86 226 | '/'=87 227 | '%'=88 228 | '^'=89 229 | '&'=90 230 | '|'=91 231 | '~'=92 232 | '!'=93 233 | '='=94 234 | '<'=95 235 | '>'=96 236 | '+='=97 237 | '-='=98 238 | '*='=99 239 | '/='=100 240 | '%='=101 241 | '^='=102 242 | '&='=103 243 | '|='=104 244 | '<<'=105 245 | '<<='=106 246 | '=='=107 247 | '!='=108 248 | '<='=109 249 | '>='=110 250 | '&&'=111 251 | '||'=112 252 | '++'=113 253 | '--'=114 254 | ','=115 255 | '->*'=116 256 | '->'=117 257 | '?'=118 258 | ':'=119 259 | '::'=120 260 | ';'=121 261 | '.'=122 262 | '.*'=123 263 | '...'=124 264 | -------------------------------------------------------------------------------- /mainTool/antlr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/antlr/__init__.py -------------------------------------------------------------------------------- /mainTool/ast/ParsingUtils.py: -------------------------------------------------------------------------------- 1 | from antlr4.ParserRuleContext import ParserRuleContext 2 | 3 | # from ast.expressions.expressionHolders import 4 | 5 | def childTokenString(ctx: ParserRuleContext) -> str: 6 | if ctx is None: 7 | return "" 8 | nChildren: int = ctx.getChildCount() 9 | 10 | if nChildren == 0: 11 | return ctx.getText() 12 | 13 | retval: str = "" 14 | 15 | for i in range(nChildren): 16 | child: ParserRuleContext = ctx.getChild(i) 17 | childText: str = childTokenString(child) 18 | 19 | if childText != "": 20 | retval = retval + childText + " " 21 | 22 | return retval.strip() -------------------------------------------------------------------------------- /mainTool/ast/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/ast/__init__.py -------------------------------------------------------------------------------- /mainTool/ast/astNode.py: -------------------------------------------------------------------------------- 1 | from antlr4 import ParserRuleContext 2 | from typing import List 3 | 4 | from mainTool.ast.ParsingUtils import childTokenString 5 | from mainTool.ast.walking.visitor import ASTNodeVisitor 6 | 7 | 8 | NOT_SET = -1 9 | 10 | class CodeLocation(object): 11 | def __init__(self, context: ParserRuleContext = None): 12 | self.startLine: int = NOT_SET 13 | self.startPos: int = NOT_SET 14 | self.startIndex: int = NOT_SET 15 | self.stopIndex: int = NOT_SET 16 | 17 | if context is not None: 18 | self.initializeFromContext(context) 19 | 20 | def initializeFromContext(self, context: ParserRuleContext = None): 21 | self.startLine = context.start.line 22 | self.startPos = context.start.column 23 | self.startIndex = context.start.tokenIndex 24 | self.stopIndex = context.stop.tokenIndex 25 | 26 | def __str__(self): 27 | return f"{self.startLine}:{self.startPos}:{self.startIndex}:{self.stopIndex}" 28 | 29 | def __cmp__(self, other): 30 | if self.startLine < other.startLine: 31 | return -1 32 | elif self.startLine > other.startLine: 33 | return 1 34 | 35 | if self.startPos < other.startPos: 36 | return -1 37 | else: 38 | return 1 39 | 40 | def __lt__(self, other): 41 | return self.startLine < other.startLine or (self.startLine == other.startLine and 42 | self.startPos < other.startPos) 43 | 44 | def __gt__(self, other): 45 | return self.startLine > other.startLine or (self.startLine == other.startLine and 46 | self.startPos > other.startPos) 47 | 48 | 49 | 50 | class ASTNode(object): 51 | def __init__(self): 52 | self.codeStr: str = None # 该node对应的代码文本 53 | self.parseTreeNodeContext: ParserRuleContext = None 54 | self.location: CodeLocation = None 55 | self.isInCFG: bool = False # 属于CFG node还是纯粹是AST node 56 | 57 | self.children: List[ASTNode] = list() 58 | self.childNumber: int = NOT_SET 59 | 60 | def addChild(self, node): 61 | node.childNumber = len(self.children) 62 | self.children.append(node) 63 | 64 | def getChildCount(self) -> int: 65 | return len(self.children) 66 | 67 | def getChild(self, i: int): 68 | if i < 0 or i >= len(self.children): 69 | raise RuntimeError("index out of bound") 70 | return self.children[i] 71 | 72 | def initializeFromContext(self, ctx: ParserRuleContext): 73 | self.parseTreeNodeContext = ctx 74 | self.setLocation(ctx) 75 | self.codeStr = self.escapeCodeStr(childTokenString(self.parseTreeNodeContext)) 76 | 77 | def setLocation(self, ctx: ParserRuleContext): 78 | self.location = CodeLocation(ctx) 79 | 80 | def getEscapedCodeStr(self) -> str: 81 | if self.codeStr is not None: 82 | return self.codeStr 83 | self.codeStr = self.escapeCodeStr(childTokenString(self.parseTreeNodeContext)) 84 | return self.codeStr 85 | 86 | def escapeCodeStr(self, codeStr) -> str: 87 | retval = codeStr 88 | retval = retval.replace("\n", "\\n") 89 | retval = retval.replace("\t", "\\t") 90 | return retval 91 | 92 | def getLocationString(self) -> str: 93 | # self.setLocation(self.parseTreeNodeContext) 94 | return str(self.location) 95 | 96 | def markAsCFGNode(self): 97 | self.isInCFG = True 98 | 99 | def isLeaf(self) -> bool: 100 | return len(self.children) == 0 101 | 102 | def getTypeAsString(self): 103 | return type(self).__name__ 104 | 105 | def accept(self, visitor: ASTNodeVisitor): 106 | visitor.visit(self) 107 | 108 | def __str__(self): 109 | return self.getEscapedCodeStr() 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | if __name__ == '__main__': 118 | location1 = CodeLocation() 119 | location1.startLine = 1 120 | location1.startPos = 2 121 | 122 | location2 = CodeLocation() 123 | location2.startLine = 2 124 | location2.startPos = 3 125 | 126 | print(location1 < location2) -------------------------------------------------------------------------------- /mainTool/ast/declarations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/ast/declarations/__init__.py -------------------------------------------------------------------------------- /mainTool/ast/declarations/complexDecls.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.statements.statements import Statement, CompoundStatement 2 | from mainTool.ast.expressions.primaryExpressions import PrimaryExpression 3 | from mainTool.ast.expressions.expression import Identifier 4 | from mainTool.ast.astNode import ASTNode 5 | from mainTool.ast.walking.visitor import ASTNodeVisitor 6 | 7 | from typing import List 8 | from mainTool.utils.types import ClassType 9 | 10 | # 包括函数和类的定义 11 | 12 | # 函数参数类型 13 | class ParameterType(ASTNode): 14 | def __init__(self): 15 | super(ParameterType, self).__init__() 16 | self.baseType: str = None 17 | self.completeType: str = None 18 | 19 | def getEscapedCodeStr(self) -> str: 20 | if self.codeStr is not None: 21 | return self.codeStr 22 | return self.completeType 23 | 24 | # 函数返回类型 25 | class ReturnType(ASTNode): 26 | def __init__(self): 27 | super(ReturnType, self).__init__() 28 | self.baseType: str = None 29 | self.completeType: str = None 30 | 31 | def getEscapedCodeStr(self) -> str: 32 | return self.completeType 33 | 34 | # 函数定义形参 35 | class Parameter(ASTNode): 36 | def __init__(self): 37 | super(Parameter, self).__init__() 38 | self.type: ParameterType = None 39 | self.name: Identifier = None 40 | self.defaultValue: PrimaryExpression = None 41 | 42 | def setName(self, name: Identifier): 43 | self.name = name 44 | super().addChild(name) 45 | 46 | def setType(self, type: ParameterType): 47 | self.type = type 48 | super().addChild(type) 49 | 50 | def setDefaultValue(self, value: PrimaryExpression): 51 | self.defaultValue = value 52 | super().addChild(value) 53 | 54 | def addChild(self, node): 55 | if isinstance(node, Identifier): 56 | self.setName(node) 57 | elif isinstance(node, ParameterType): 58 | self.setType(node) 59 | elif isinstance(node, PrimaryExpression): 60 | self.setDefaultValue(node) 61 | else: 62 | super().addChild(node) 63 | 64 | def accept(self, visitor: ASTNodeVisitor): 65 | visitor.visit(self) 66 | 67 | 68 | # 参数列表 69 | class ParameterList(ASTNode): 70 | def __init__(self): 71 | super(ParameterList, self).__init__() 72 | self.parameters: List[Parameter] = list() 73 | 74 | def addParameter(self, param: Parameter): 75 | self.parameters.append(param) 76 | super().addChild(param) 77 | 78 | def addChild(self, node): 79 | if isinstance(node, Parameter): 80 | self.addParameter(node) 81 | 82 | # 将所有参数名加载到一个string里面 83 | def getEscapedCodeStr(self) -> str: 84 | if self.codeStr is not None: 85 | return self.codeStr 86 | 87 | if len(self.parameters) == 0: 88 | self.codeStr = "" 89 | return self.codeStr 90 | 91 | s = "" 92 | for param in self.parameters: 93 | s += param.getEscapedCodeStr() + " , " 94 | 95 | s = s.encode('utf-8')[:-3].decode('utf-8') 96 | self.codeStr = s 97 | return s 98 | 99 | def accept(self, visitor: ASTNodeVisitor): 100 | visitor.visit(self) 101 | 102 | 103 | class FunctionDef(ASTNode): 104 | def __init__(self): 105 | super(FunctionDef, self).__init__() 106 | self.name: Identifier = None # 函数名 107 | self.parameterList: ParameterList = None # 形参列表 108 | self.returnType: ReturnType = None # 返回类型 109 | self.content: CompoundStatement = None # 函数body 110 | 111 | def replaceName(self, name: Identifier): 112 | i = self.children.index(self.name) 113 | self.children[i] = name 114 | self.name = name 115 | 116 | def setContent(self, functionContent: CompoundStatement): 117 | self.content = functionContent 118 | super().addChild(functionContent) 119 | 120 | def setParameterList(self, parameterList: ParameterList): 121 | self.parameterList = parameterList 122 | super().addChild(parameterList) 123 | 124 | def setName(self, name: Identifier): 125 | self.name = name 126 | super().addChild(name) 127 | 128 | def setReturnType(self, returnType: ReturnType): 129 | self.returnType = returnType 130 | super().addChild(returnType) 131 | 132 | def addChild(self, node): 133 | if isinstance(node, CompoundStatement): 134 | self.setContent(node) 135 | elif isinstance(node, ParameterList): 136 | self.setParameterList(node) 137 | elif isinstance(node, ReturnType): 138 | self.setReturnType(node) 139 | elif isinstance(node, Identifier): 140 | self.setName(node) 141 | else: 142 | super().addChild(node) 143 | 144 | class ClassDefStatement(Statement): 145 | def __init__(self): 146 | super(ClassDefStatement, self).__init__() 147 | self.name: Identifier = None 148 | self.functionDefs: List[FunctionDef] = list() 149 | self.type: ClassType = None 150 | 151 | def addChild(self, node): 152 | if isinstance(node, Identifier): 153 | self.name = node 154 | super().addChild(node) -------------------------------------------------------------------------------- /mainTool/ast/declarations/simpleDecls.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.expressions.expression import Expression, Identifier 2 | from mainTool.ast.astNode import ASTNode 3 | 4 | class IdentifierDeclType(ASTNode): 5 | def __init__(self): 6 | super(IdentifierDeclType, self).__init__() 7 | self.baseType: str = None 8 | self.completeType: str = None 9 | 10 | def getEscapedCodeStr(self) -> str: 11 | if self.codeStr is not None: 12 | return self.codeStr 13 | return self.completeType 14 | 15 | 16 | class IdentifierDecl(ASTNode): 17 | def __init__(self): 18 | super(IdentifierDecl, self).__init__() 19 | self.type: IdentifierDeclType = None 20 | self.name: Identifier = None 21 | 22 | def setName(self, name: Identifier): 23 | self.name = name 24 | super().addChild(name) 25 | 26 | def setType(self, type: IdentifierDeclType): 27 | self.type = type 28 | super().addChild(type) 29 | 30 | def addChild(self, node): 31 | if isinstance(node, Identifier): 32 | self.setName(node) 33 | elif isinstance(node, IdentifierDeclType): 34 | self.setType(node) 35 | else: 36 | super().addChild(node) 37 | 38 | def getEscapedCodeStr(self) -> str: 39 | if self.codeStr is not None: 40 | return self.codeStr 41 | self.codeStr = ' '.join( [self.getChild(i).getEscapedCodeStr() for i in range(self.getChildCount())]) 42 | 43 | # def getChildCount(self) -> int: 44 | # childCount: int = 0 45 | # if self.type is not None: 46 | # childCount += 1 47 | # if self.name is not None: 48 | # childCount += 1 49 | # return childCount 50 | # 51 | # def getChild(self, i: int): 52 | # if i == 0: 53 | # return self.type 54 | # elif i == 1: 55 | # return self.name 56 | # else: 57 | # return self.children[i - 1] 58 | 59 | 60 | # for init 表达式 61 | class ForInit(Expression): 62 | pass 63 | 64 | class ForRangeInit(IdentifierDecl): 65 | def __init__(self): 66 | super(ForRangeInit, self).__init__() 67 | # 要遍历的数组表达式 68 | self.arrayExpr: Expression = None 69 | 70 | def setArrayExpr(self, arrayExpr: Expression): 71 | self.arrayExpr = arrayExpr 72 | arrayExpr.childNumber = self.getChildCount() 73 | self.children.append(arrayExpr) 74 | 75 | def setType(self, type: IdentifierDeclType): 76 | super().setType(type) 77 | self.type.childNumber = 0 78 | self.name.childNumber = 1 79 | 80 | def addChild(self, node): 81 | # Type var1: var2 82 | if isinstance(node, Identifier): 83 | # var1 84 | if self.name is None: 85 | self.setName(node) 86 | # var2 87 | else: 88 | self.setArrayExpr(node) 89 | 90 | elif isinstance(node, Expression): 91 | self.setArrayExpr(node) 92 | 93 | elif isinstance(node, IdentifierDeclType): 94 | self.setType(node) 95 | 96 | def getEscapedCodeStr(self) -> str: 97 | return self.type.getEscapedCodeStr() + " " + self.name.getEscapedCodeStr() + " : " + self.arrayExpr.getEscapedCodeStr() 98 | 99 | -------------------------------------------------------------------------------- /mainTool/ast/expressions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/ast/expressions/__init__.py -------------------------------------------------------------------------------- /mainTool/ast/expressions/binaryExpressions.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.expressions.expression import Expression 2 | from mainTool.ast.astNode import ASTNode 3 | from antlr4.ParserRuleContext import ParserRuleContext 4 | from typing import List 5 | 6 | 7 | # 二元运算表达式 8 | class BinaryExpression(Expression): 9 | def __init__(self): 10 | super(Expression, self).__init__() 11 | self.flag: bool = True 12 | self.subExpressions: List[Expression] = [None, None] 13 | 14 | def getLeft(self) -> Expression: 15 | return self.subExpressions[0] 16 | 17 | def getRight(self) -> Expression: 18 | return self.subExpressions[1] 19 | 20 | def setLeft(self, left: Expression): 21 | self.subExpressions[0] = left 22 | 23 | def setRight(self, right: Expression): 24 | self.subExpressions[1] = right 25 | 26 | def addChild(self, item: ASTNode): 27 | if not isinstance(item, Expression): 28 | raise RuntimeError("Error: child of BinaryExpression should be Expression") 29 | if self.getLeft() is None: 30 | self.setLeft(item) 31 | elif self.getRight() is None: 32 | self.setRight(item) 33 | else: 34 | raise RuntimeError("Error: attempting to add third child to binary expression") 35 | super().addChild(item) 36 | 37 | def getChildCount(self) -> int: 38 | childCount: int = 0 39 | if self.getLeft() is not None: 40 | childCount += 1 41 | if self.getRight() is not None: 42 | childCount += 1 43 | return childCount 44 | 45 | def getChild(self, i: int): 46 | return self.subExpressions[i] 47 | 48 | def initializeFromContext(self, ctx: ParserRuleContext): 49 | super().initializeFromContext(ctx) 50 | if ctx.getChildCount() == 3 and self.flag: 51 | self.operator = ctx.getChild(1).getText() 52 | 53 | 54 | # 加减运算,op 包括 +, -。2个AST子结点 55 | class AdditiveExpression(BinaryExpression): 56 | pass 57 | 58 | # 与运算,op 包括 && 59 | class AndExpression(BinaryExpression): 60 | pass 61 | 62 | # 赋值运算,op 包括 =, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |= 63 | # c语言中规定 赋值运算符的左边必须变量,不能是常量。 64 | class AssignmentExpr(BinaryExpression): 65 | pass 66 | 67 | # 逻辑与运算,op 包括 & 68 | class BitAndExpression(BinaryExpression): 69 | pass 70 | 71 | # 等于判断,op 包括 ==, != 72 | class EqualityExpression(BinaryExpression): 73 | pass 74 | 75 | # 逻辑异或运算,op 包括 ^ 76 | class ExclusiveOrExpression(BinaryExpression): 77 | pass 78 | 79 | # 或运算,op 包括 || 80 | class InclusiveOrExpression(BinaryExpression): 81 | pass 82 | 83 | # 乘除模运算,op 包括 *, /, % 84 | class MultiplicativeExpression(BinaryExpression): 85 | pass 86 | 87 | # 或运算,op 包括 || 88 | class OrExpression(BinaryExpression): 89 | pass 90 | 91 | # 大于小于判断,op 包括 <, >, <=, >= 92 | class RelationalExpression(BinaryExpression): 93 | pass 94 | 95 | # 移位运算,op 包括 <<, >> 96 | class ShiftExpression(BinaryExpression): 97 | pass -------------------------------------------------------------------------------- /mainTool/ast/expressions/expression.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.astNode import ASTNode 2 | from mainTool.ast.walking.visitor import ASTNodeVisitor 3 | 4 | class Expression(ASTNode): 5 | def __init__(self): 6 | super(Expression, self).__init__() 7 | self.operator: str = "" 8 | 9 | def replaceFirstChild(self, node: ASTNode): 10 | self.children[0] = node 11 | 12 | # 定义的标识符,主要是变量名,类名,函数名等等 13 | class Identifier(Expression): 14 | def accept(self, visitor: ASTNodeVisitor): 15 | visitor.visit(self) 16 | 17 | def copy(self): 18 | identifier: Identifier = Identifier() 19 | identifier.codeStr = self.codeStr 20 | identifier.location = self.location 21 | return identifier 22 | 23 | # 类静态变量等等,比如int testClass::var = 1; 24 | class ClassStaticIdentifier(Identifier): 25 | def __init__(self): 26 | super(ClassStaticIdentifier, self).__init__() 27 | self.className: Identifier = None 28 | self.varName: Identifier = None 29 | 30 | def addChild(self, node: ASTNode): 31 | if isinstance(node, Identifier): 32 | if self.className is None: 33 | self.className = node 34 | else: 35 | self.varName = node 36 | super().addChild(node) 37 | 38 | def copy(self): 39 | classIdentifier: ClassStaticIdentifier = ClassStaticIdentifier() 40 | className: Identifier = self.className.copy() 41 | varName: Identifier = self.varName.copy() 42 | classIdentifier.addChild(className) 43 | classIdentifier.addChild(varName) 44 | classIdentifier.location = self.location 45 | return classIdentifier 46 | 47 | def getEscapedCodeStr(self) -> str: 48 | return self.className.getEscapedCodeStr() + "::" + self.varName.getEscapedCodeStr() 49 | 50 | # 数组访问, Expression [ Expression ] 51 | class ArrayIndexing(Expression): 52 | def __init__(self): 53 | super(ArrayIndexing, self).__init__() 54 | self.array: Expression = None # 数组名,可以为变量名,或者函数调用等表达式 55 | self.index: Expression = None # 索引 56 | 57 | def setArrayExpression(self, array: Expression): 58 | self.array = array 59 | super().addChild(array) 60 | 61 | def setIndexExpression(self, index: Expression): 62 | self.index = index 63 | super().addChild(index) 64 | 65 | def addChild(self, node): 66 | if isinstance(node, Expression) and self.getChildCount() == 0: 67 | self.setArrayExpression(node) 68 | elif isinstance(node, Expression) and self.getChildCount() == 1: 69 | self.setIndexExpression(node) 70 | else: 71 | super().addChild(node) 72 | 73 | def accept(self, visitor: ASTNodeVisitor): 74 | visitor.visit(self) 75 | 76 | 77 | # 转换类型 78 | class CastTarget(Expression): 79 | pass 80 | 81 | # 类型转换 82 | class CastExpression(Expression): 83 | def __init__(self): 84 | super(CastExpression, self).__init__() 85 | self.castTarget: Expression = None 86 | self.castExpression: Expression = None 87 | 88 | def getChildCount(self) -> int: 89 | childCount: int = 0 90 | if self.castTarget is not None: 91 | childCount += 1 92 | if self.castExpression is not None: 93 | childCount += 1 94 | return childCount 95 | 96 | def getChild(self, i: int): 97 | if i == 0: 98 | return self.castTarget 99 | return self.castExpression 100 | 101 | def setCastTarget(self, castTarget: Expression): 102 | self.castTarget = castTarget 103 | super().addChild(castTarget) 104 | 105 | def setCastExpression(self, castExpression: Expression): 106 | self.castExpression = castExpression 107 | super().addChild(castExpression) 108 | 109 | def addChild(self, node): 110 | if self.castTarget is None: 111 | self.setCastTarget(node) 112 | else: 113 | self.setCastExpression(node) 114 | 115 | def accept(self, visitor: ASTNodeVisitor): 116 | visitor.visit(self) 117 | 118 | # 三目表达式, cond ? expr1 : expr2 119 | class ConditionalExpression(Expression): 120 | pass 121 | 122 | # ++ 或 -- 运算符 123 | class IncDec(Expression): 124 | pass 125 | 126 | ## sizeof表达式 127 | class SizeofExpr(Expression): 128 | pass 129 | 130 | # sizeof运算符 131 | class Sizeof(Expression): 132 | pass 133 | 134 | # sizeof运算数 135 | class SizeofOperand(Expression): 136 | pass 137 | 138 | # 单目表达式,运算符包括 &, *, +, -, ~, !, 这里 ++x, --x种,++,--也算单目运算,不过我把它算到IncDecOp中了 139 | class UnaryOp(Expression): 140 | def accept(self, visitor: ASTNodeVisitor): 141 | visitor.visit(self) 142 | 143 | # 单目运算符,包括 &, *, +, -, ~, ! 144 | class UnaryOperator(Expression): 145 | def accept(self, visitor: ASTNodeVisitor): 146 | visitor.visit(self) -------------------------------------------------------------------------------- /mainTool/ast/expressions/expressionHolders.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.expressions.expression import Expression 2 | from mainTool.ast.astNode import ASTNode 3 | from mainTool.ast.walking.visitor import ASTNodeVisitor 4 | 5 | 6 | class ExpressionHolder(Expression): 7 | def getEscapedCodeStr(self) -> str: 8 | if self.codeStr is not None: 9 | return self.codeStr 10 | 11 | expr: Expression = self.getExpression() 12 | if expr is None: 13 | return "" 14 | codeStr = expr.getEscapedCodeStr() 15 | return codeStr 16 | 17 | def getExpression(self): 18 | if self.getChildCount() > 0: 19 | return self.getChild(0) 20 | return None 21 | 22 | 23 | # 函数调用参数,子类为Expression 24 | # 如果在解析AST的时候精简掉Argument后面解析Use Def的时候不太好弄 25 | class Argument(ExpressionHolder): 26 | pass 27 | 28 | # ArgumentList可以接任意个(包括0个) Argument,没有参数的话就是一个没有child的 ArgumentList 29 | class ArgumentList(ExpressionHolder): 30 | def accept(self, visitor: ASTNodeVisitor): 31 | visitor.visit(self) 32 | 33 | 34 | # Callee 可能为 Identifier(直接使用函数名)或者MemberAccess、PtrMemberAccess(成员函数) 35 | class Callee(ExpressionHolder): 36 | pass 37 | 38 | # 条件判断 39 | class Condition(ExpressionHolder): 40 | def accept(self, visitor: ASTNodeVisitor): 41 | visitor.visit(self) 42 | 43 | # 对应数组赋值 a = { a1, a2, a3 } 中的 a1, a2, a3,可能有嵌套 44 | class InitializerList(ExpressionHolder): 45 | pass 46 | 47 | class ThrowExpression(ExpressionHolder): 48 | def __init__(self): 49 | super(ThrowExpression, self).__init__() 50 | self.throwExpression = None 51 | 52 | def getThrowExpression(self): 53 | return self.throwExpression 54 | 55 | def addChild(self, node: ASTNode): 56 | if isinstance(node, Expression): 57 | self.throwExpression = node 58 | super().addChild(node) 59 | 60 | 61 | -------------------------------------------------------------------------------- /mainTool/ast/expressions/postfixExpressions.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.expressions.expression import Expression, Identifier 2 | from mainTool.ast.expressions.expressionHolders import ArgumentList 3 | from mainTool.ast.walking.visitor import ASTNodeVisitor 4 | 5 | 6 | class PostfixExpression(Expression): 7 | pass 8 | 9 | # 对应 x++ 或者 x--, ++x, --x 10 | class IncDecOp(PostfixExpression): 11 | def __init__(self): 12 | super(IncDecOp, self).__init__() 13 | # isPost为true表示 ++/-- 在变量后面, x++, x--,反之为 ++x, --x 14 | self.isPost = True 15 | 16 | class CallExpressionBase(PostfixExpression): 17 | def __init__(self): 18 | self.targetFunc: Expression = None 19 | self.argumentList: ArgumentList = None 20 | super(CallExpressionBase, self).__init__() 21 | 22 | def setTargetFunc(self, targetFunc: Expression): 23 | self.targetFunc = targetFunc 24 | super().addChild(targetFunc) 25 | 26 | def setArgumentList(self, argumentList: ArgumentList): 27 | self.argumentList = argumentList 28 | super().addChild(argumentList) 29 | 30 | def accept(self, visitor: ASTNodeVisitor): 31 | visitor.visit(self) 32 | 33 | class CallExpression(CallExpressionBase): 34 | def addChild(self, node): 35 | if isinstance(node, Identifier): 36 | self.setTargetFunc(node) 37 | elif isinstance(node, ArgumentList): 38 | self.setArgumentList(node) 39 | else: 40 | super().addChild(node) 41 | 42 | def accept(self, visitor: ASTNodeVisitor): 43 | visitor.visit(self) 44 | 45 | 46 | class DeleteExpression(CallExpressionBase): 47 | def __init__(self): 48 | # delete掉的变量名 49 | self.target: Expression = None 50 | super(DeleteExpression, self).__init__() 51 | 52 | def addChild(self, node): 53 | if isinstance(node, Expression) and self.target is None: 54 | self.target = node 55 | super().addChild(node) 56 | 57 | def accept(self, visitor: ASTNodeVisitor): 58 | visitor.visit(self) 59 | 60 | 61 | class NewExpression(CallExpressionBase): 62 | def __init__(self): 63 | # targetClass 64 | self.targetClass: Identifier = None 65 | super(NewExpression, self).__init__() 66 | 67 | def setTargetClass(self, targetClass: Identifier): 68 | self.targetClass = targetClass 69 | self.targetClass.codeStr = self.targetClass.codeStr.strip() 70 | super().addChild(targetClass) 71 | 72 | def accept(self, visitor: ASTNodeVisitor): 73 | visitor.visit(self) 74 | 75 | # 成员变量访问,对应 a.f1 76 | class MemberAccess(PostfixExpression): 77 | def accept(self, visitor: ASTNodeVisitor): 78 | visitor.visit(self) 79 | 80 | # 指针类型成员变量访问,a->f1 81 | class PtrMemberAccess(PostfixExpression): 82 | def accept(self, visitor: ASTNodeVisitor): 83 | visitor.visit(self) -------------------------------------------------------------------------------- /mainTool/ast/expressions/primaryExpressions.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.expressions.expression import Expression 2 | from mainTool.ast.walking.visitor import ASTNodeVisitor 3 | 4 | # literals,常量部分 5 | class PrimaryExpression(Expression): 6 | def accept(self, visitor: ASTNodeVisitor): 7 | visitor.visit(self) 8 | 9 | # bool类型常量 10 | class BoolExpression(PrimaryExpression): 11 | def accept(self, visitor: ASTNodeVisitor): 12 | visitor.visit(self) 13 | 14 | # char类型常量 15 | class CharExpression(PrimaryExpression): 16 | def accept(self, visitor: ASTNodeVisitor): 17 | visitor.visit(self) 18 | 19 | # float, double类型常量 20 | class DoubleExpression(PrimaryExpression): 21 | def accept(self, visitor: ASTNodeVisitor): 22 | visitor.visit(self) 23 | 24 | # int, short, unsigned int, unsigned short, long, unsigned long类型常量 25 | class IntegerExpression(PrimaryExpression): 26 | def accept(self, visitor: ASTNodeVisitor): 27 | visitor.visit(self) 28 | 29 | # 指针类型常量只有2种,NULL和nullptr 30 | class PointerExpression(PrimaryExpression): 31 | def accept(self, visitor: ASTNodeVisitor): 32 | visitor.visit(self) 33 | 34 | # 字符串类型常量 35 | class StringExpression(PrimaryExpression): 36 | def accept(self, visitor: ASTNodeVisitor): 37 | visitor.visit(self) -------------------------------------------------------------------------------- /mainTool/ast/statements/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/ast/statements/__init__.py -------------------------------------------------------------------------------- /mainTool/ast/statements/blockStarters.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.statements.statements import Statement 2 | from mainTool.ast.declarations.simpleDecls import IdentifierDeclType, ForInit, ForRangeInit 3 | from mainTool.ast.expressions.expressionHolders import Condition 4 | from mainTool.ast.expressions.expression import Expression, Identifier 5 | from mainTool.ast.walking.visitor import ASTNodeVisitor 6 | 7 | from typing import List 8 | 9 | class BlockStarter(Statement): 10 | def __init__(self): 11 | super(BlockStarter, self).__init__() 12 | self.statement: Statement = None 13 | self.condition: Condition = None 14 | 15 | def addChild(self, node): 16 | if isinstance(node, Condition): 17 | self.condition = node 18 | elif isinstance(node, Statement): 19 | self.statement = node 20 | super().addChild(node) 21 | 22 | # else语句 23 | class ElseStatement(BlockStarter): 24 | pass 25 | 26 | # if语句 27 | class IfStatement(BlockStarter): 28 | def __init__(self): 29 | super(IfStatement, self).__init__() 30 | self.elseNode: ElseStatement = None 31 | 32 | def getChildCount(self) -> int: 33 | childCount: int = super().getChildCount() 34 | if self.elseNode is not None: 35 | childCount += 1 36 | return childCount 37 | 38 | def getChild(self, i: int): 39 | if i == 0: 40 | return self.condition 41 | elif i == 1: 42 | return self.statement 43 | elif i == 2: 44 | return self.elseNode 45 | raise RuntimeError("Invalid IfItem") 46 | 47 | def accept(self, visitor: ASTNodeVisitor): 48 | visitor.visit(self) 49 | 50 | # switch语句 51 | class SwitchStatement(BlockStarter): 52 | def accept(self, visitor: ASTNodeVisitor): 53 | visitor.visit(self) 54 | 55 | # while语句 56 | class WhileStatement(BlockStarter): 57 | def accept(self, visitor: ASTNodeVisitor): 58 | visitor.visit(self) 59 | 60 | # do-while 61 | class DoStatement(BlockStarter): 62 | def accept(self, visitor: ASTNodeVisitor): 63 | visitor.visit(self) 64 | 65 | # for语句 66 | class ForStatement(BlockStarter): 67 | def __init__(self): 68 | super(ForStatement, self).__init__() 69 | self.forInitStatement: ForInit = None 70 | self.expression: Expression = None 71 | 72 | def addChild(self, node): 73 | if isinstance(node, ForInit): 74 | self.forInitStatement = node 75 | elif isinstance(node, Expression) and not isinstance(node, Condition): 76 | self.expression = node 77 | super().addChild(node) 78 | 79 | 80 | class ForRangeStatement(BlockStarter): 81 | def __init__(self): 82 | super(ForRangeStatement, self).__init__() 83 | self.forRangeInit: ForRangeInit = None 84 | 85 | def addChild(self, node): 86 | if isinstance(node, ForRangeInit): 87 | self.forRangeInit = node 88 | super().addChild(node) 89 | 90 | 91 | # Try-Catch 92 | class CatchStatement(BlockStarter): 93 | def __init__(self): 94 | self.exceptionType: IdentifierDeclType = None 95 | self.exceptionIdentifier: Identifier = None 96 | super(CatchStatement, self).__init__() 97 | 98 | def setExceptionType(self, exceptionType: IdentifierDeclType): 99 | self.exceptionType = exceptionType 100 | super().addChild(exceptionType) 101 | 102 | def addChild(self, node): 103 | if isinstance(node, Identifier): 104 | self.exceptionIdentifier = node 105 | super().addChild(node) 106 | 107 | # try下面包括一个tryBlock(CompoundStatement) 和 catchList 108 | class TryStatement(BlockStarter): 109 | def __init__(self): 110 | super(TryStatement, self).__init__() 111 | self.catchList: List[CatchStatement] = list() 112 | 113 | def addCatchStatement(self, catchStatement: CatchStatement): 114 | self.catchList.append(catchStatement) 115 | 116 | def addChild(self, node): 117 | if isinstance(node, CatchStatement): 118 | self.addCatchStatement(node) 119 | else: 120 | super().addChild(node) -------------------------------------------------------------------------------- /mainTool/ast/statements/jumps.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.statements.statements import Statement 2 | from mainTool.ast.walking.visitor import ASTNodeVisitor 3 | 4 | 5 | class JumpStatement(Statement): 6 | pass 7 | 8 | # break; 9 | class BreakStatement(JumpStatement): 10 | def accept(self, visitor: ASTNodeVisitor): 11 | visitor.visit(self) 12 | 13 | # continue; 14 | class ContinueStatement(JumpStatement): 15 | def accept(self, visitor: ASTNodeVisitor): 16 | visitor.visit(self) 17 | 18 | # goto label; 19 | class GotoStatement(JumpStatement): 20 | def getTarget(self) -> str: 21 | return self.getChild(0).getEscapedCodeStr() 22 | 23 | def getEscapedCodeStr(self) -> str: 24 | return "goto " + self.getTarget() + ";" 25 | 26 | def accept(self, visitor: ASTNodeVisitor): 27 | visitor.visit(self) 28 | 29 | # return expr; 30 | class ReturnStatement(JumpStatement): 31 | def accept(self, visitor: ASTNodeVisitor): 32 | visitor.visit(self) -------------------------------------------------------------------------------- /mainTool/ast/statements/statements.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.astNode import ASTNode 2 | from mainTool.ast.expressions.expression import Expression 3 | from mainTool.ast.walking.visitor import ASTNodeVisitor 4 | from mainTool.utils.types import LabelType 5 | 6 | from antlr4 import ParserRuleContext 7 | 8 | class Statement(ASTNode): 9 | pass 10 | 11 | # CompoundStatement = { + blockStatements + } 12 | class CompoundStatement(Statement): 13 | def __init__(self): 14 | super(CompoundStatement, self).__init__() 15 | 16 | def getStatements(self) -> list: 17 | return self.children 18 | 19 | def addStatement(self, stmt: ASTNode): 20 | super().addChild(stmt) 21 | 22 | def getEscapedCodeStr(self) -> str: 23 | return "" 24 | 25 | def accept(self, visitor: ASTNodeVisitor): 26 | visitor.visit(self) 27 | 28 | class ExpressionHolderStatement(Statement): 29 | def getEscapedCodeStr(self) -> str: 30 | expr: Expression = self.getExpression() 31 | if expr is None: 32 | return "" 33 | return expr.getEscapedCodeStr() 34 | 35 | def getExpression(self): 36 | if self.getChildCount() == 0: 37 | return None 38 | return self.getChild(0) 39 | 40 | # 表达式语句, expr + ; 41 | class ExpressionStatement(ExpressionHolderStatement): 42 | def getEscapedCodeStr(self) -> str: 43 | expr: Expression = self.getExpression() 44 | if expr is None: 45 | self.codeStr = ";" 46 | else: 47 | self.codeStr = expr.getEscapedCodeStr() + " ;" 48 | return self.codeStr 49 | 50 | # 变量定义语句,每个child对应1个IdentifierDecl 51 | class IdentifierDeclStatement(Statement): 52 | def __init__(self): 53 | super(IdentifierDeclStatement, self).__init__() 54 | self.typeNameContext: ParserRuleContext = None 55 | 56 | def accept(self, visitor: ASTNodeVisitor): 57 | visitor.visit(self) 58 | 59 | class Label(Statement): 60 | # goto语句一般为NormalLabel, Case语句为Case,Default为default 61 | def __init__(self): 62 | super(Label, self).__init__() 63 | self.type: LabelType = None 64 | self.cond: Expression = None 65 | 66 | def addChild(self, node): 67 | if isinstance(node, Expression): 68 | self.cond = node 69 | super().addChild(node) 70 | 71 | -------------------------------------------------------------------------------- /mainTool/ast/walking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/ast/walking/__init__.py -------------------------------------------------------------------------------- /mainTool/ast/walking/visitor.py: -------------------------------------------------------------------------------- 1 | # from ast.ASTNode import ASTNode 2 | 3 | 4 | class ASTNodeVisitor(object): 5 | def visitChildren(self, item): 6 | nChildren: int = item.getChildCount() 7 | for i in range(nChildren): 8 | child = item.getChild(i) 9 | child.accept(self) 10 | 11 | def visit(self, item): 12 | self.visitChildren(item) -------------------------------------------------------------------------------- /mainTool/cdg/CDG.py: -------------------------------------------------------------------------------- 1 | from mainTool.cdg.DominatorTree import * 2 | 3 | # 控制依赖边,连接CFG结点 4 | class CDGEdge(Edge[CFGNode]): 5 | def __init__(self, source: CFGNode, destination: CFGNode): 6 | super(CDGEdge, self).__init__(source, destination) 7 | 8 | def getProperties(self) -> Dict[str, object]: 9 | return None 10 | 11 | def __str__(self): 12 | return str(self.source) + " ==[]==> " + str(self.destination) 13 | 14 | 15 | # 逆向控制流图 16 | class ReverseCFG(AbstractTwoWayGraph[CFGNode]): 17 | def __init__(self): 18 | super(ReverseCFG, self).__init__() 19 | self.entry: CFGNode = None 20 | self.exit: CFGNode = None 21 | 22 | @staticmethod 23 | def newInstance(cfg: CFG): 24 | reverseCFG: ReverseCFG = ReverseCFG() 25 | for vertex in cfg.vertices: 26 | reverseCFG.addVertex(vertex) 27 | 28 | for edge in cfg.getEdges(): 29 | reverseEdge: CFGEdge = CFGEdge(edge.destination, edge.source, edge.label) 30 | reverseCFG.addEdge(reverseEdge) 31 | reverseCFG.entry = cfg.exit 32 | reverseCFG.exit = cfg.entry 33 | 34 | augmentedEdge: CFGEdge = CFGEdge(reverseCFG.entry, reverseCFG.exit, CFGEdgeType.EMPTY_LABEL) 35 | reverseCFG.addEdge(augmentedEdge) 36 | return reverseCFG 37 | 38 | # 控制依赖图定义 39 | class CDG(AbstractGraph[CFGNode]): 40 | def __init__(self): 41 | super(CDG, self).__init__() 42 | self.dominatorTree: DominatorTree[CFGNode] = None 43 | 44 | # 根据支配树建立控制依赖图 45 | @staticmethod 46 | def newInstance(dominatorTree: DominatorTree[CFGNode]): 47 | cdg: CDG = CDG() 48 | cdg.dominatorTree = dominatorTree 49 | for vertex in dominatorTree.getVertices(): 50 | frontier: Set[CFGNode] = dominatorTree.dominanceFrontier(vertex) 51 | if frontier is not None: 52 | cdg.addVertex(vertex) 53 | for f in frontier: 54 | # 跳过entry和自我依赖的情况 55 | if f == vertex or str(f) == "[ENTRY]": 56 | continue 57 | cdg.addVertex(f) 58 | cdg.addEdge(CDGEdge(f, vertex)) 59 | return cdg 60 | 61 | 62 | def createCDG(cfg: CFG) -> CDG: 63 | # 建立逆向CFG 64 | reverseCFG: ReverseCFG = ReverseCFG.newInstance(cfg) 65 | # 根据逆向CFG构建支配树 66 | dominatorTreeCreator: DominatorTreeCreator[CFGNode] = DominatorTreeCreator[CFGNode](reverseCFG, reverseCFG.entry) 67 | dominatorTree: DominatorTree[CFGNode] = dominatorTreeCreator.create() 68 | # 基于支配树创建CDG 69 | return CDG.newInstance(dominatorTree) -------------------------------------------------------------------------------- /mainTool/cdg/DominatorTree.py: -------------------------------------------------------------------------------- 1 | from mainTool.cfg.CFG import * 2 | from typing import Set 3 | 4 | # 支配树 5 | class DominatorTree(Generic[T]): 6 | def __init__(self): 7 | self.dominators: Dict[T, T] = dict() # key -> value 表示 value 支配 key 8 | self.dominanceFrontiers: Dict[T, Set[T]] = dict() # 前向支配 9 | self.postorderEnumeration: Dict[T, int] = dict() # 结点访问顺序 10 | 11 | def getVertices(self) -> List[T]: 12 | return list(self.dominators.keys()) 13 | 14 | def getDominator(self, vertex: T) -> T: 15 | return self.dominators.get(vertex) 16 | 17 | def dominanceFrontier(self, vertex: T) -> Set[T]: 18 | return self.dominanceFrontiers.get(vertex, None) 19 | 20 | def hasDominator(self, vertex: T) -> bool: 21 | return self.dominators.get(vertex, None) is not None 22 | 23 | def contains(self, vertex: T) -> bool: 24 | return vertex in self.dominators.keys() 25 | 26 | # 往支配树中添加结点 27 | def addVertex(self, vertex: T): 28 | if not self.contains(vertex): 29 | self.dominators[vertex] = None 30 | return True 31 | return False 32 | 33 | def setDominator(self, vertex: T, dominator: T) -> bool: 34 | changed: bool = False 35 | if self.contains(vertex): 36 | currentDominator: T = self.dominators.get(vertex) 37 | if currentDominator is None and dominator is not None: 38 | self.dominators[vertex] = dominator 39 | changed = True 40 | elif not currentDominator == dominator: 41 | self.dominators[vertex] = dominator 42 | changed = True 43 | else: 44 | changed = False 45 | return changed 46 | 47 | def commonDominator(self, vertex1: T, vertex2: T) -> T: 48 | finger1: T = vertex1 49 | finger2: T = vertex2 50 | while not finger1 == finger2: 51 | while self.postorderEnumeration.get(finger1) < self.postorderEnumeration.get(finger2): 52 | finger1 = self.getDominator(finger1) 53 | while self.postorderEnumeration.get(finger2) < self.postorderEnumeration.get(finger1): 54 | finger2 = self.getDominator(finger2) 55 | 56 | return finger1 57 | 58 | def commonDominatorList(self, vertices: List[T]) -> T: 59 | stack: List[T] = list() 60 | for vertex in vertices: 61 | if self.hasDominator(vertex): 62 | stack.append(vertex) 63 | 64 | if len(stack) == 0: 65 | return None 66 | while len(stack) > 1: 67 | stack.append(self.commonDominator(stack.pop(), stack.pop())) 68 | return stack.pop() 69 | 70 | 71 | class DominatorTreeCreator(Generic[T]): 72 | def __init__(self, graph: AbstractTwoWayGraph[T], startNode: T): 73 | self.graph: AbstractTwoWayGraph[T] = graph # 逆向CFG 74 | self.startNode: T = startNode # 一般是逆向CFG EntryNode 75 | self.dominatorTree: DominatorTree[T] = DominatorTree[T]() 76 | self.orderedVertices: List[T] = list() # 存储逆向CFG结点访问顺序 77 | 78 | # 获取逆向CFG结点访问顺序 79 | def enumerateVertices(self): 80 | counter: int = 0 81 | iterator: PostorderIterator[T] = PostorderIterator[T](self.graph, self.startNode) 82 | 83 | while iterator.hasNext(): 84 | vertex: T = iterator.__next__() 85 | self.orderedVertices.append(vertex) 86 | self.dominatorTree.postorderEnumeration[vertex] = counter 87 | counter += 1 88 | 89 | if len(self.orderedVertices) < len(self.graph.vertices): 90 | print("warning: incomplete control flow graph") 91 | 92 | # 初始化支配树 93 | def initializeDominatorTree(self): 94 | self.dominatorTree.addVertex(self.startNode) 95 | self.dominatorTree.setDominator(self.startNode, self.startNode) 96 | 97 | def buildDominatorTree(self): 98 | changed: bool = True 99 | while changed: 100 | changed = False 101 | # orderedVertices存储结点访问顺序 102 | reverseVertexIterator = list(reversed(self.orderedVertices)) 103 | cur = 1 104 | while cur < len(reverseVertexIterator): 105 | currentNode: T = reverseVertexIterator[cur] 106 | cur += 1 107 | lis: List[T] = list() 108 | for edge in self.graph.inNeighborhood.get(currentNode, []): 109 | lis.append(edge.source) 110 | newIdom: T = self.dominatorTree.commonDominatorList(lis) 111 | self.dominatorTree.addVertex(currentNode) 112 | if self.dominatorTree.setDominator(currentNode, newIdom): 113 | changed = True 114 | 115 | 116 | def determineDominanceFrontiers(self): 117 | for currentNode in self.orderedVertices: # 后序遍历逆向CFG 118 | if self.graph.inDegree(currentNode) > 1: 119 | for edge in self.graph.inNeighborhood.get(currentNode, []): 120 | predecessor: T = edge.source 121 | if not predecessor in self.orderedVertices: 122 | continue 123 | # runnner序号大于currentNode 124 | runner = predecessor 125 | while not runner == self.dominatorTree.getDominator(currentNode): # value支配于key 126 | if runner not in self.dominatorTree.dominanceFrontiers.keys(): 127 | self.dominatorTree.dominanceFrontiers[runner] = set() 128 | self.dominatorTree.dominanceFrontiers[runner].add(currentNode) 129 | runner = self.dominatorTree.getDominator(runner) 130 | 131 | def create(self): 132 | # 获取逆向CFG结点访问顺序 133 | self.enumerateVertices() 134 | # 初始化 135 | self.initializeDominatorTree() 136 | # 构建支配树 137 | self.buildDominatorTree() 138 | # 控制依赖边计算 139 | self.determineDominanceFrontiers() 140 | return self.dominatorTree -------------------------------------------------------------------------------- /mainTool/cdg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/cdg/__init__.py -------------------------------------------------------------------------------- /mainTool/cfg/CFG.py: -------------------------------------------------------------------------------- 1 | from mainTool.cfg.nodes import * 2 | from mainTool.utils.types import CFGEdgeType 3 | from mainTool.ast.declarations.complexDecls import FunctionDef 4 | 5 | from mainTool.utils.graphUtils import * 6 | 7 | class CFGEdge(Edge[CFGNode]): 8 | # CFG Edge的label对应 true, false, 空 9 | def __init__(self, source: CFGNode, destination: CFGNode, label: str = None): 10 | super(CFGEdge, self).__init__(source, destination) 11 | if label is None: 12 | self.label = CFGEdgeType.EMPTY_LABEL 13 | else: 14 | self.label = label 15 | 16 | def getProperties(self) -> Dict[str, object]: 17 | return { 18 | "flowLabel": self.label 19 | } 20 | 21 | def __hash__(self) -> int: 22 | prime = 31 23 | result = super().__hash__() 24 | result = prime * result + hash(self.label) 25 | return result 26 | 27 | def __str__(self): 28 | return str(self.source) + " ==[" + self.label + "]==> " + str(self.destination) 29 | 30 | def __eq__(self, o: object) -> bool: 31 | if id(self) == id(o): 32 | return True 33 | if not super().__eq__(o): 34 | return False 35 | if not isinstance(o, CFGEdge): 36 | return False 37 | return self.label == o.label 38 | 39 | # 针对一个function的CFG 40 | class CFG(AbstractTwoWayGraph[CFGNode]): 41 | def __init__(self): 42 | super(CFG, self).__init__() 43 | self.entry: CFGEntryNode = CFGEntryNode() 44 | self.exit: CFGExitNode = CFGExitNode() 45 | self.addVertex(self.entry) 46 | self.addVertex(self.exit) 47 | self.parameters: List[CFGNode] = list() 48 | self.errorNode: CFGErrorNode = None 49 | self.name: str = None # 函数名 50 | 51 | def getErrorNode(self): 52 | if self.errorNode is None: 53 | self.errorNode = CFGErrorNode() 54 | return self.errorNode 55 | 56 | def isEmpty(self) -> bool: 57 | return len(self.vertices) == 2 58 | 59 | # addCFG只是将otherCFG和当前CFG的所有结点和边放到当前CFG中,otherCFG中的结点还没有和当前CFG连通 60 | def addCFG(self, otherCFG): 61 | self.addVertices(otherCFG) 62 | self.addEdges(otherCFG) 63 | 64 | # 合并CFG 65 | def appendCFG(self, otherCFG): 66 | self.addCFG(otherCFG) 67 | if not otherCFG.isEmpty(): 68 | # edge1为当前CFG的ExitNode的入边 69 | for edge1 in self.inNeighborhood.get(self.exit, []): 70 | # edge2为otherCFG的EntryNode的出边 71 | for edge2 in otherCFG.outNeighborhood.get(otherCFG.entry, []): 72 | self.addCFGEdge(edge1.source, edge2.destination, edge1.label) 73 | # 删除当前的Exit结点 74 | self.removeEdgesTo(self.exit) 75 | for edge in otherCFG.inNeighborhood.get(otherCFG.exit): 76 | self.addCFGEdge(edge.source, self.exit, edge.label) 77 | 78 | # 处理if-else,while等情况 79 | def mountCFG(self, branchNode: CFGNode, mergeNode: CFGNode, cfg, label: str): 80 | # 在if-else中,cfg为elseBlock对应的CFG,branchNode为condition对应的CFGNode,mergeNode为该Block对应的ExitNode 81 | # 在while语句中,cfg为whileBody对应的CFG,branchNode和mergeNode为Condition对应的CFGNode 82 | # 在for语句中,cfg为forBody,branchNode为forCondition,mergeNode为forExpression或者forCondition 83 | if not cfg.isEmpty(): 84 | self.addCFG(cfg) 85 | for edge in cfg.outNeighborhood.get(cfg.entry, []): 86 | self.addCFGEdge(branchNode, edge.destination, label) 87 | for edge in cfg.inNeighborhood.get(cfg.exit, []): 88 | self.addCFGEdge(edge.source, mergeNode, edge.label) 89 | else: 90 | self.addCFGEdge(branchNode, mergeNode, label) 91 | 92 | 93 | def addVertices(self, cfg): 94 | # 将cfg所有的非Entry和Exit的结点添加到当前CFG 95 | for vertex in cfg.vertices: 96 | if not (vertex == cfg.entry or vertex == cfg.exit): 97 | self.addVertex(vertex) 98 | 99 | def addEdges(self, cfg): 100 | # 将cfg所有非Entry和Exit的边添加到当前CFG 101 | for vertex in cfg.vertices: 102 | for edge in cfg.outNeighborhood.get(vertex, []): 103 | if not (edge.source == cfg.entry or edge.destination == cfg.exit): 104 | self.addEdge(edge) 105 | 106 | def addCFGEdge(self, srcBlock: CFGNode, dstBlock: CFGNode, label: str = None): 107 | edge: CFGEdge = CFGEdge(srcBlock, dstBlock, label) 108 | self.addEdge(edge) 109 | 110 | 111 | class CFGFactory(object): 112 | # Implement this method for each language 113 | @staticmethod 114 | def newInstance(functionDefinition: FunctionDef) -> CFG: 115 | pass -------------------------------------------------------------------------------- /mainTool/cfg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/cfg/__init__.py -------------------------------------------------------------------------------- /mainTool/cfg/nodes.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from mainTool.ast.astNode import ASTNode 3 | 4 | class CFGNode(object): 5 | def __str__(self) -> str: 6 | return self.__class__.__name__ 7 | 8 | def getProperties(self) -> Dict[str, object]: 9 | return { 10 | "type": str(self), 11 | "code": self.__class__.__name__, 12 | "IS_CFG_NODE": True 13 | } 14 | 15 | # 普通CFG结点 16 | class ASTNodeContainer(CFGNode): 17 | def __init__(self, node: ASTNode): 18 | self.astNode: ASTNode = node 19 | self.astNode.markAsCFGNode() 20 | 21 | def getEscapedCodeStr(self): 22 | return self.astNode.getEscapedCodeStr() 23 | 24 | def __str__(self): 25 | return "[" + self.astNode.getEscapedCodeStr() + "]" 26 | 27 | # 入口结点 28 | class CFGEntryNode(CFGNode): 29 | def __str__(self): 30 | return "[ENTRY]" 31 | 32 | def getProperties(self) -> Dict[str, object]: 33 | return { 34 | "type": str(self), 35 | "code": "ENTRY", 36 | "IS_CFG_NODE": True 37 | } 38 | 39 | # 退出结点 40 | class CFGExitNode(CFGNode): 41 | def __str__(self): 42 | return "[EXIT]" 43 | 44 | def getProperties(self) -> Dict[str, object]: 45 | return { 46 | "type": str(self), 47 | "code": "EXIT", 48 | "IS_CFG_NODE": True 49 | } 50 | 51 | # 错误结点 52 | class CFGErrorNode(CFGNode): 53 | def __str__(self): 54 | return "[ERROR]" 55 | 56 | def getProperties(self) -> Dict[str, object]: 57 | return { 58 | "type": str(self), 59 | "code": "ERROR", 60 | "IS_CFG_NODE": True 61 | } 62 | 63 | # for循环条件没写的话就是InfiniteForNode 64 | class InfiniteForNode(CFGNode): 65 | def __str__(self): 66 | return "[INFINITE FOR]" 67 | 68 | def getProperties(self) -> Dict[str, object]: 69 | return { 70 | "type": str(self), 71 | "code": "true", 72 | "IS_CFG_NODE": True 73 | } -------------------------------------------------------------------------------- /mainTool/ddg/DDGCreator.py: -------------------------------------------------------------------------------- 1 | from mainTool.ddg.DefUseGraph import * 2 | from mainTool.udg.useDefGraph import * 3 | from mainTool.cfg.CFG import * 4 | from mainTool.ast.declarations.complexDecls import Parameter 5 | 6 | class CFGAndUDGToDefUseCFG(object): 7 | 8 | def initializeStatements(self, cfg: CFG, defUseCFG: DefUseCFG): 9 | for statement in cfg.vertices: 10 | if isinstance(statement, ASTNodeContainer): 11 | statement = statement.astNode 12 | defUseCFG.addStatement(statement) 13 | 14 | 15 | def initializeParentsAndChildren(self, cfg: CFG, defUseCFG: DefUseCFG): 16 | for edge in cfg.getEdges(): 17 | src = edge.source 18 | dst = edge.destination 19 | if isinstance(src, ASTNodeContainer): 20 | src = src.astNode 21 | if isinstance(dst, ASTNodeContainer): 22 | dst = dst.astNode 23 | defUseCFG.addChildBlock(src, dst) 24 | defUseCFG.addParentBlock(dst, src) 25 | 26 | 27 | def initializeDefUses(self, udg: UseDefGraph, defUseCFG: DefUseCFG): 28 | useDefDict: Dict[str, List[UseOrDefRecord]] = udg.useOrDefRecordTable 29 | for symbol, defUseRecords in useDefDict.items(): 30 | for record in defUseRecords: 31 | if not record.astNode.isInCFG: 32 | continue 33 | if record.isDef: 34 | defUseCFG.addSymbolDefined(record.astNode, symbol) 35 | else: 36 | defUseCFG.addSymbolUsed(record.astNode, symbol) 37 | 38 | 39 | def convert(self, cfg: CFG, udg: UseDefGraph) -> DefUseCFG: 40 | defUseCFG: DefUseCFG = DefUseCFG() 41 | self.initializeStatements(cfg, defUseCFG) 42 | self.initializeDefUses(udg, defUseCFG) 43 | 44 | parameters: List[str] = list() 45 | for parameterCFGNode in cfg.parameters: 46 | astNode: Parameter = parameterCFGNode.astNode 47 | # 参数名称 48 | symbol: str = astNode.name.getEscapedCodeStr() 49 | parameters.append(symbol) 50 | 51 | defUseCFG.exitNode = cfg.exit 52 | defUseCFG.parameters = parameters 53 | defUseCFG.addUsesForExitNode() 54 | self.initializeParentsAndChildren(cfg, defUseCFG) 55 | 56 | return defUseCFG 57 | 58 | 59 | class Definition(object): 60 | def __init__(self, aStatement: object, aIdentifier: str): 61 | self.statement: object = aStatement 62 | self.identifier: str = aIdentifier 63 | 64 | 65 | class DDGCreator(object): 66 | def __init__(self): 67 | self.cfg: DefUseCFG = None 68 | self.In: Dict[object, Set[object]] = dict() # in集合 69 | self.Out: Dict[object, Set[object]] = dict() # out集合 70 | self.Gen: Dict[object, Set[object]] = dict() # gen集合 71 | self.changedNodes: List[object] = list() 72 | 73 | def clear(self): 74 | self.cfg = None 75 | self.In.clear() 76 | self.Out.clear() 77 | self.Gen.clear() 78 | self.changedNodes.clear() 79 | 80 | def initOut(self): 81 | for statement in self.cfg.statements: 82 | # this has the nice side-effect that an empty hash is created for the statement. 83 | self.Out[statement] = set() 84 | symsDefined: List[str] = self.cfg.symbolsDefined.get(statement, []) 85 | for s in symsDefined: 86 | self.Out[statement].add(Definition(statement, s)) 87 | 88 | def initGenFromOut(self): 89 | for statement in self.cfg.statements: 90 | for o in self.Out.get(statement, {}): 91 | if statement not in self.Gen.keys(): 92 | self.Gen[statement] = set() 93 | self.Gen[statement].add(o) 94 | 95 | #Reaching-Def info初始化 96 | def initReachingDefs(self): 97 | self.initOut() 98 | self.initGenFromOut() 99 | self.changedNodes.append(self.cfg.statements[0]) # entry 100 | for statement in self.cfg.statements[2:]: 101 | self.changedNodes.append(statement) 102 | self.changedNodes.append(self.cfg.statements[1]) # exit 103 | 104 | def updateIn(self, x: object): 105 | parents: List[object] = self.cfg.parentBlocks.get(x, None) 106 | if parents is None: 107 | return 108 | 109 | self.In[x] = set() 110 | # in(x) = union(out(p))_{p in parents(x)} 111 | for parent in parents: 112 | parentOut: Set[object] = self.Out.get(parent, {}) 113 | self.In[x].update(parentOut) 114 | 115 | def updateOut(self, x: object) -> bool: 116 | listForKey: Set[object] = self.Out.get(x) 117 | oldOut: Set[object] = listForKey.copy() 118 | self.Out[x] = set() 119 | 120 | # out(x) = in(x) 121 | inForX: Set[object] = self.In.get(x, {}) 122 | self.Out[x].update(inForX) 123 | 124 | # out(x) = out(x) - kill(x) 125 | killX: List[str] = self.cfg.symbolsDefined.get(x, None) 126 | if killX is not None: 127 | outItems: Set[object] = self.Out.get(x) 128 | deleteItems: Set[object] = set() 129 | for item in outItems: 130 | if item.identifier in killX: 131 | deleteItems.add(item) 132 | 133 | outItems.difference_update(deleteItems) 134 | 135 | # gen(x) \cup out(x) 136 | genX: Set[object] = self.Gen.get(x, {}) 137 | self.Out[x].update(genX) 138 | 139 | difference = self.Out[x] == oldOut 140 | return not difference 141 | 142 | 143 | def popFromChangedNodes(self) -> object: 144 | x: object = next(iter(self.changedNodes)) 145 | self.changedNodes.remove(x) 146 | return x 147 | 148 | # Reaching Def Analysis 149 | def calculateReachingDefs(self): 150 | self.initReachingDefs() 151 | 152 | while len(self.changedNodes) > 0: 153 | currentBlock: object = self.popFromChangedNodes() 154 | self.updateIn(currentBlock) # in(x) = out(p) + out(p1) ..... 155 | changed: bool = self.updateOut(currentBlock) # out(x) = gen(x) + in(x) - kill(x) 156 | 157 | if not changed: 158 | continue 159 | # 更新的话添加后继结点 160 | children: List[object] = self.cfg.childBlocks.get(currentBlock, []) 161 | self.changedNodes.extend(children) 162 | 163 | 164 | def createDDGFromReachingDefs(self) -> DDG: 165 | ddg: DDG = DDG() 166 | for statement in self.cfg.statements: 167 | inForBlock: Set[object] = self.In.get(statement, None) 168 | if inForBlock is None: 169 | continue 170 | usedSymbols: List[str] = self.cfg.symbolsUsed.get(statement, None) 171 | if usedSymbols is None: 172 | continue 173 | 174 | for defi in inForBlock: 175 | if defi.statement == statement or isinstance(statement, CFGExitNode): 176 | continue 177 | if defi.identifier in usedSymbols: 178 | ddg.add(defi.statement, statement, defi.identifier) 179 | 180 | return ddg 181 | 182 | def createForDefUseCFG(self, aCfg: DefUseCFG) -> DDG: 183 | self.cfg = aCfg 184 | self.calculateReachingDefs() 185 | return self.createDDGFromReachingDefs() -------------------------------------------------------------------------------- /mainTool/ddg/DefUseGraph.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Set 2 | 3 | class DefUseRelation(object): 4 | def __init__(self, src: object, dst: object, symbol: str): 5 | self.src: object = src 6 | self.dst: object = dst 7 | self.symbol: str = symbol 8 | 9 | def __eq__(self, other): 10 | if not isinstance(other, DefUseRelation): 11 | return False 12 | return self.src == other.src and self.dst == other.dst and self.symbol == other.symbol 13 | 14 | def __hash__(self): 15 | return hash(self.symbol) 16 | 17 | def __str__(self): 18 | return f"{self.src} ----[{self.symbol}]-----{self.dst}" 19 | 20 | # Data Dependence Graph 21 | class DDG(object): 22 | def __init__(self): 23 | self.defUseEdges: Set[DefUseRelation] = set() 24 | 25 | def add(self, srcId: object, dstId: object, symbol: str): 26 | statementPair: DefUseRelation = DefUseRelation(srcId, dstId, symbol) 27 | self.defUseEdges.add(statementPair) 28 | 29 | # A CFG decorated with USE and DEFs suitable to determine reaching definitions. 30 | class DefUseCFG(object): 31 | def __init__(self): 32 | self.statements: List[object] = list() 33 | self.symbolsUsed: Dict[object, List[str]] = dict() # key是语句,value是语句中使用的symbol 34 | self.symbolsDefined: Dict[object, List[str]] = dict() # key是语句,value是语句中定义的symbol 35 | self.parentBlocks: Dict[object, List[str]] = dict() # key是value的CFG后继结点 36 | self.childBlocks: Dict[object, List[str]] = dict() # key是value的CFG前驱结点 37 | self.symbolIds: Dict[str, object] = dict() 38 | 39 | self.exitNode: object = None 40 | self.parameters: List[str] = list() 41 | 42 | def addStatement(self, statementId: object): 43 | self.statements.append(statementId) 44 | 45 | def addSymbolUsed(self, key: object, symbol: str): 46 | if key not in self.symbolsUsed.keys(): 47 | self.symbolsUsed[key] = [] 48 | self.symbolsUsed[key].append(symbol) 49 | 50 | def addSymbolDefined(self, key: object, symbol: str): 51 | if key not in self.symbolsDefined.keys(): 52 | self.symbolsDefined[key] = [] 53 | self.symbolsDefined[key].append(symbol) 54 | 55 | def addParentBlock(self, thisBlockId: object, parentId: object): 56 | if thisBlockId not in self.parentBlocks.keys(): 57 | self.parentBlocks[thisBlockId] = [] 58 | self.parentBlocks[thisBlockId].append(parentId) 59 | 60 | def addChildBlock(self, thisBlockId: object, childId: object): 61 | if thisBlockId not in self.childBlocks.keys(): 62 | self.childBlocks[thisBlockId] = [] 63 | self.childBlocks[thisBlockId].append(childId) 64 | 65 | def setSetSymbolId(self, symbolCode: str, symbolId: object): 66 | self.symbolIds[symbolCode] = symbolId 67 | 68 | def addUsesForExitNode(self): 69 | for symbol in self.parameters: 70 | self.addSymbolUsed(self.exitNode, "* " + symbol) -------------------------------------------------------------------------------- /mainTool/ddg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/ddg/__init__.py -------------------------------------------------------------------------------- /mainTool/udg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/udg/__init__.py -------------------------------------------------------------------------------- /mainTool/udg/astAnalyzers.py: -------------------------------------------------------------------------------- 1 | from mainTool.udg.environments import CallEnvironment, UseDefEnvironment, ArgumentEnvironment, AssignmentEnvironment 2 | from mainTool.udg.environments import IncDecEnvironment, DeclEnvironment, PtrMemberAccessEnvironment, MemberAccessEnvironment 3 | from mainTool.udg.environments import UseEnvironment, ArrayIndexingEnvironment, UnaryOpEnvironment, IdentifierEnvironment 4 | 5 | from mainTool.udg.astProvider import ASTProvider, ASTNodeASTProvider 6 | from mainTool.udg.useDefGraph import UseOrDef, UseDefGraph 7 | 8 | from mainTool.ast.astNode import ASTNode 9 | from mainTool.cfg.CFG import * 10 | 11 | from typing import Set, Dict, List 12 | import sys 13 | 14 | # 这个类主要记录函数调用中参数为指针变量的信息,等同于Joern中的TaintSource,但是Joern考虑了指针def没考虑指针use的情况 15 | class CalleeInfos(object): 16 | def __init__(self): 17 | # 假设key为memset,value为[0],表示memset的第0个参数使用了指针变量,memset(a, xx,xx); 中use的symbol要包含 * a 18 | self.calleeToArgUseIds: Dict[str, List[int]] = dict() 19 | # 假设key为gets, value为[0],表示gets函数重新定义了第0个指针参数,gets(buf) 重新定义了symbol * buf 20 | self.calleeToArgDefIds: Dict[str, List[int]] = dict() 21 | # 参数为可变参数的情况 22 | # 比如 scanf -> 1, 表示 scanf会重新定义第1个以后的所有参数, scanf("%d", &a) 会重新定义 a 23 | self.calleeToDefStartIds: Dict[str, int] = dict() 24 | 25 | # 判断是否使用指针 26 | def judgeUse(self, callEnv: CallEnvironment, childNumber: int) -> bool: 27 | callee: str = callEnv.astProvider.getChild(0).getEscapedCodeStr() 28 | return childNumber in self.calleeToArgUseIds.get(callee, []) 29 | 30 | # 判断是否定义指针 31 | def judgeDef(self, callEnv: CallEnvironment, childNumber: int) -> bool: 32 | callee: str = callEnv.astProvider.getChild(0).getEscapedCodeStr() 33 | if childNumber in self.calleeToArgDefIds.get(callee, []): 34 | return True 35 | return childNumber >= self.calleeToDefStartIds.get(callee, sys.maxsize) 36 | 37 | def addArgUse(self, callee: str, argN: int): 38 | if callee not in self.calleeToArgUseIds.keys(): 39 | self.calleeToArgUseIds[callee] = [] 40 | self.calleeToArgUseIds[callee].append(argN) 41 | 42 | def addArgDef(self, callee: str, argN: int): 43 | if callee not in self.calleeToArgDefIds.keys(): 44 | self.calleeToArgDefIds[callee] = [] 45 | self.calleeToArgDefIds[callee].append(argN) 46 | 47 | def addArgDefStartIds(self, callee: str, argN: int): 48 | self.calleeToDefStartIds[callee] = argN 49 | 50 | 51 | class ASTDefUseAnalyzer(object): 52 | def __init__(self): 53 | self.environmentStack: List[UseDefEnvironment] = list() 54 | self.useDefsOfBlock: Set[UseOrDef] = set() 55 | self.calleeInfos: CalleeInfos = CalleeInfos() 56 | 57 | def reset(self): 58 | self.environmentStack.clear() 59 | self.useDefsOfBlock.clear() 60 | 61 | def emitUseOrDefs(self, toEmit: List[UseOrDef]): 62 | for useOrDef in toEmit: 63 | self.useDefsOfBlock.add(useOrDef) 64 | 65 | # Gets upstream symbols from environment and passes them to 66 | # parent-environment by calling addChildSymbols on the parent. Asks 67 | # parent-environment to generate useOrDefs and emit them. 68 | def reportUpstream(self, env: UseDefEnvironment): 69 | symbols: List[str] = env.upstreamSymbols() 70 | astProvider: ASTProvider = env.astProvider 71 | 72 | if len(self.environmentStack) > 0: 73 | parentEnv: UseDefEnvironment = self.environmentStack[-1] 74 | parentEnv.addChildSymbols(symbols, astProvider) 75 | 76 | def createArgumentEnvironment(self, astProvider: ASTProvider) -> ArgumentEnvironment: 77 | argEnv: ArgumentEnvironment = ArgumentEnvironment() 78 | # 中间还隔着个ArgumentList 79 | callEnv: CallEnvironment = self.environmentStack[-2] 80 | # 该参数是否使用指针 81 | if self.calleeInfos.judgeUse(callEnv, astProvider.getChildNumber()): 82 | argEnv.setIsUsePointer() 83 | # 是否定义指针 84 | if self.calleeInfos.judgeDef(callEnv, astProvider.getChildNumber()): 85 | argEnv.setIsDefPointer() 86 | return argEnv 87 | 88 | # Creates a UseDefEnvironment for a given AST node. 89 | def createUseDefEnvironment(self, astProvider: ASTProvider) -> UseDefEnvironment: 90 | nodeType: str = astProvider.getTypeAsString() 91 | 92 | if nodeType == "AssignmentExpr": 93 | return AssignmentEnvironment() 94 | elif nodeType == "IncDecOp": 95 | return IncDecEnvironment() 96 | elif nodeType == "IdentifierDecl" or nodeType == "Parameter": 97 | return DeclEnvironment() 98 | elif nodeType == "CallExpression": 99 | return CallEnvironment() 100 | elif nodeType == "Argument": 101 | return self.createArgumentEnvironment(astProvider) 102 | elif nodeType == "PtrMemberAccess": 103 | return PtrMemberAccessEnvironment() 104 | elif nodeType == "MemberAccess": 105 | return MemberAccessEnvironment() 106 | # condition和return中只有use没有def 107 | elif nodeType == "Condition" or nodeType == "ReturnStatement": 108 | return UseEnvironment() 109 | elif nodeType == "ArrayIndexing": 110 | return ArrayIndexingEnvironment() 111 | elif nodeType == "UnaryOp": 112 | return UnaryOpEnvironment() 113 | elif nodeType == "Identifier": 114 | return IdentifierEnvironment() 115 | else: 116 | return UseDefEnvironment() 117 | 118 | 119 | def traverseAST(self, astProvider: ASTProvider): 120 | env: UseDefEnvironment = self.createUseDefEnvironment(astProvider) 121 | env.astProvider = astProvider 122 | self.traverseASTChildren(astProvider, env) 123 | 124 | 125 | def traverseASTChildren(self, astProvider: ASTProvider, env: UseDefEnvironment): 126 | numChildren: int = astProvider.getChildCount() 127 | self.environmentStack.append(env) 128 | for i in range(numChildren): 129 | childProvider: ASTProvider = astProvider.getChild(i) 130 | self.traverseAST(childProvider) 131 | toEmit: List[UseOrDef] = env.useOrDefsFromSymbols(childProvider) 132 | self.emitUseOrDefs(toEmit) 133 | self.environmentStack.pop() 134 | self.reportUpstream(env) 135 | 136 | def analyzeAST(self, astProvider: ASTProvider) -> Set[UseOrDef]: 137 | self.reset() 138 | self.traverseAST(astProvider) 139 | return self.useDefsOfBlock 140 | 141 | 142 | class CFGToUDGConverter(object): 143 | def __init__(self): 144 | self.astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 145 | 146 | # statementNode是CFGNode 147 | def addToUseDefGraph(self, useDefGraph: UseDefGraph, usesAndDefs: List[UseOrDef], statementNode: ASTNode): 148 | insertedForStatementDef: Set[str] = set() 149 | insertedForStatementUse: Set[str] = set() 150 | 151 | for useOrDef in usesAndDefs: 152 | astProvider: ASTNodeASTProvider = useOrDef.astProvider 153 | # CHECK? 154 | useOrDefNode: ASTNode = astProvider.node 155 | if useOrDef.isDef: 156 | if useOrDef.symbol not in insertedForStatementDef: 157 | useDefGraph.addDefinition(useOrDef.symbol, statementNode) 158 | insertedForStatementDef.add(useOrDef.symbol) 159 | # 给ASTNode添加 160 | if useOrDefNode is not None and useOrDefNode != statementNode: 161 | useDefGraph.addDefinition(useOrDef.symbol, useOrDefNode) 162 | 163 | else: 164 | if useOrDef.symbol not in insertedForStatementUse: 165 | useDefGraph.addUse(useOrDef.symbol, statementNode) 166 | insertedForStatementUse.add(useOrDef.symbol) 167 | # Add use-links from AST nodes to symbols 168 | if useOrDef.astProvider is not None and useOrDefNode is not statementNode: 169 | useDefGraph.addUse(useOrDef.symbol, useOrDefNode) 170 | 171 | # 将CFG转化为UDG 172 | def convert(self, cfg: CFG) -> UseDefGraph: 173 | # Incrementally create a UseDefGraph by generating 174 | # UseOrDefs for each statement separately and adding those 175 | # to the UseDefGraph 176 | useDefGraph: UseDefGraph = UseDefGraph() 177 | statements: List[CFGNode] = cfg.vertices 178 | 179 | # CFG中每个语句独立分析 180 | for cfgNode in statements: 181 | # skip empty blocks 182 | if not isinstance(cfgNode, ASTNodeContainer): 183 | continue 184 | statementNode: ASTNode = cfgNode.astNode 185 | provider: ASTNodeASTProvider = ASTNodeASTProvider() 186 | provider.node = statementNode 187 | usesAndDefs: List[UseOrDef] = self.astAnalyzer.analyzeAST(provider) 188 | self.addToUseDefGraph(useDefGraph, usesAndDefs, statementNode) 189 | 190 | return useDefGraph -------------------------------------------------------------------------------- /mainTool/udg/astProvider.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from mainTool.ast.astNode import ASTNode 3 | from mainTool.ast.expressions.expression import Expression 4 | 5 | class ASTProvider(object): 6 | @abstractmethod 7 | def getTypeAsString(self) -> str: 8 | pass 9 | 10 | @abstractmethod 11 | def getChild(self, i: int): 12 | pass 13 | 14 | @abstractmethod 15 | def getEscapedCodeStr(self) -> str: 16 | pass 17 | 18 | @abstractmethod 19 | def getChildNumber(self) -> int: 20 | pass 21 | 22 | @abstractmethod 23 | def getChildCount(self) -> int: 24 | pass 25 | 26 | @abstractmethod 27 | def getOperatorCode(self) -> str: 28 | pass 29 | 30 | 31 | class ASTNodeASTProvider(ASTProvider): 32 | def __init__(self): 33 | self.node: ASTNode = None 34 | 35 | def getTypeAsString(self) -> str: 36 | return self.node.getTypeAsString() 37 | 38 | def getChild(self, i: int) -> ASTProvider: 39 | childProvider: ASTNodeASTProvider = ASTNodeASTProvider() 40 | childProvider.node = self.node.getChild(i) 41 | return childProvider 42 | 43 | def getChildCount(self) -> int: 44 | return self.node.getChildCount() 45 | 46 | def getEscapedCodeStr(self) -> str: 47 | return self.node.getEscapedCodeStr() 48 | 49 | def getChildNumber(self) -> int: 50 | return self.node.childNumber 51 | 52 | def getOperatorCode(self) -> str: 53 | if isinstance(self.node, Expression): 54 | return self.node.operator 55 | return None 56 | 57 | def __eq__(self, other): 58 | if not isinstance(other, ASTNodeASTProvider): 59 | return False 60 | return self.node == other.node -------------------------------------------------------------------------------- /mainTool/udg/useDefGraph.py: -------------------------------------------------------------------------------- 1 | from mainTool.ast.astNode import ASTNode 2 | from mainTool.udg.astProvider import ASTProvider 3 | 4 | 5 | from typing import List, Dict, Set 6 | 7 | # 一个usedef记录,表示某个symbol在该AST中是定义还是使用 8 | class UseOrDefRecord(object): 9 | def __init__(self, anASTNode: ASTNode, aIsDef: bool): 10 | self.isDef: bool = aIsDef 11 | self.astNode: ASTNode = anASTNode 12 | 13 | class UseOrDef(object): 14 | def __init__(self): 15 | self.isDef: int = None 16 | self.symbol: str = None 17 | self.astProvider: ASTProvider = None 18 | 19 | def __eq__(self, other): 20 | if not isinstance(other, UseOrDef): 21 | return False 22 | return self.isDef == other.isDef and self.symbol == other.symbol \ 23 | and self.astProvider == other.astProvider 24 | 25 | def __hash__(self): 26 | return hash(self.symbol) 27 | 28 | 29 | class UseDefGraph(object): 30 | # A UseDefGraph is a table indexed by identifiers. Each table-entry is 31 | # a list of the UseOrDefRecords of the identifier. 32 | def __init__(self): 33 | # 主要成员变量,每个key(symbol)对应1个list [(stetement, def)] 34 | self.useOrDefRecordTable: Dict[str, List[UseOrDefRecord]] = dict() 35 | 36 | def getUsesAndDefsForSymbol(self, symbol: str) -> List[UseOrDefRecord]: 37 | return self.useOrDefRecordTable.get(symbol, []) 38 | 39 | def add(self, identifier: str, astNode: ASTNode, isDef: bool): 40 | record: UseOrDefRecord = UseOrDefRecord(astNode, isDef) 41 | if identifier in self.useOrDefRecordTable.keys(): 42 | self.useOrDefRecordTable[identifier].append(record) 43 | else: 44 | self.useOrDefRecordTable[identifier] = [record] 45 | 46 | def addDefinition(self, identifier: str, astNode: ASTNode): 47 | self.add(identifier, astNode, True) 48 | 49 | def addUse(self, identifier: str, astNode: ASTNode): 50 | self.add(identifier, astNode, False) -------------------------------------------------------------------------------- /mainTool/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/mainTool/utils/__init__.py -------------------------------------------------------------------------------- /mainTool/utils/graphUtils.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import List, TypeVar, Generic, Dict 3 | 4 | T = TypeVar('T') 5 | 6 | class Edge(Generic[T]): 7 | def __init__(self, source: T, destination: T): 8 | self.source: T = source 9 | self.destination: T = destination 10 | 11 | @abstractmethod 12 | def getProperties(self) -> Dict[str, object]: 13 | pass 14 | 15 | def __eq__(self, o: object) -> bool: 16 | if o is None: 17 | return False 18 | if id(self) == id(o): 19 | return True 20 | if not isinstance(o, Edge): 21 | return False 22 | return self.destination == o.destination and self.source == o.source 23 | 24 | def __hash__(self) -> int: 25 | prime: int = 31 26 | result: int = 1 27 | result = prime * result + hash(self.destination) 28 | result = prime * result + hash(self.source) 29 | return result 30 | 31 | 32 | class AbstractGraph(Generic[T]): 33 | def __init__(self): 34 | self.vertices: List[T] = list() 35 | self.outNeighborhood: Dict[T, List[Edge[T]]] = dict() 36 | 37 | def __iter__(self): 38 | return iter(self.vertices) 39 | 40 | def getEdges(self) -> List[Edge[T]]: 41 | edges: List[Edge[T]] = list() 42 | for v, e in self.outNeighborhood.items(): 43 | edges.extend(e) 44 | return edges 45 | 46 | def outDegree(self, vertex: T) -> int: 47 | return len(self.outNeighborhood.get(vertex, [])) 48 | 49 | def addVertex(self, vertex: T) -> bool: 50 | if not self.vertices.__contains__(vertex): 51 | self.vertices.append(vertex) 52 | return True 53 | return False 54 | 55 | def addEdge(self, edge: Edge[T]): 56 | if edge.source in self.outNeighborhood.keys(): 57 | self.outNeighborhood[edge.source].append(edge) 58 | else: 59 | self.outNeighborhood[edge.source] = [edge] 60 | 61 | def removeEdge(self, src: T, dst: T): 62 | edges: List[Edge[T]] = self.outNeighborhood[src] 63 | for edge in edges: 64 | if edge.destination == dst: 65 | edges.remove(edge) 66 | 67 | def removeEdgesFrom(self, source: T): 68 | self.outNeighborhood.pop(source) 69 | 70 | def removeEdgesTo(self, destination: T): 71 | for src, edges in self.outNeighborhood.items(): 72 | for edge in edges: 73 | if edge.destination == destination: 74 | edges.remove(edge) 75 | 76 | def totalEdgeNum(self) -> int: 77 | size: int = 0 78 | for src, edges in self.outNeighborhood.items(): 79 | size += len(edges) 80 | return size 81 | 82 | def __str__(self) -> str: 83 | res: str = f"Graph with {len(self.vertices)} vertices and {self.totalEdgeNum()} edges:\n" 84 | for vertex in self.vertices: 85 | res += str(vertex) + '\n' 86 | for edge in self.outNeighborhood[vertex]: 87 | res += str(edge) + '\n' 88 | return res 89 | 90 | class AbstractTwoWayGraph(AbstractGraph[T]): 91 | # 保存node -> ingoing edge 92 | def __init__(self): 93 | super(AbstractTwoWayGraph, self).__init__() 94 | self.inNeighborhood: Dict[T, List[Edge[T]]] = dict() 95 | 96 | def inDegree(self, vertex: T) -> int: 97 | return len(self.inNeighborhood.get(vertex, [])) 98 | 99 | def addEdge(self, edge: Edge[T]): 100 | super().addEdge(edge) 101 | if edge.destination in self.inNeighborhood.keys(): 102 | self.inNeighborhood[edge.destination].append(edge) 103 | else: 104 | self.inNeighborhood[edge.destination] = [edge] 105 | 106 | def removeEdgesFrom(self, source: T): 107 | for edge in self.outNeighborhood[source]: 108 | self.inNeighborhood[edge.destination].remove(edge) 109 | super().removeEdgesFrom(source) 110 | 111 | def removeEdgesTo(self, destination: T): 112 | for edge in self.inNeighborhood[destination]: 113 | self.outNeighborhood[edge.source].remove(edge) 114 | self.inNeighborhood.pop(destination) 115 | 116 | def removeEdge(self, src: T, dst: T): 117 | super().removeEdge(src, dst) 118 | edges: List[Edge[T]] = self.inNeighborhood[dst] 119 | for edge in edges: 120 | if edge.source == src: 121 | edges.remove(edge) 122 | 123 | # 后序广度优先遍历逆向CFG 124 | class PostorderIterator(Generic[T]): 125 | def __init__(self, graph: AbstractGraph[T], root: T): 126 | self.graph = graph 127 | self.remainingNodes: List[T] = [root] 128 | self.visitedNodes: List[T] = list() 129 | 130 | def __iter__(self): 131 | return self 132 | 133 | def hasNext(self) -> bool: 134 | return len(self.remainingNodes) > 0 135 | 136 | def __next__(self): 137 | while self.hasNext(): 138 | root: T = self.remainingNodes[-1] 139 | # visit root 140 | if not root in self.visitedNodes: 141 | self.visitedNodes.append(root) 142 | # predecessors first if any 143 | if self.graph.outDegree(root) > 0: 144 | for edge in self.graph.outNeighborhood.get(root, []): 145 | if not edge.destination in self.visitedNodes: 146 | self.remainingNodes.append(edge.destination) 147 | # 深度优先,1个就够了 148 | break 149 | 150 | # 没有后续结点或者后续结点都已经被访问过了 151 | if self.remainingNodes[-1] == root: 152 | return self.remainingNodes.pop() 153 | raise StopIteration() -------------------------------------------------------------------------------- /mainTool/utils/types.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum, Enum 2 | 3 | # label类别: goto label, case label, default 4 | class LabelType(IntEnum): 5 | Normal = 1 6 | Case = 2 7 | Default = 3 8 | 9 | # 类定义类型 10 | class ClassType(IntEnum): 11 | Class = 1 # 类 12 | Struct = 2 # 结构体 13 | Enum = 3 # 枚举 14 | Union = 4 # 联合体 15 | 16 | # CFG边类型 17 | class CFGEdgeType: 18 | EMPTY_LABEL = "" 19 | TRUE_LABEL = "True" 20 | FALSE_LABEL = "False" -------------------------------------------------------------------------------- /resources/calleeInfos.json: -------------------------------------------------------------------------------- 1 | { 2 | "ArgDefs": { 3 | "fgets": [0], 4 | "gets": [0], 5 | "fread": [0], 6 | "snprintf": [0], 7 | 8 | "free": [0], 9 | "qsort": [0], 10 | 11 | "bcopy": [1], 12 | "memccpy": [0], 13 | "memcpy": [0], 14 | "setmem": [0], 15 | "memset": [0], 16 | "movmem": [1], 17 | "memmove": [0], 18 | "stpcpy": [0], 19 | "strcpy": [0], 20 | "strcat": [0], 21 | "strlwr": [0], 22 | "strupr": [0], 23 | "strncat": [0], 24 | "strncpy": [0], 25 | "strset": [0], 26 | 27 | "fgetws": [0], 28 | "swprintf": [0], 29 | "swscanf": [0], 30 | "vswprintf": [0], 31 | 32 | "mbrtowc": [0], 33 | "mbsrtowcs": [0], 34 | "wcrtomb": [0], 35 | "wcsrtombs": [0], 36 | 37 | "wcscat": [0], 38 | "wcscpy": [0], 39 | "wcsncat": [0], 40 | "wcsncpy": [0], 41 | "wcstok": [3], 42 | "wcsxfrm": [0], 43 | "wmemcpy": [0], 44 | "wmemmove": [0], 45 | "wmemset": [0], 46 | 47 | "wcsftime": [3], 48 | "recv": [1] 49 | }, 50 | "ArgUses": { 51 | "fopen": [0, 1], 52 | "freopen": [0, 1, 2], 53 | "fflush": [0], 54 | "fclose": [0], 55 | "remove": [0], 56 | "rename": [0, 1], 57 | "tmpnam": [0], 58 | "setvbuf": [0, 1], 59 | "fprintf": [0, 1], 60 | "scanf": [0], 61 | "fscanf": [0, 1], 62 | "fgetc": [0], 63 | "fgets": [2], 64 | "fputc": [1], 65 | "getc": [0], 66 | "puts": [0], 67 | "ungetc": [1], 68 | "fread": [3], 69 | "fwrite": [0, 3], 70 | "ftell": [0], 71 | "rewind": [0], 72 | "fgetpos": [0], 73 | "fsetpos": [0, 1], 74 | "clearerr": [0], 75 | "feof": [0], 76 | "ferror": [0], 77 | "perror": [0], 78 | "snprintf": [2], 79 | 80 | "atof": [0], 81 | "atoi": [0], 82 | "atol": [0], 83 | "strtod": [0, 1], 84 | "strtol": [0, 1], 85 | "strtoul": [0, 1], 86 | "realloc": [0], 87 | "getenv": [0], 88 | 89 | "bsearch": [0, 1, 2], 90 | 91 | "time": [0], 92 | "mktime": [0], 93 | "asctime": [0], 94 | "ctime": [0], 95 | "gmtime": [0], 96 | "localtime": [0], 97 | "strftime": [0, 1], 98 | 99 | "bcmp": [0, 1], 100 | "bcopy": [0], 101 | "bzero": [0], 102 | "memccpy": [1], 103 | "memchr": [0], 104 | "memcpy": [1], 105 | "memcmp": [0, 1], 106 | "memicmp": [0, 1], 107 | "memmove": [1], 108 | "movmem": [0], 109 | "stpcpy": [1], 110 | "strcpy": [1], 111 | "strcat": [1], 112 | "strchr": [0], 113 | "strcmp": [0, 1], 114 | "stricmp": [0, 1], 115 | "strcspn": [0, 1], 116 | "strdup": [0], 117 | "strlen": [0], 118 | "strncat": [1], 119 | "strnicmp": [0, 1], 120 | "strncpy": [1], 121 | "strpbrk": [0, 1], 122 | "strrev": [0], 123 | "strstr": [0], 124 | "strtok": [0], 125 | 126 | "fgetwc": [0], 127 | "fgetws": [1], 128 | "fputwc": [1], 129 | "fputws": [0, 1], 130 | "fwide": [0], 131 | "fwprintf": [0, 1], 132 | "fwscanf": [0, 1], 133 | "getwc": [0], 134 | "putwc": [1], 135 | "swprintf": [2], 136 | "ungetwc": [1], 137 | 138 | "vfwprintf": [0, 1], 139 | "vfwscanf": [0, 1], 140 | "vswprintf": [2], 141 | "vswscanf": [0, 1], 142 | "vwprintf": [0], 143 | "vwscanf": [0], 144 | "wprintf": [0], 145 | "wscanf": [0], 146 | 147 | "wcstod": [0, 1], 148 | "wcstof": [0, 1], 149 | "wcstol": [0, 1], 150 | "wcstold": [0, 1], 151 | "strtoll": [0, 1], 152 | "wcstoul": [0, 1], 153 | "wcstoull": [0, 1], 154 | 155 | "mbrlen": [0, 2], 156 | "mbrtowc": [1, 3], 157 | "mbsinit": [0], 158 | "mbsrtowcs": [1, 3], 159 | "wcrtomb": [2], 160 | "wcsrtombs": [1, 3], 161 | 162 | "wcscat": [1], 163 | "wcschr": [0], 164 | "wcscmp": [0, 1], 165 | "wcscoll": [0, 1], 166 | "wcscpy": [1], 167 | "wcscspn": [0, 1], 168 | "wcslen": [0], 169 | "wcsncat": [1], 170 | "wcsncmp": [0, 1], 171 | "wcsncpy": [1], 172 | "wcspbrk": [0, 1], 173 | "wcsrchr": [0], 174 | "wcsspn": [0, 1], 175 | "wcsstr": [0, 1], 176 | "wcstok": [0, 1], 177 | "wcsxfrm": [1], 178 | "wmemchr": [0], 179 | "wmemcmp": [0, 1], 180 | "wmemcpy": [1], 181 | "wmemmove": [1], 182 | 183 | "wcsftime": [0, 2] 184 | }, 185 | "ArgDefStartIds": { 186 | "scanf": 1, 187 | "fscanf": 2, 188 | "fwscanf": 2, 189 | "swscanf": 2, 190 | "vfwscanf": 2, 191 | "vswscanf": 2, 192 | "vwscanf": 1, 193 | "wscanf": 1 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /test/extraToolTests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/for-just-we/CppCodeAnalyzer/83ff0a897565e67b89a2ec2762fb70228efb28bb/test/extraToolTests/__init__.py -------------------------------------------------------------------------------- /test/extraToolTests/deepwukongTest.py: -------------------------------------------------------------------------------- 1 | from extraTools.vuldetect.deepwukong import * 2 | from time import time 3 | 4 | 5 | 6 | 7 | def test(): 8 | start = time() 9 | fileName = "../testfiles/sard_test_cases/CWE_119_124_fscanf.c" 10 | file1: str = "../testfiles/sard_test_cases/io.c" 11 | calleeInfs = { 12 | "ArgDef": { 13 | "memcpy": [0], 14 | "memmove": [0], 15 | "memset": [0], 16 | "fgets": [0], 17 | "recv": [1], 18 | }, 19 | "ArgUse": { 20 | "memcpy": [1], 21 | "memmove": [1], 22 | "memset": [1] 23 | }, 24 | "ArgDefStartIds": { 25 | "scanf": 1, 26 | "fscanf": 2 27 | } 28 | } 29 | 30 | systemDefinedFuncs: Set[str] = { "main", "memset", "memmove", "fscanf", "time", "printf", "wprintf", "puts" 31 | "sscanf", "isxdigit", "iswxdigit", "swscanf", "rand"} 32 | systemDefinedVars: Set[str] = { "argc", "argv", "stdin", "stdout", "cin", "cout" } 33 | 34 | calleeInfos = initialCalleeInfos(calleeInfs) 35 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 36 | astAnalyzer.calleeInfos = calleeInfos 37 | converter: CFGToUDGConverter = CFGToUDGConverter() 38 | converter.astAnalyzer = astAnalyzer 39 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 40 | ddgCreator: DDGCreator = DDGCreator() 41 | cpgs: List[CPG] = fileParse(fileName, converter, defUseConverter, ddgCreator) 42 | for cpg in cpgs: 43 | cpg.file = fileName 44 | symbolizingTool: SymbolizingTool = SymbolizingTool(systemDefinedVars, systemDefinedFuncs) 45 | symbolizingTool.getVarFuncNamesInFile(cpgs) 46 | print(symbolizingTool.func2symbol) 47 | print(symbolizingTool.var2symbol) 48 | 49 | print("======================================") 50 | 51 | sensitive_apis: Set[str] = {"malloc", "memset"} 52 | sliceTool: XFGSliceTool = XFGSliceTool(cpgs, sensitive_apis, symbolizingTool) 53 | sliceTool.generateSliceForProgram() 54 | 55 | for slice in sliceTool.slices: 56 | for key, value in slice.toJson().items(): 57 | print(key) 58 | print(value) 59 | print("======================") 60 | 61 | end = time() 62 | print(end - start) 63 | return 64 | 65 | 66 | 67 | def testGenerateSlices(): 68 | file1: str = "../testfiles/sard_test_cases/io.c" 69 | file2: str = "../testfiles/sard_test_cases/CWE_119_122_switch.c" 70 | 71 | testfiles: List[str] = ["../testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53a.c", 72 | "../testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53b.c", 73 | "../testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53c.c", 74 | "../testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53d.c"] 75 | 76 | calleeInfs = { 77 | "ArgDef": { 78 | "memcpy": [0], 79 | "memmove": [0], 80 | "memset": [0], 81 | "fgets": [0], 82 | "recv": [1], 83 | "free": [0] 84 | }, 85 | "ArgUse": { 86 | "memcpy": [1], 87 | "memmove": [1], 88 | "memset": [1], 89 | "connect": [1] 90 | }, 91 | "ArgDefStartIds": { 92 | "scanf": 1, 93 | "fscanf": 2 94 | } 95 | } 96 | 97 | start = time() 98 | systemDefinedFuncs: Set[str] = {"main", "memset", "memmove", "fscanf", "time", "printf", "wprintf", "puts", 99 | "sscanf", "isxdigit", "iswxdigit", "swscanf", "rand", "exit", 100 | "malloc", "free", "srand", "WSAStartup", "socket", "connect", 101 | "inet_addr", "htons", "recv", "CLOSE_SOCKET", "WSACleanup"} 102 | systemDefinedVars: Set[str] = {"argc", "argv", "stdin", "stdout", "cin", "cout", "SOCKET_ERROR"} 103 | sensitive_apis: Set[str] = { "malloc", "memset" } 104 | 105 | calleeInfos = initialCalleeInfos(calleeInfs) 106 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 107 | astAnalyzer.calleeInfos = calleeInfos 108 | converter: CFGToUDGConverter = CFGToUDGConverter() 109 | converter.astAnalyzer = astAnalyzer 110 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 111 | ddgCreator: DDGCreator = DDGCreator() 112 | cpgsCommon: List[CPG] = fileParse(file1, converter, defUseConverter, ddgCreator) # print et al 113 | for cpg in cpgsCommon: 114 | cpg.joinSlice = False 115 | cpg.file = file1 116 | 117 | # cpgsMain: List[CPG] = fileParse(file2, calleeInfos, converter, defUseConverter, ddgCreator) # print et al 118 | # for cpg in cpgsMain: 119 | # cpg.file = file2 120 | cpgMainss: List[CPG] = list() 121 | for testfile in testfiles: 122 | cpgMains: List[CPG] = fileParse(testfile, converter, defUseConverter, ddgCreator) 123 | for cpg in cpgMains: 124 | cpg.file = testfile 125 | cpgMainss.extend(cpgMains) 126 | 127 | cpgs = cpgsCommon + cpgMainss 128 | 129 | symbolizingTool: SymbolizingTool = SymbolizingTool(systemDefinedVars, systemDefinedFuncs) 130 | symbolizingTool.getVarFuncNamesInFile(cpgs) 131 | 132 | print(symbolizingTool.var2symbol) 133 | print(symbolizingTool.func2symbol) 134 | print("======================================") 135 | 136 | sliceTool: XFGSliceTool = XFGSliceTool(cpgs, sensitive_apis, symbolizingTool) 137 | sliceTool.generateSliceForProgram() 138 | 139 | for slice in sliceTool.slices: 140 | for key, value in slice.toJson().items(): 141 | print(key) 142 | print(value) 143 | print("======================") 144 | 145 | end = time() 146 | print(end - start) 147 | return 148 | 149 | if __name__ == '__main__': 150 | test() -------------------------------------------------------------------------------- /test/extraToolTests/ivdetectTest.py: -------------------------------------------------------------------------------- 1 | from extraTools.vuldetect.ivdetect import * 2 | 3 | from mainTool.ast.builders import * 4 | from mainTool.cfg.CCFG import * 5 | from mainTool.CPG import * 6 | 7 | from antlr4.tree.Tree import ParseTree, ParseTreeWalker 8 | 9 | walker: ParseTreeWalker = ParseTreeWalker() 10 | 11 | def getParser(code: str) -> CPP14Parser: 12 | inputStream = InputStream(code) 13 | cpp14Lexer = CPP14Lexer(inputStream) 14 | tokenStream = CommonTokenStream(cpp14Lexer) 15 | parser = CPP14Parser(tokenStream) 16 | return parser 17 | 18 | def varGeneratingTest(): 19 | callStmt: str = 'memset(source, dst, 100);' 20 | declStmt: str = "unsigned int a = b + c, d{a}, e(8);" 21 | declStmt1: str = "char aa ='A', bb{'B'}, cc('C'), **dd(NULL);" 22 | declStmt2: str = "int aa = addr < TASK_SIZE_MAX ? 1 : a;" # 三目表达式初始化赋值 23 | declStmt3: str = "unsigned int test::m_value1 = 0, ::a = stu->score, *bb(4);" 24 | 25 | parser: CPP14Parser = getParser(declStmt2) 26 | tree: ParseTree = parser.simpledeclaration() 27 | builder: FunctionContentBuilder = FunctionContentBuilder() 28 | builder.stack.append(CompoundStatement()) 29 | walker.walk(builder, tree) 30 | 31 | astNode: ASTNode = builder.stack.pop() 32 | astVarAnalyzer: ASTVarAnalyzer = ASTVarAnalyzer() 33 | provider: ASTNodeASTProvider = ASTNodeASTProvider() 34 | provider.node = astNode 35 | astVarAnalyzer.analyzeAST(provider) 36 | 37 | return 38 | 39 | 40 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 41 | calleeInfos: CalleeInfos = CalleeInfos() 42 | 43 | calleeInfos.addArgDef("memcpy", 0) 44 | calleeInfos.addArgUse("memcpy", 1) 45 | calleeInfos.addArgDef("memmove", 0) 46 | calleeInfos.addArgUse("memmove", 1) 47 | calleeInfos.addArgDef("memset", 0) 48 | calleeInfos.addArgDef("fgets", 0) 49 | calleeInfos.addArgDef("recv", 1) 50 | calleeInfos.addArgDefStartIds("scanf", 1) 51 | astAnalyzer.calleeInfos = calleeInfos 52 | 53 | 54 | def feature3_4_5_Test(): 55 | code = "static void goodG2B2(int a)\n" + \ 56 | "{\n" + \ 57 | " char * data;\n" + \ 58 | " data = NULL;\n" + \ 59 | " switch(6)\n" + \ 60 | " {\n" + \ 61 | " case 6:\n" + \ 62 | " /* FIX: Allocate and point data to a large buffer that is at least as large as the large buffer used in the sink */\n" + \ 63 | " data = (char *)malloc(100*sizeof(char));\n" + \ 64 | " if (data == NULL) {exit(-1);}\n" + \ 65 | " data[0] = '\\0'; /* null terminate */\n" + \ 66 | " break;\n" + \ 67 | " default:\n" + \ 68 | " /* INCIDENTAL: CWE 561 Dead Code, the code below will never run */\n" + \ 69 | " printLine(\"Benign, fixed string\");\n" + \ 70 | " break;\n" + \ 71 | " }\n" + \ 72 | " {\n" + \ 73 | " size_t i;\n" + \ 74 | " char source[100];\n" + \ 75 | " memset(source, 'C', 100-1); /* fill with 'C's */\n" + \ 76 | " source[100-1] = '\\0'; /* null terminate */\n" + \ 77 | " /* POTENTIAL FLAW: Possible buffer overflow if source is larger than data */\n" + \ 78 | " for (i = 0; i < 100; i++)\n" + \ 79 | " {\n" + \ 80 | " data[i] = source[i];\n" + \ 81 | " }\n" + \ 82 | " data[100-1] = '\\0'; /* Ensure the destination buffer is null terminated */\n" + \ 83 | " printLine(data);\n" + \ 84 | " free(data);\n" + \ 85 | " }\n" + \ 86 | "}" 87 | 88 | code1 = "static inline void init_copy_chunk_defaults(struct cifs_tcon *tcon){\n" \ 89 | " tcon->max_chunks = 256;\n" \ 90 | " tcon->max_bytes_chunk = 1048576;\n" \ 91 | " tcon->max_bytes_copy = 16777216;\n" \ 92 | "}" 93 | 94 | code2 = "int is_valid_bugaddr(unsigned long addr){\n" \ 95 | " unsigned short ud;\n" \ 96 | " if (addr < TASK_SIZE_MAX)\n" \ 97 | " return 0;\n" \ 98 | " if (probe_kernel_address((unsigned short *)addr, ud))\n" \ 99 | " return 0;\n" \ 100 | " return ud == INSN_UD0 || ud == INSN_UD2;\n}" 101 | 102 | 103 | parser: CPP14Parser = getParser(code) 104 | tree: ParseTree = parser.functiondefinition() 105 | builder: FunctionDefBuilder = FunctionDefBuilder() 106 | walker.walk(builder, tree) 107 | 108 | functionDef: FunctionDef = builder.functionDef 109 | cfg: CFG = ASTToCFGConvert(functionDef) 110 | cdg: CDG = createCDG(cfg) 111 | # UDG 112 | converter: CFGToUDGConverter = CFGToUDGConverter() 113 | converter.astAnalyzer = astAnalyzer 114 | useDefGraph: UseDefGraph = converter.convert(cfg) 115 | 116 | # DefUseCFG 117 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 118 | defUseCFG: DefUseCFG = defUseConverter.convert(cfg, useDefGraph) 119 | 120 | # Data Dependence Graph 121 | ddgCreator: DDGCreator = DDGCreator() 122 | ddg: DDG = ddgCreator.createForDefUseCFG(defUseCFG) 123 | 124 | cpg: CPG = CPG() 125 | cpg.initCFGEdges(cfg) 126 | cpg.initCDGEdges(cdg) 127 | cpg.initDDGEdges(ddg) 128 | 129 | varLists = generate_feature3(cpg.statements) 130 | for varList in varLists: 131 | print(varList) 132 | 133 | feature4s: List[List[List[str]]] = generate_feature4(cpg) 134 | feature5s: List[List[List[str]]] = generate_feature5(cpg) 135 | return 136 | 137 | 138 | if __name__ == '__main__': 139 | feature3_4_5_Test() -------------------------------------------------------------------------------- /test/extraToolTests/sysevrTest.py: -------------------------------------------------------------------------------- 1 | from extraTools.vuldetect.sysevr import * 2 | from time import time 3 | 4 | 5 | def test(): 6 | start = time() 7 | fileName = "../testfiles/sard_test_cases/CWE_119_124_fscanf.c" 8 | file1: str = "../testfiles/sard_test_cases/io.c" 9 | calleeInfs = { 10 | "ArgDef": { 11 | "memcpy": [0], 12 | "memmove": [0], 13 | "memset": [0], 14 | "fgets": [0], 15 | "recv": [1], 16 | }, 17 | "ArgUse": { 18 | "memcpy": [1], 19 | "memmove": [1], 20 | "memset": [1] 21 | }, 22 | "ArgDefStartIds": { 23 | "scanf": 1, 24 | "fscanf": 2 25 | } 26 | } 27 | 28 | systemDefinedFuncs: Set[str] = { "main", "memset", "memmove", "fscanf", "time", "printf", "wprintf", "puts" 29 | "sscanf", "isxdigit", "iswxdigit", "swscanf", "rand"} 30 | systemDefinedVars: Set[str] = { "argc", "argv", "stdin", "stdout", "cin", "cout" } 31 | 32 | calleeInfos = initialCalleeInfos(calleeInfs) 33 | converter: CFGToUDGConverter = CFGToUDGConverter() 34 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 35 | ddgCreator: DDGCreator = DDGCreator() 36 | cpgs: List[CPG] = fileParse(file1, calleeInfos, converter, defUseConverter, ddgCreator) 37 | symbolizingTool: SymbolizingTool = SymbolizingTool(systemDefinedVars, systemDefinedFuncs) 38 | symbolizingTool.getVarFuncNamesInFile(cpgs) 39 | print(symbolizingTool.func2symbol) 40 | print(symbolizingTool.var2symbol) 41 | end = time() 42 | print(end - start) 43 | return 44 | 45 | 46 | 47 | def testGenerateSlices(): 48 | file1: str = "../testfiles/sard_test_cases/io.c" 49 | file2: str = "../testfiles/sard_test_cases/CWE_119_122_switch.c" 50 | 51 | testfiles: List[str] = ["../testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53a.c", 52 | "../testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53b.c", 53 | "../testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53c.c", 54 | "../testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53d.c"] 55 | 56 | calleeInfs = { 57 | "ArgDef": { 58 | "memcpy": [0], 59 | "memmove": [0], 60 | "memset": [0], 61 | "fgets": [0], 62 | "recv": [1], 63 | "free": [0] 64 | }, 65 | "ArgUse": { 66 | "memcpy": [1], 67 | "memmove": [1], 68 | "memset": [1], 69 | "connect": [1] 70 | }, 71 | "ArgDefStartIds": { 72 | "scanf": 1, 73 | "fscanf": 2 74 | } 75 | } 76 | 77 | start = time() 78 | systemDefinedFuncs: Set[str] = {"main", "memset", "memmove", "fscanf", "time", "printf", "wprintf", "puts", 79 | "sscanf", "isxdigit", "iswxdigit", "swscanf", "rand", "exit", 80 | "malloc", "free", "srand", "WSAStartup", "socket", "connect", 81 | "inet_addr", "htons", "recv", "CLOSE_SOCKET", "WSACleanup"} 82 | systemDefinedVars: Set[str] = {"argc", "argv", "stdin", "stdout", "cin", "cout", "SOCKET_ERROR"} 83 | sensitive_apis: Set[str] = { "malloc", "memset" } 84 | 85 | calleeInfos = initialCalleeInfos(calleeInfs) 86 | converter: CFGToUDGConverter = CFGToUDGConverter() 87 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 88 | ddgCreator: DDGCreator = DDGCreator() 89 | cpgsCommon: List[CPG] = fileParse(file1, calleeInfos, converter, defUseConverter, ddgCreator) # print et al 90 | for cpg in cpgsCommon: 91 | cpg.joinSlice = False 92 | cpg.file = file1 93 | 94 | # cpgsMain: List[CPG] = fileParse(file2, calleeInfos, converter, defUseConverter, ddgCreator) # print et al 95 | # for cpg in cpgsMain: 96 | # cpg.file = file2 97 | cpgMainss: List[CPG] = list() 98 | for testfile in testfiles: 99 | cpgMains: List[CPG] = fileParse(testfile, calleeInfos, converter, defUseConverter, ddgCreator) 100 | for cpg in cpgMains: 101 | cpg.file = testfile 102 | cpgMainss.extend(cpgMains) 103 | 104 | cpgs = cpgsCommon + cpgMainss 105 | 106 | symbolizingTool: SymbolizingTool = SymbolizingTool(systemDefinedVars, systemDefinedFuncs) 107 | symbolizingTool.getVarFuncNamesInFile(cpgs) 108 | 109 | print(symbolizingTool.var2symbol) 110 | print(symbolizingTool.func2symbol) 111 | print("======================================") 112 | 113 | sliceTool: SySeSliceTool = SySeSliceTool(cpgs, sensitive_apis, symbolizingTool) 114 | sliceTool.generateSliceForProgram() 115 | 116 | for slice in sliceTool.slices: 117 | for key, value in slice.toJson().items(): 118 | print(key) 119 | print(value) 120 | print("======================") 121 | 122 | end = time() 123 | print(end - start) 124 | return 125 | 126 | if __name__ == '__main__': 127 | testGenerateSlices() -------------------------------------------------------------------------------- /test/mainToolTests/CDGBuildTest.py: -------------------------------------------------------------------------------- 1 | from mainTool.cdg.CDG import * 2 | from mainTool.antlr.CPP14Lexer import CPP14Lexer, InputStream, CommonTokenStream 3 | from antlr4.tree.Tree import ParseTree 4 | from mainTool.ast.builders import * 5 | 6 | from mainTool.cfg.CCFG import * 7 | 8 | walker: ParseTreeWalker = ParseTreeWalker() 9 | 10 | code: str = "static void goodG2B2()\n" + \ 11 | "{\n" + \ 12 | " char * data;\n" + \ 13 | " data = NULL;\n" + \ 14 | " switch(6)\n" + \ 15 | " {\n" + \ 16 | " case 6:\n" + \ 17 | " /* FIX: Allocate and point data to a large buffer that is at least as large as the large buffer used in the sink */\n" + \ 18 | " data = (char *)malloc(100*sizeof(char));\n" + \ 19 | " if (data == NULL) {exit(-1);}\n" + \ 20 | " data[0] = '\\0'; /* null terminate */\n" + \ 21 | " break;\n" + \ 22 | " default:\n" + \ 23 | " /* INCIDENTAL: CWE 561 Dead Code, the code below will never run */\n" + \ 24 | " printLine(\"Benign, fixed string\");\n" + \ 25 | " break;\n" + \ 26 | " }\n" + \ 27 | " {\n" + \ 28 | " size_t i;\n" + \ 29 | " char source[100];\n" + \ 30 | " memset(source, 'C', 100-1); /* fill with 'C's */\n" + \ 31 | " source[100-1] = '\\0'; /* null terminate */\n" + \ 32 | " /* POTENTIAL FLAW: Possible buffer overflow if source is larger than data */\n" + \ 33 | " for (i = 0; i < 100; i++)\n" + \ 34 | " {\n" + \ 35 | " data[i] = source[i];\n" + \ 36 | " }\n" + \ 37 | " data[100-1] = '\\0'; /* Ensure the destination buffer is null terminated */\n" + \ 38 | " printLine(data);\n" + \ 39 | " free(data);\n" + \ 40 | " }\n" + \ 41 | "}" 42 | 43 | code1: str = "void CWE124_Buffer_Underwrite__CWE839_fscanf_17_bad()\n" + \ 44 | "{\n" + \ 45 | " int i,j;\n" + \ 46 | " int data;\n" + \ 47 | " /* Initialize data */\n" + \ 48 | " data = -1;\n" + \ 49 | " for(i = 0; i < 1; i++)\n" + \ 50 | " {\n" + \ 51 | " /* POTENTIAL FLAW: Read data from the console using fscanf() */\n" + \ 52 | " fscanf(stdin, \"%d\", &data);\n" + \ 53 | " }\n" + \ 54 | " for(j = 0; j < 1; j++)\n" + \ 55 | " {\n" + \ 56 | " {\n" + \ 57 | " int i;\n" + \ 58 | " int buffer[10] = { 0 };\n" + \ 59 | " /* POTENTIAL FLAW: Attempt to access a negative index of the array\n" + \ 60 | " * This code does not check to see if the array index is negative */\n" + \ 61 | " if (data < 10)\n" + \ 62 | " {\n" + \ 63 | " buffer[data] = 1;\n" + \ 64 | " /* Print the array values */\n" + \ 65 | " for(i = 0; i < 10; i++)\n" + \ 66 | " {\n" + \ 67 | " printIntLine(buffer[i]);\n" + \ 68 | " }\n" + \ 69 | " }\n" + \ 70 | " else\n" + \ 71 | " {\n" + \ 72 | " printLine(\"ERROR: Array index is negative.\");\n" + \ 73 | " }\n" + \ 74 | " }\n" + \ 75 | " }\n" + \ 76 | "}\n" 77 | 78 | 79 | code2: str = "void CWE121_Stack_Based_Buffer_Overflow__CWE129_fgets_01_bad()\n" + \ 80 | "{\n" + \ 81 | " int data;\n" + \ 82 | " /* Initialize data */\n" + \ 83 | " data = -1;\n" + \ 84 | " {\n" + \ 85 | " char inputBuffer[CHAR_ARRAY_SIZE] = \"\";\n" + \ 86 | " /* POTENTIAL FLAW: Read data from the console using fgets() */\n" + \ 87 | " if (fgets(inputBuffer, CHAR_ARRAY_SIZE, stdin) != NULL)\n" + \ 88 | " {\n" + \ 89 | " /* Convert to int */\n" + \ 90 | " data = atoi(inputBuffer);\n" + \ 91 | " }\n" + \ 92 | " else\n" + \ 93 | " {\n" + \ 94 | " printLine(\"fgets() failed.\");\n" + \ 95 | " }\n" + \ 96 | " }\n" + \ 97 | " {\n" + \ 98 | " int i;\n" + \ 99 | " int buffer[10] = { 0 };\n" + \ 100 | " /* POTENTIAL FLAW: Attempt to write to an index of the array that is above the upper bound\n" + \ 101 | " * This code does check to see if the array index is negative */\n" + \ 102 | " if (data >= 0)\n" + \ 103 | " {\n" + \ 104 | " buffer[data] = 1;\n" + \ 105 | " /* Print the array values */\n" + \ 106 | " for(i = 0; i < 10; i++)\n" + \ 107 | " {\n" + \ 108 | " printIntLine(buffer[i]);\n" + \ 109 | " }\n" + \ 110 | " }\n" + \ 111 | " else\n" + \ 112 | " {\n" + \ 113 | " printLine(\"ERROR: Array index is negative.\");\n" + \ 114 | " }\n" + \ 115 | " }\n" + \ 116 | "}" 117 | 118 | 119 | def getParser(code: str) -> CPP14Parser: 120 | inputStream = InputStream(code) 121 | cpp14Lexer = CPP14Lexer(inputStream) 122 | tokenStream = CommonTokenStream(cpp14Lexer) 123 | parser = CPP14Parser(tokenStream) 124 | return parser 125 | 126 | def testFunctionDef(): 127 | parser: CPP14Parser = getParser(code2) 128 | tree: ParseTree = parser.functiondefinition() 129 | builder: FunctionDefBuilder = FunctionDefBuilder() 130 | walker.walk(builder, tree) 131 | 132 | functionDef: FunctionDef = builder.functionDef 133 | compCFG: CFG = ASTToCFGConvert(functionDef) 134 | 135 | for i, (node, edges) in enumerate(compCFG.outNeighborhood.items()): 136 | print(f"{i} : {node}") 137 | for edge in edges: 138 | print(edge) 139 | 140 | print("=========================================") 141 | 142 | cdg: CDG = createCDG(compCFG) 143 | for edge in cdg.getEdges(): 144 | print(edge) 145 | return 146 | 147 | if __name__ == '__main__': 148 | testFunctionDef() -------------------------------------------------------------------------------- /test/mainToolTests/CFGBuildTest.py: -------------------------------------------------------------------------------- 1 | from mainTool.cfg.CCFG import * 2 | from mainTool.antlr.CPP14Lexer import CPP14Lexer, InputStream, CommonTokenStream 3 | from mainTool.ast.builders import * 4 | 5 | from antlr4.tree.Tree import ParseTree, ParseTreeWalker 6 | 7 | walker: ParseTreeWalker = ParseTreeWalker() 8 | 9 | def getParser(code: str) -> CPP14Parser: 10 | inputStream = InputStream(code) 11 | cpp14Lexer = CPP14Lexer(inputStream) 12 | tokenStream = CommonTokenStream(cpp14Lexer) 13 | parser = CPP14Parser(tokenStream) 14 | return parser 15 | 16 | def testCompoundStatement(): 17 | stmt: str = "{ exit(-1); *p = a * b; int s = *p + d; }" 18 | parser: CPP14Parser = getParser(stmt) 19 | tree: ParseTree = parser.statement() 20 | builder: FunctionContentBuilder = FunctionContentBuilder() 21 | builder.stack.append(CompoundStatement()) 22 | walker.walk(builder, tree) 23 | 24 | compoundStatement: CompoundStatement = builder.stack.pop() 25 | compCFG: CFG = CCFGFactory.newCompoundInstance(compoundStatement) 26 | return 27 | 28 | 29 | def testSelectionStatement(): 30 | ifstmt: str = "if (a == 1)\n" + \ 31 | " a = 1;\n" + \ 32 | "else if (a == 2)\n" + \ 33 | " a = 2;\n" + \ 34 | "else \n" + \ 35 | " a = 3;" 36 | 37 | ifCode1: str = "if (staticFalse){ exit(-1); goto loop; *p = a * b; return a + 1; }" 38 | 39 | switchCode: str = "switch (staticTrue){\n" + \ 40 | " case 1:\n" + \ 41 | " test::a = 1;\n" + \ 42 | " break;\n" + \ 43 | " case 2:\n" + \ 44 | " a = 2;\n" + \ 45 | " break;\n" + \ 46 | " default:\n" + \ 47 | " a = 3;\n" + \ 48 | "}" 49 | 50 | parser: CPP14Parser = getParser(switchCode) 51 | tree: ParseTree = parser.selectionstatement() 52 | builder: FunctionContentBuilder = FunctionContentBuilder() 53 | builder.stack.append(Statement()) 54 | walker.walk(builder, tree) 55 | 56 | # ifStatement: IfStatement = builder.stack.pop() 57 | switchStatement: SwitchStatement = builder.stack.pop() 58 | compCFG: CFG = CCFGFactory.newSwitchInstance(switchStatement) 59 | 60 | for node, edges in compCFG.outNeighborhood.items(): 61 | print(node) 62 | for edge in edges: 63 | print(edge) 64 | return 65 | 66 | 67 | def testIteration(): 68 | whileStmt: str = "while(x <= 1){\n" + \ 69 | " (*x)++;\n" + \ 70 | " ++*x;\n" + \ 71 | " if (cond)\n" + \ 72 | " break;\n" + \ 73 | " func(a);\n" + \ 74 | "}" 75 | 76 | doStmt: str = "do {\n" + \ 77 | " int a = sizeof(int);\n" + \ 78 | " b = sizeof(a);\n" + \ 79 | "}while(a + b <= 4);" 80 | 81 | forStmt: str = "for(int i = 0; i < 10; ++i){ \n" + \ 82 | " int a = 1;\n" + \ 83 | " b = c + a;" + \ 84 | " if (a > 0)\n" + \ 85 | " break;\n" + \ 86 | " func(a);\n" + \ 87 | "}" 88 | 89 | forRangeStmt: str = "for (unsigned int * p: vec){\n" + \ 90 | " unsigned int a = b + c, d{a}, e(8);\n" + \ 91 | " char source[100], *dst[100], p[50][40];\n" + \ 92 | " if (staticTrue)\n" + \ 93 | " break;\n" + \ 94 | " func(a);\n" + \ 95 | " }" 96 | 97 | parser: CPP14Parser = getParser(forRangeStmt) 98 | tree: ParseTree = parser.iterationstatement() 99 | builder: FunctionContentBuilder = FunctionContentBuilder() 100 | builder.stack.append(Statement()) 101 | walker.walk(builder, tree) 102 | 103 | # whileStatement: WhileStatement = builder.stack.pop() 104 | # doStatement: DoStatement = builder.stack.pop() 105 | # forStatement: ForStatement = builder.stack.pop() 106 | forRangeStatement: ForRangeStatement = builder.stack.pop() 107 | 108 | # compCFG: CFG = CCFGFactory.newWhileInstance(whileStatement) 109 | # compCFG: CFG = CCFGFactory.newDoInstance(doStatement) 110 | # compCFG: CFG = CCFGFactory.newForInstance(forStatement) 111 | compCFG: CFG = CCFGFactory.newForRangeInstance(forRangeStatement) 112 | 113 | for node, edges in compCFG.outNeighborhood.items(): 114 | print(node) 115 | for edge in edges: 116 | print(edge) 117 | return 118 | 119 | 120 | def testTry(): 121 | code: str = "try{\n" + \ 122 | " const int& a = 1;\n" + \ 123 | " }catch(const int& e){\n" + \ 124 | " }catch(...){\n" + \ 125 | " }" 126 | 127 | parser: CPP14Parser = getParser(code) 128 | tree: ParseTree = parser.tryblock() 129 | builder: FunctionContentBuilder = FunctionContentBuilder() 130 | builder.stack.append(Statement()) 131 | walker.walk(builder, tree) 132 | 133 | tryStatement: TryStatement = builder.stack.pop() 134 | compCFG: CFG = CCFGFactory.newTryInstance(tryStatement) 135 | return 136 | 137 | 138 | def testFunctionDef(): 139 | code = "static void goodG2B2(int a)\n" + \ 140 | "{\n" + \ 141 | " char * data;\n" + \ 142 | " data = NULL;\n" + \ 143 | " switch(6)\n" + \ 144 | " {\n" + \ 145 | " case 6:\n" + \ 146 | " /* FIX: Allocate and point data to a large buffer that is at least as large as the large buffer used in the sink */\n" + \ 147 | " data = (char *)malloc(100*sizeof(char));\n" + \ 148 | " if (data == NULL) {exit(-1);}\n" + \ 149 | " data[0] = '\\0'; /* null terminate */\n" + \ 150 | " break;\n" + \ 151 | " default:\n" + \ 152 | " /* INCIDENTAL: CWE 561 Dead Code, the code below will never run */\n" + \ 153 | " printLine(\"Benign, fixed string\");\n" + \ 154 | " break;\n" + \ 155 | " }\n" + \ 156 | " {\n" + \ 157 | " size_t i;\n" + \ 158 | " char source[100];\n" + \ 159 | " memset(source, 'C', 100-1); /* fill with 'C's */\n" + \ 160 | " source[100-1] = '\\0'; /* null terminate */\n" + \ 161 | " /* POTENTIAL FLAW: Possible buffer overflow if source is larger than data */\n" + \ 162 | " for (i = 0; i < 100; i++)\n" + \ 163 | " {\n" + \ 164 | " data[i] = source[i];\n" + \ 165 | " }\n" + \ 166 | " data[100-1] = '\\0'; /* Ensure the destination buffer is null terminated */\n" + \ 167 | " printLine(data);\n" + \ 168 | " free(data);\n" + \ 169 | " }\n" + \ 170 | "}" 171 | 172 | parser: CPP14Parser = getParser(code) 173 | tree: ParseTree = parser.functiondefinition() 174 | builder: FunctionDefBuilder = FunctionDefBuilder() 175 | walker.walk(builder, tree) 176 | 177 | functionDef: FunctionDef = builder.functionDef 178 | compCFG: CFG = ASTToCFGConvert(functionDef) 179 | 180 | 181 | for i, (node, edges) in enumerate(compCFG.outNeighborhood.items()): 182 | print(f"{i} : {node}") 183 | for edge in edges: 184 | print(edge) 185 | 186 | return 187 | 188 | if __name__ == '__main__': 189 | testIteration() -------------------------------------------------------------------------------- /test/mainToolTests/CPGBuildTest.py: -------------------------------------------------------------------------------- 1 | from mainTool.udg.astAnalyzers import ASTDefUseAnalyzer, CalleeInfos, CFGToUDGConverter 2 | from mainTool.ddg.DDGCreator import * 3 | from mainTool.cfg.CCFG import ASTToCFGConvert 4 | from mainTool.CPG import CPG 5 | from mainTool.cdg.CDG import * 6 | 7 | from mainTool.antlr.CPP14Lexer import CPP14Lexer, InputStream, CommonTokenStream 8 | from mainTool.ast.builders import * 9 | 10 | import json 11 | from time import time 12 | 13 | from antlr4.tree.Tree import ParseTree, ParseTreeWalker 14 | 15 | walker: ParseTreeWalker = ParseTreeWalker() 16 | 17 | 18 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 19 | calleeInfos: CalleeInfos = CalleeInfos() 20 | 21 | calleeInfos.addArgDef("memcpy", 0) 22 | calleeInfos.addArgUse("memcpy", 1) 23 | calleeInfos.addArgDef("memmove", 0) 24 | calleeInfos.addArgUse("memmove", 1) 25 | calleeInfos.addArgDef("memset", 0) 26 | calleeInfos.addArgDef("fgets", 0) 27 | calleeInfos.addArgUse("atoi", 1) 28 | calleeInfos.addArgDef("recv", 1) 29 | calleeInfos.addArgDefStartIds("scanf", 1) 30 | astAnalyzer.calleeInfos = calleeInfos 31 | 32 | 33 | def getParser(code: str) -> CPP14Parser: 34 | inputStream = InputStream(code) 35 | cpp14Lexer = CPP14Lexer(inputStream) 36 | tokenStream = CommonTokenStream(cpp14Lexer) 37 | parser = CPP14Parser(tokenStream) 38 | return parser 39 | 40 | 41 | code = "static void goodG2B2(int a)\n" + \ 42 | "{\n" + \ 43 | " char * data;\n" + \ 44 | " data = NULL;\n" + \ 45 | " switch(6)\n" + \ 46 | " {\n" + \ 47 | " case 6:\n" + \ 48 | " /* FIX: Allocate and point data to a large buffer that is at least as large as the large buffer used in the sink */\n" + \ 49 | " data = (char *)malloc(100*sizeof(char));\n" + \ 50 | " if (data == NULL) {exit(-1);}\n" + \ 51 | " data[0] = '\\0'; /* null terminate */\n" + \ 52 | " break;\n" + \ 53 | " default:\n" + \ 54 | " /* INCIDENTAL: CWE 561 Dead Code, the code below will never run */\n" + \ 55 | " printLine(\"Benign, fixed string\");\n" + \ 56 | " break;\n" + \ 57 | " }\n" + \ 58 | " {\n" + \ 59 | " size_t i;\n" + \ 60 | " char source[100];\n" + \ 61 | " memset(source, 'C', 100-1); /* fill with 'C's */\n" + \ 62 | " source[100-1] = '\\0'; /* null terminate */\n" + \ 63 | " /* POTENTIAL FLAW: Possible buffer overflow if source is larger than data */\n" + \ 64 | " for (i = 0; i < 100; i++)\n" + \ 65 | " {\n" + \ 66 | " data[i] = source[i];\n" + \ 67 | " }\n" + \ 68 | " data[100-1] = '\\0'; /* Ensure the destination buffer is null terminated */\n" + \ 69 | " printLine(data);\n" + \ 70 | " free(data);\n" + \ 71 | " return 0;\n" + \ 72 | " }\n" + \ 73 | "}" 74 | 75 | 76 | def test(): 77 | # AST 78 | start = time() 79 | parser: CPP14Parser = getParser(code) 80 | tree: ParseTree = parser.functiondefinition() 81 | builder: FunctionDefBuilder = FunctionDefBuilder() 82 | walker.walk(builder, tree) 83 | 84 | # CFG 85 | functionDef: FunctionDef = builder.functionDef 86 | compCFG: CFG = ASTToCFGConvert(functionDef) 87 | 88 | # UDG 89 | converter: CFGToUDGConverter = CFGToUDGConverter() 90 | converter.astAnalyzer = astAnalyzer 91 | useDefGraph: UseDefGraph = converter.convert(compCFG) 92 | 93 | # DefUseCFG 94 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 95 | defUseCFG: DefUseCFG = defUseConverter.convert(compCFG, useDefGraph) 96 | 97 | # Data Dependence Graph 98 | ddgCreator: DDGCreator = DDGCreator() 99 | ddg: DDG = ddgCreator.createForDefUseCFG(defUseCFG) 100 | 101 | # Control Dependence Graph 102 | cdg: CDG = createCDG(compCFG) 103 | 104 | # Code Property Graph 105 | cpg: CPG = CPG() 106 | cpg.initCFGEdges(compCFG) 107 | cpg.initCDGEdges(cdg) 108 | cpg.initDDGEdges(ddg) 109 | 110 | end = time() 111 | 112 | jsonCPG: dict = cpg.toJson() 113 | jsonSerCPG: dict = cpg.toSerializedJson() 114 | new_cpg: CPG = CPG.fromJson(jsonCPG) 115 | print(json.dumps(jsonSerCPG, indent=2)) 116 | new_cpg1: CPG = CPG.fromSerJson(jsonSerCPG) 117 | print(end - start) 118 | return 119 | 120 | 121 | if __name__ == '__main__': 122 | test() -------------------------------------------------------------------------------- /test/mainToolTests/DDGBuildTest.py: -------------------------------------------------------------------------------- 1 | from mainTool.udg.useDefGraph import * 2 | from mainTool.udg.astAnalyzers import ASTDefUseAnalyzer, CalleeInfos, CFGToUDGConverter 3 | from mainTool.udg.astProvider import ASTNodeASTProvider 4 | from mainTool.ddg.DefUseGraph import DefUseCFG 5 | from mainTool.ddg.DDGCreator import * 6 | from mainTool.cfg.CCFG import ASTToCFGConvert 7 | 8 | from mainTool.antlr.CPP14Lexer import CPP14Lexer, InputStream, CommonTokenStream 9 | from mainTool.ast.builders import * 10 | 11 | from antlr4.tree.Tree import ParseTree, ParseTreeWalker 12 | from time import time 13 | walker: ParseTreeWalker = ParseTreeWalker() 14 | 15 | 16 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 17 | calleeInfos: CalleeInfos = CalleeInfos() 18 | 19 | calleeInfos.addArgDef("memcpy", 0) 20 | calleeInfos.addArgUse("memcpy", 1) 21 | calleeInfos.addArgDef("memmove", 0) 22 | calleeInfos.addArgUse("memmove", 1) 23 | calleeInfos.addArgDef("memset", 0) 24 | calleeInfos.addArgDef("fgets", 0) 25 | calleeInfos.addArgDef("recv", 1) 26 | calleeInfos.addArgDefStartIds("scanf", 1) 27 | astAnalyzer.calleeInfos = calleeInfos 28 | 29 | 30 | def getParser(code: str) -> CPP14Parser: 31 | inputStream = InputStream(code) 32 | cpp14Lexer = CPP14Lexer(inputStream) 33 | tokenStream = CommonTokenStream(cpp14Lexer) 34 | parser = CPP14Parser(tokenStream) 35 | return parser 36 | 37 | 38 | code = "static void goodG2B2(int a)\n" + \ 39 | "{\n" + \ 40 | " char * data;\n" + \ 41 | " data = NULL;\n" + \ 42 | " switch(6)\n" + \ 43 | " {\n" + \ 44 | " case 6:\n" + \ 45 | " /* FIX: Allocate and point data to a large buffer that is at least as large as the large buffer used in the sink */\n" + \ 46 | " data = (char *)malloc(100*sizeof(char));\n" + \ 47 | " if (data == NULL) {exit(-1);}\n" + \ 48 | " data[0] = '\\0'; /* null terminate */\n" + \ 49 | " break;\n" + \ 50 | " default:\n" + \ 51 | " /* INCIDENTAL: CWE 561 Dead Code, the code below will never run */\n" + \ 52 | " printLine(\"Benign, fixed string\");\n" + \ 53 | " break;\n" + \ 54 | " }\n" + \ 55 | " {\n" + \ 56 | " size_t i;\n" + \ 57 | " char source[100];\n" + \ 58 | " memset(source, 'C', 100-1); /* fill with 'C's */\n" + \ 59 | " source[100-1] = '\\0'; /* null terminate */\n" + \ 60 | " /* POTENTIAL FLAW: Possible buffer overflow if source is larger than data */\n" + \ 61 | " for (i = 0; i < 100; i++)\n" + \ 62 | " {\n" + \ 63 | " data[i] = source[i];\n" + \ 64 | " }\n" + \ 65 | " data[100-1] = '\\0'; /* Ensure the destination buffer is null terminated */\n" + \ 66 | " printLine(data);\n" + \ 67 | " free(data);\n" + \ 68 | " }\n" + \ 69 | "}" 70 | 71 | 72 | code1 = "void CWE124_Buffer_Underwrite__char_alloca_memmove_82_bad::action(char * data = NULL){\n" + \ 73 | " char source[100];\n" + \ 74 | " memset(source, 'C', 100-1);\n" + \ 75 | " source[100-1] = '\0';\n" + \ 76 | " memmove(data, source, 100*sizeof(char));\n" + \ 77 | " data[100-1] = '\0';\n" + \ 78 | " printLine(data);\n" + \ 79 | "}" 80 | 81 | code2 = "void CWE122_Heap_Based_Buffer_Overflow__c_CWE805_char_memmove_04_bad(){\n" + \ 82 | " char * data;\n" + \ 83 | " data = NULL;\n" + \ 84 | " data = (char *)malloc(50*sizeof(char));\n" + \ 85 | " if (data == NULL) {exit(-1);}\n" + \ 86 | " data[0] = '\0';\n" + \ 87 | " char source[100];\n" + \ 88 | " memset(source, 'C', 100-1);\n" + \ 89 | " source[100-1] = '\0';\n" + \ 90 | " memmove(data, source, 100*sizeof(char));\n" + \ 91 | " printLine(data);\n" + \ 92 | " free(data);\n" + \ 93 | "}" 94 | 95 | 96 | def test(): 97 | startTime = time() 98 | # AST 99 | parser: CPP14Parser = getParser(code) 100 | tree: ParseTree = parser.functiondefinition() 101 | builder: FunctionDefBuilder = FunctionDefBuilder() 102 | walker.walk(builder, tree) 103 | 104 | # CFG 105 | functionDef: FunctionDef = builder.functionDef 106 | compCFG: CFG = ASTToCFGConvert(functionDef) 107 | 108 | # UDG 109 | converter: CFGToUDGConverter = CFGToUDGConverter() 110 | converter.astAnalyzer = astAnalyzer 111 | useDefGraph: UseDefGraph = converter.convert(compCFG) 112 | 113 | # DefUseCFG 114 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 115 | defUseCFG: DefUseCFG = defUseConverter.convert(compCFG, useDefGraph) 116 | 117 | # Data Dependence Graph 118 | ddgCreator: DDGCreator = DDGCreator() 119 | ddg: DDG = ddgCreator.createForDefUseCFG(defUseCFG) 120 | 121 | endTime = time() 122 | 123 | print(endTime - startTime) 124 | for edge in ddg.defUseEdges: 125 | print(f"{str(edge.src)} ----[{edge.symbol}] ----- {str(edge.dst)}") 126 | return 127 | 128 | if __name__ == '__main__': 129 | test() -------------------------------------------------------------------------------- /test/mainToolTests/FileParsingTest.py: -------------------------------------------------------------------------------- 1 | from mainTool.CPG import * 2 | from time import time 3 | 4 | 5 | def test(): 6 | start = time() 7 | fileName = "../testfiles/sard_test_cases/CWE121_new_goto.c" 8 | 9 | calleeInfs = { 10 | "ArgDef": { 11 | "memcpy": [0], 12 | "memmove": [0], 13 | "memset": [0], 14 | "fgets": [0], 15 | "recv": [1], 16 | }, 17 | "ArgUse": { 18 | "memcpy": [1], 19 | "memmove": [1], 20 | "memset": [1] 21 | }, 22 | "ArgDefStartIds": { 23 | "scanf": 1, 24 | "fscanf": 2 25 | } 26 | } 27 | calleeInfos = initialCalleeInfos(calleeInfs) 28 | converter: CFGToUDGConverter = CFGToUDGConverter() 29 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 30 | astAnalyzer.calleeInfos = calleeInfos 31 | converter.astAnalyzer = astAnalyzer 32 | defUseConverter: CFGAndUDGToDefUseCFG = CFGAndUDGToDefUseCFG() 33 | ddgCreator: DDGCreator = DDGCreator() 34 | cpgs: List[CPG] = fileParse(fileName, converter, defUseConverter, ddgCreator) 35 | end = time() 36 | print(end - start) 37 | return 38 | 39 | 40 | if __name__ == '__main__': 41 | test() -------------------------------------------------------------------------------- /test/mainToolTests/UDGBuildTest.py: -------------------------------------------------------------------------------- 1 | from mainTool.udg.useDefGraph import * 2 | from mainTool.udg.astAnalyzers import ASTDefUseAnalyzer, CalleeInfos 3 | from mainTool.udg.astProvider import ASTNodeASTProvider 4 | 5 | from mainTool.antlr.CPP14Lexer import CPP14Lexer, InputStream, CommonTokenStream 6 | from mainTool.ast.builders import * 7 | 8 | from antlr4.tree.Tree import ParseTree, ParseTreeWalker 9 | 10 | walker: ParseTreeWalker = ParseTreeWalker() 11 | 12 | 13 | astAnalyzer: ASTDefUseAnalyzer = ASTDefUseAnalyzer() 14 | calleeInfos: CalleeInfos = CalleeInfos() 15 | 16 | def getParser(code: str) -> CPP14Parser: 17 | inputStream = InputStream(code) 18 | cpp14Lexer = CPP14Lexer(inputStream) 19 | tokenStream = CommonTokenStream(cpp14Lexer) 20 | parser = CPP14Parser(tokenStream) 21 | return parser 22 | 23 | def testFuncCallStmt(): 24 | calleeInfos.addArgDef("memcpy", 0) 25 | calleeInfos.addArgUse("memcpy", 1) 26 | calleeInfos.addArgDef("memmove", 0) 27 | calleeInfos.addArgUse("memmove", 1) 28 | calleeInfos.addArgDef("memset", 0) 29 | calleeInfos.addArgDef("fgets", 0) 30 | calleeInfos.addArgDef("recv", 1) 31 | 32 | calleeInfos.addArgDefStartIds("scanf", 1) 33 | calleeInfos.addArgDefStartIds("fscanf", 2) 34 | astAnalyzer.calleeInfos = calleeInfos 35 | 36 | # function call 37 | code = "memcpy(data, source, 100*sizeof(char));" # 定义了 *data,使用了 data, source, *source 38 | code1 = "memset(source, 'C' ,100- 1);" # 定义了 *source,使用了source 39 | code2 = "scanf(\"%d-%d\", &a, &b);" 40 | coden = "fscanf(stdin, \"%d\", &data);" 41 | code3 = "fgets(inputBuffer, CHAR_ARRAY_SIZE, stdin);" 42 | code4 = "fgets(data+dataLen, (int)(FILENAME_MAX - dataLen), stdin);" 43 | code5 = "recvResult = recv(connectSocket, (char*)(data+dataLen) , " + \ 44 | "sizeof(char)*(FILENAME_MAX-dataLen-1), 0);" 45 | code6 = "recv(connectSocket, (char*)(data+dataLen) , sizeof(char)*(FILENAME_MAX-dataLen-1), 0);" 46 | 47 | # ptr access 48 | ptrAccessCode = "*(p + 1 + i) = *(a + j);" 49 | # array access 50 | arrayAccessCode = "p[1 + i] = a[j][i];" 51 | arrayAccessCode1 = "a[i][j] = b[1 + i];" 52 | 53 | # struct access 54 | structAccCode = "foo.bar = 10;" 55 | structAccCode1 = "foo->bar = foo1.f1.f2;" 56 | structAccCode2 = "structCharVoid->charFirst[(sizeof(structCharVoid->charFirst) / sizeof(char))-1] = '\0';" 57 | structAccCode3 = "structCharVoid->voidSecond = (void*)SRC_STR;" 58 | structAccCode4 = "memmove(structCharVoid->charFirst, SRC_STR, sizeof(*structCharVoid));" 59 | 60 | # assign 61 | assignCode1 = "*p = a;" 62 | assignCode2 = "*p += a;" 63 | 64 | parser: CPP14Parser = getParser(coden) 65 | tree: ParseTree = parser.statement() 66 | builder: FunctionContentBuilder = FunctionContentBuilder() 67 | builder.stack.append(CompoundStatement()) 68 | walker.walk(builder, tree) 69 | expressionStatement: ExpressionStatement = builder.stack[-1].getChild(0) 70 | 71 | provider: ASTNodeASTProvider = ASTNodeASTProvider() 72 | provider.node = expressionStatement 73 | usesAndDefs: Set[UseOrDef] = astAnalyzer.analyzeAST(provider) 74 | 75 | for useOrDef in usesAndDefs: 76 | print(f"{useOrDef.astProvider.getEscapedCodeStr()} --- {useOrDef.symbol} : {useOrDef.isDef}") 77 | return 78 | 79 | 80 | def testIdentifierDecl(): 81 | code = "char source[100] = '\0';" 82 | code1 = "char* dst = (char*)malloc(sizeof(char)*100);" 83 | code2 = "struct my_struct foo;" 84 | 85 | calleeInfos.addArgDef("recv", 1) 86 | astAnalyzer.calleeInfos = calleeInfos 87 | 88 | parser: CPP14Parser = getParser(code2) 89 | tree: ParseTree = parser.statement() 90 | builder: FunctionContentBuilder = FunctionContentBuilder() 91 | builder.stack.append(CompoundStatement()) 92 | walker.walk(builder, tree) 93 | declStatement: IdentifierDeclStatement = builder.stack[-1].getChild(0) 94 | 95 | provider: ASTNodeASTProvider = ASTNodeASTProvider() 96 | provider.node = declStatement 97 | usesAndDefs: Set[UseOrDef] = astAnalyzer.analyzeAST(provider) 98 | 99 | for useOrDef in usesAndDefs: 100 | print(f"{useOrDef.astProvider.getEscapedCodeStr()} --- {useOrDef.symbol} : {useOrDef.isDef}") 101 | return 102 | 103 | 104 | if __name__ == '__main__': 105 | testFuncCallStmt() -------------------------------------------------------------------------------- /test/mainToolTests/parsingCode.py: -------------------------------------------------------------------------------- 1 | from mainTool.antlr.CPP14Lexer import CPP14Lexer, InputStream, CommonTokenStream 2 | from mainTool.antlr.CPP14Parser import CPP14Parser 3 | 4 | class A(object): 5 | pass 6 | 7 | class B(A): 8 | pass 9 | 10 | class C(B): 11 | pass 12 | 13 | if __name__ == '__main__': 14 | code = "while(x <= 1){\n x++;\n ++x;\n}" 15 | inputStream = InputStream(code) 16 | cpp14Lexer = CPP14Lexer(inputStream) 17 | tokenStream = CommonTokenStream(cpp14Lexer) 18 | parser = CPP14Parser(tokenStream) 19 | tree = parser.iterationstatement() 20 | print("========") 21 | 22 | set1 = set([(1, 2), (1, 3), (1, 4)]) 23 | set2 = set([(1, 2)]) 24 | print(set2.difference(set1)) 25 | 26 | print({"main", "memset", "memmove", "fscanf", "time", "printf", "wprintf", "puts", 27 | "sscanf", "isxdigit", "iswxdigit", "swscanf", "rand", 28 | "malloc", "free", "srand"}) 29 | -------------------------------------------------------------------------------- /test/testfiles/ComplexStruct.c: -------------------------------------------------------------------------------- 1 | struct Date { 2 | int d, m, y ; 3 | void init(int dd, int mm, int yy) { //对三个成员变量进行初始化 4 | d = dd; 5 | m = mm; 6 | y = yy; 7 | } 8 | void print() { //打印类的具体对象 9 | cout << y << "-" << m << "-" << d << endl; 10 | } 11 | }; -------------------------------------------------------------------------------- /test/testfiles/IdentifierDeclTest.c: -------------------------------------------------------------------------------- 1 | 2 | void main(){ 3 | unsigned int a = 1, b{10}, c(11), *p; 4 | a = 1; 5 | struct St s; 6 | } -------------------------------------------------------------------------------- /test/testfiles/inputcases: -------------------------------------------------------------------------------- 1 | fscanf(stdin, "%d", &data); 2 | fscanf(stdin, "%u", &data); 3 | fscanf(stdin, "%zu", &data); 4 | 5 | recvResult = recv(connectSocket, inputBuffer, CHAR_ARRAY_SIZE - 1, 0); 6 | recvResult = recv(connectSocket, (char*)(data+dataLen) , sizeof(char)*(FILENAME_MAX-dataLen-1), 0); 7 | 8 | fgets(inputBuffer, CHAR_ARRAY_SIZE, stdin); 9 | fgets(data+dataLen, (int)(100-dataLen), stdin); 10 | fgets(data+dataLen, (int)(FILENAME_MAX - dataLen), stdin); 11 | fgetws(data+dataLen , (int)(100 - dataLen), pFile); 12 | 13 | cin >> buf; -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE119_1.c: -------------------------------------------------------------------------------- 1 | #include "std_testcase.h" 2 | 3 | #include 4 | 5 | wchar_t * CWE127_Buffer_Underread__wchar_t_declare_memmove_68_badData; 6 | wchar_t * CWE127_Buffer_Underread__wchar_t_declare_memmove_68_goodG2BData; 7 | 8 | #ifndef OMITBAD 9 | 10 | /* bad function declaration */ 11 | void CWE127_Buffer_Underread__wchar_t_declare_memmove_68b_badSink(); 12 | 13 | void CWE127_Buffer_Underread__wchar_t_declare_memmove_68_bad() 14 | { 15 | wchar_t * data; 16 | wchar_t dataBuffer[100]; 17 | wmemset(dataBuffer, L'A', 100-1); 18 | dataBuffer[100-1] = L'\0'; 19 | /* FLAW: Set data pointer to before the allocated memory buffer */ 20 | data = dataBuffer - 8; 21 | CWE127_Buffer_Underread__wchar_t_declare_memmove_68_badData = data; 22 | CWE127_Buffer_Underread__wchar_t_declare_memmove_68b_badSink(); 23 | } 24 | 25 | #endif /* OMITBAD */ 26 | 27 | #ifndef OMITGOOD 28 | 29 | /* good function declarations */ 30 | void CWE127_Buffer_Underread__wchar_t_declare_memmove_68b_goodG2BSink(); 31 | 32 | /* goodG2B uses the GoodSource with the BadSink */ 33 | static void goodG2B() 34 | { 35 | wchar_t * data; 36 | wchar_t dataBuffer[100]; 37 | wmemset(dataBuffer, L'A', 100-1); 38 | dataBuffer[100-1] = L'\0'; 39 | /* FIX: Set data pointer to the allocated memory buffer */ 40 | data = dataBuffer; 41 | CWE127_Buffer_Underread__wchar_t_declare_memmove_68_goodG2BData = data; 42 | CWE127_Buffer_Underread__wchar_t_declare_memmove_68b_goodG2BSink(); 43 | } 44 | 45 | void CWE127_Buffer_Underread__wchar_t_declare_memmove_68_good() 46 | { 47 | goodG2B(); 48 | } 49 | 50 | #endif /* OMITGOOD */ 51 | 52 | /* Below is the main(). It is only used when building this testcase on 53 | * its own for testing or for building a binary to use in testing binary 54 | * analysis tools. It is not used when compiling all the testcases as one 55 | * application, which is how source code analysis tools are tested. 56 | */ 57 | 58 | #ifdef INCLUDEMAIN 59 | 60 | int main(int argc, char * argv[]) 61 | { 62 | /* seed randomness */ 63 | srand( (unsigned)time(NULL) ); 64 | #ifndef OMITGOOD 65 | printLine("Calling good()..."); 66 | CWE127_Buffer_Underread__wchar_t_declare_memmove_68_good(); 67 | printLine("Finished good()"); 68 | #endif /* OMITGOOD */ 69 | #ifndef OMITBAD 70 | printLine("Calling bad()..."); 71 | CWE127_Buffer_Underread__wchar_t_declare_memmove_68_bad(); 72 | printLine("Finished bad()"); 73 | #endif /* OMITBAD */ 74 | return 0; 75 | } -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE121_new_goto.c: -------------------------------------------------------------------------------- 1 | #include "std_testcase.h" 2 | 3 | #ifndef _WIN32 4 | #include 5 | #endif 6 | 7 | /* SRC_STR is 32 char long, including the null terminator, for 64-bit architectures */ 8 | #define SRC_STR "0123456789abcdef0123456789abcde" 9 | 10 | typedef struct _charVoid 11 | { 12 | char charFirst[16]; 13 | void * voidSecond; 14 | void * voidThird; 15 | } charVoid; 16 | 17 | #ifndef OMITBAD 18 | 19 | void CWE121_Stack_Based_Buffer_Overflow__char_type_overrun_memmove_18_bad() 20 | { 21 | goto sink; 22 | sink: 23 | { 24 | charVoid structCharVoid; 25 | structCharVoid.voidSecond = (void *)SRC_STR; 26 | /* Print the initial block pointed to by structCharVoid.voidSecond */ 27 | printLine((char *)structCharVoid.voidSecond); 28 | /* FLAW: Use the sizeof(structCharVoid) which will overwrite the pointer voidSecond */ 29 | memmove(structCharVoid.charFirst, SRC_STR, sizeof(structCharVoid)); 30 | structCharVoid.charFirst[(sizeof(structCharVoid.charFirst)/sizeof(char))-1] = '\0'; /* null terminate the string */ 31 | printLine((char *)structCharVoid.charFirst); 32 | printLine((char *)structCharVoid.voidSecond); 33 | } 34 | } 35 | 36 | #endif /* OMITBAD */ 37 | 38 | #ifndef OMITGOOD 39 | 40 | /* good1() reverses the blocks on the goto statement */ 41 | static void good1() 42 | { 43 | goto sink; 44 | sink: 45 | { 46 | charVoid structCharVoid; 47 | structCharVoid.voidSecond = (void *)SRC_STR; 48 | /* Print the initial block pointed to by structCharVoid.voidSecond */ 49 | printLine((char *)structCharVoid.voidSecond); 50 | /* FIX: Use sizeof(structCharVoid.charFirst) to avoid overwriting the pointer voidSecond */ 51 | memmove(structCharVoid.charFirst, SRC_STR, sizeof(structCharVoid.charFirst)); 52 | structCharVoid.charFirst[(sizeof(structCharVoid.charFirst)/sizeof(char))-1] = '\0'; /* null terminate the string */ 53 | printLine((char *)structCharVoid.charFirst); 54 | printLine((char *)structCharVoid.voidSecond); 55 | } 56 | } 57 | 58 | void CWE121_Stack_Based_Buffer_Overflow__char_type_overrun_memmove_18_good() 59 | { 60 | good1(); 61 | } 62 | 63 | #endif /* OMITGOOD */ 64 | 65 | /* Below is the main(). It is only used when building this testcase on 66 | its own for testing or for building a binary to use in testing binary 67 | analysis tools. It is not used when compiling all the testcases as one 68 | application, which is how source code analysis tools are tested. */ 69 | 70 | #ifdef INCLUDEMAIN 71 | 72 | int main(int argc, char * argv[]) 73 | { 74 | /* seed randomness */ 75 | srand( (unsigned)time(NULL) ); 76 | #ifndef OMITGOOD 77 | printLine("Calling good()..."); 78 | CWE121_Stack_Based_Buffer_Overflow__char_type_overrun_memmove_18_good(); 79 | printLine("Finished good()"); 80 | #endif /* OMITGOOD */ 81 | #ifndef OMITBAD 82 | printLine("Calling bad()..."); 83 | CWE121_Stack_Based_Buffer_Overflow__char_type_overrun_memmove_18_bad(); 84 | printLine("Finished bad()"); 85 | #endif /* OMITBAD */ 86 | return 0; 87 | } 88 | 89 | #endif -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53a.c: -------------------------------------------------------------------------------- 1 | /* TEMPLATE GENERATED TESTCASE FILE 2 | Filename: CWE123_Write_What_Where_Condition__connect_socket_53a.c 3 | Label Definition File: CWE123_Write_What_Where_Condition.label.xml 4 | Template File: sources-sink-53a.tmpl.c 5 | */ 6 | /* 7 | * @description 8 | * CWE: 123 Write-What-Where Condition 9 | * BadSource: connect_socket Overwrite linked list pointers using a connect socket (client side) 10 | * GoodSource: Don't overwrite linked list pointers 11 | * Sink: 12 | * BadSink : Remove element from list 13 | * Flow Variant: 53 Data flow: data passed as an argument from one function through two others to a fourth; all four functions are in different source files 14 | * 15 | * */ 16 | 17 | #include "std_testcase.h" 18 | 19 | typedef struct _linkedList 20 | { 21 | struct _linkedList *next; 22 | struct _linkedList *prev; 23 | } linkedList; 24 | 25 | typedef struct _badStruct 26 | { 27 | linkedList list; 28 | } badStruct; 29 | 30 | static linkedList *linkedListPrev, *linkedListNext; 31 | 32 | #ifdef _WIN32 33 | #include 34 | #include 35 | #include 36 | #pragma comment(lib, "ws2_32") /* include ws2_32.lib when linking */ 37 | #define CLOSE_SOCKET closesocket 38 | #else /* NOT _WIN32 */ 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #define INVALID_SOCKET -1 45 | #define SOCKET_ERROR -1 46 | #define CLOSE_SOCKET close 47 | #define SOCKET int 48 | #endif 49 | 50 | #define TCP_PORT 27015 51 | #define IP_ADDRESS "127.0.0.1" 52 | 53 | #ifndef OMITBAD 54 | 55 | /* bad function declaration */ 56 | void CWE123_Write_What_Where_Condition__connect_socket_53b_badSink(badStruct data); 57 | 58 | void CWE123_Write_What_Where_Condition__connect_socket_53_bad() 59 | { 60 | badStruct data; 61 | linkedList head = { &head, &head }; 62 | /* This simulates a Microsoft-style linked list insertion */ 63 | data.list.next = head.next; 64 | data.list.prev = head.prev; 65 | head.next = &data.list; 66 | head.prev = &data.list; 67 | { 68 | #ifdef _WIN32 69 | WSADATA wsaData; 70 | int wsaDataInit = 0; 71 | #endif 72 | int recvResult; 73 | struct sockaddr_in service; 74 | SOCKET connectSocket = INVALID_SOCKET; 75 | do 76 | { 77 | #ifdef _WIN32 78 | if (WSAStartup(MAKEWORD(2,2), &wsaData) != NO_ERROR) 79 | { 80 | break; 81 | } 82 | wsaDataInit = 1; 83 | #endif 84 | connectSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 85 | if (connectSocket == INVALID_SOCKET) 86 | { 87 | break; 88 | } 89 | memset(&service, 0, sizeof(service)); 90 | service.sin_family = AF_INET; 91 | service.sin_addr.s_addr = inet_addr(IP_ADDRESS); 92 | service.sin_port = htons(TCP_PORT); 93 | if (connect(connectSocket, (struct sockaddr*)&service, sizeof(service)) == SOCKET_ERROR) 94 | { 95 | break; 96 | } 97 | /* Abort on error or the connection was closed, make sure to recv one 98 | * less char than is in the recv_buf in order to append a terminator */ 99 | /* FLAW: overwrite linked list pointers with data */ 100 | recvResult = recv(connectSocket, (char*)&data, sizeof(data), 0); 101 | if (recvResult == SOCKET_ERROR || recvResult == 0) 102 | { 103 | break; 104 | } 105 | } 106 | while (0); 107 | if (connectSocket != INVALID_SOCKET) 108 | { 109 | CLOSE_SOCKET(connectSocket); 110 | } 111 | #ifdef _WIN32 112 | if (wsaDataInit) 113 | { 114 | WSACleanup(); 115 | } 116 | #endif 117 | } 118 | CWE123_Write_What_Where_Condition__connect_socket_53b_badSink(data); 119 | } 120 | 121 | #endif /* OMITBAD */ 122 | 123 | #ifndef OMITGOOD 124 | 125 | /* good function declaration */ 126 | void CWE123_Write_What_Where_Condition__connect_socket_53b_goodG2BSink(badStruct data); 127 | 128 | /* goodG2B uses the GoodSource with the BadSink */ 129 | static void goodG2B() 130 | { 131 | badStruct data; 132 | linkedList head = { &head, &head }; 133 | /* This simulates a Microsoft-style linked list insertion */ 134 | data.list.next = head.next; 135 | data.list.prev = head.prev; 136 | head.next = &data.list; 137 | head.prev = &data.list; 138 | /* FIX: don't overwrite linked list pointers */ 139 | ; /* empty statement needed by some flow variants */ 140 | CWE123_Write_What_Where_Condition__connect_socket_53b_goodG2BSink(data); 141 | } 142 | 143 | void CWE123_Write_What_Where_Condition__connect_socket_53_good() 144 | { 145 | goodG2B(); 146 | } 147 | 148 | #endif /* OMITGOOD */ 149 | 150 | /* Below is the main(). It is only used when building this testcase on 151 | * its own for testing or for building a binary to use in testing binary 152 | * analysis tools. It is not used when compiling all the testcases as one 153 | * application, which is how source code analysis tools are tested. 154 | */ 155 | 156 | #ifdef INCLUDEMAIN 157 | 158 | int main(int argc, char * argv[]) 159 | { 160 | /* seed randomness */ 161 | srand( (unsigned)time(NULL) ); 162 | #ifndef OMITGOOD 163 | printLine("Calling good()..."); 164 | CWE123_Write_What_Where_Condition__connect_socket_53_good(); 165 | printLine("Finished good()"); 166 | #endif /* OMITGOOD */ 167 | #ifndef OMITBAD 168 | printLine("Calling bad()..."); 169 | CWE123_Write_What_Where_Condition__connect_socket_53_bad(); 170 | printLine("Finished bad()"); 171 | #endif /* OMITBAD */ 172 | return 0; 173 | } 174 | 175 | #endif 176 | -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53b.c: -------------------------------------------------------------------------------- 1 | /* TEMPLATE GENERATED TESTCASE FILE 2 | Filename: CWE123_Write_What_Where_Condition__connect_socket_53b.c 3 | Label Definition File: CWE123_Write_What_Where_Condition.label.xml 4 | Template File: sources-sink-53b.tmpl.c 5 | */ 6 | /* 7 | * @description 8 | * CWE: 123 Write-What-Where Condition 9 | * BadSource: connect_socket Overwrite linked list pointers using a connect socket (client side) 10 | * GoodSource: Don't overwrite linked list pointers 11 | * Sink: 12 | * BadSink : Remove element from list 13 | * Flow Variant: 53 Data flow: data passed as an argument from one function through two others to a fourth; all four functions are in different source files 14 | * 15 | * */ 16 | 17 | #include "std_testcase.h" 18 | 19 | typedef struct _linkedList 20 | { 21 | struct _linkedList *next; 22 | struct _linkedList *prev; 23 | } linkedList; 24 | 25 | typedef struct _badStruct 26 | { 27 | linkedList list; 28 | } badStruct; 29 | 30 | static linkedList *linkedListPrev, *linkedListNext; 31 | 32 | #ifdef _WIN32 33 | #include 34 | #include 35 | #include 36 | #pragma comment(lib, "ws2_32") /* include ws2_32.lib when linking */ 37 | #define CLOSE_SOCKET closesocket 38 | #else /* NOT _WIN32 */ 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #define INVALID_SOCKET -1 45 | #define SOCKET_ERROR -1 46 | #define CLOSE_SOCKET close 47 | #define SOCKET int 48 | #endif 49 | 50 | #define TCP_PORT 27015 51 | #define IP_ADDRESS "127.0.0.1" 52 | 53 | /* all the sinks are the same, we just want to know where the hit originated if a tool flags one */ 54 | 55 | #ifndef OMITBAD 56 | 57 | /* bad function declaration */ 58 | void CWE123_Write_What_Where_Condition__connect_socket_53c_badSink(badStruct data); 59 | 60 | void CWE123_Write_What_Where_Condition__connect_socket_53b_badSink(badStruct data) 61 | { 62 | CWE123_Write_What_Where_Condition__connect_socket_53c_badSink(data); 63 | } 64 | 65 | #endif /* OMITBAD */ 66 | 67 | #ifndef OMITGOOD 68 | 69 | /* good function declaration */ 70 | void CWE123_Write_What_Where_Condition__connect_socket_53c_goodG2BSink(badStruct data); 71 | 72 | /* goodG2B uses the GoodSource with the BadSink */ 73 | void CWE123_Write_What_Where_Condition__connect_socket_53b_goodG2BSink(badStruct data) 74 | { 75 | CWE123_Write_What_Where_Condition__connect_socket_53c_goodG2BSink(data); 76 | } 77 | 78 | #endif /* OMITGOOD */ 79 | -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53c.c: -------------------------------------------------------------------------------- 1 | /* TEMPLATE GENERATED TESTCASE FILE 2 | Filename: CWE123_Write_What_Where_Condition__connect_socket_53c.c 3 | Label Definition File: CWE123_Write_What_Where_Condition.label.xml 4 | Template File: sources-sink-53c.tmpl.c 5 | */ 6 | /* 7 | * @description 8 | * CWE: 123 Write-What-Where Condition 9 | * BadSource: connect_socket Overwrite linked list pointers using a connect socket (client side) 10 | * GoodSource: Don't overwrite linked list pointers 11 | * Sink: 12 | * BadSink : Remove element from list 13 | * Flow Variant: 53 Data flow: data passed as an argument from one function through two others to a fourth; all four functions are in different source files 14 | * 15 | * */ 16 | 17 | #include "std_testcase.h" 18 | 19 | typedef struct _linkedList 20 | { 21 | struct _linkedList *next; 22 | struct _linkedList *prev; 23 | } linkedList; 24 | 25 | typedef struct _badStruct 26 | { 27 | linkedList list; 28 | } badStruct; 29 | 30 | static linkedList *linkedListPrev, *linkedListNext; 31 | 32 | #ifdef _WIN32 33 | #include 34 | #include 35 | #include 36 | #pragma comment(lib, "ws2_32") /* include ws2_32.lib when linking */ 37 | #define CLOSE_SOCKET closesocket 38 | #else /* NOT _WIN32 */ 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #define INVALID_SOCKET -1 45 | #define SOCKET_ERROR -1 46 | #define CLOSE_SOCKET close 47 | #define SOCKET int 48 | #endif 49 | 50 | #define TCP_PORT 27015 51 | #define IP_ADDRESS "127.0.0.1" 52 | 53 | /* all the sinks are the same, we just want to know where the hit originated if a tool flags one */ 54 | 55 | #ifndef OMITBAD 56 | 57 | /* bad function declaration */ 58 | void CWE123_Write_What_Where_Condition__connect_socket_53d_badSink(badStruct data); 59 | 60 | void CWE123_Write_What_Where_Condition__connect_socket_53c_badSink(badStruct data) 61 | { 62 | CWE123_Write_What_Where_Condition__connect_socket_53d_badSink(data); 63 | } 64 | 65 | #endif /* OMITBAD */ 66 | 67 | #ifndef OMITGOOD 68 | 69 | /* good function declaration */ 70 | void CWE123_Write_What_Where_Condition__connect_socket_53d_goodG2BSink(badStruct data); 71 | 72 | /* goodG2B uses the GoodSource with the BadSink */ 73 | void CWE123_Write_What_Where_Condition__connect_socket_53c_goodG2BSink(badStruct data) 74 | { 75 | CWE123_Write_What_Where_Condition__connect_socket_53d_goodG2BSink(data); 76 | } 77 | 78 | #endif /* OMITGOOD */ 79 | -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE123_Write_What_Where_Condition__connect_socket_53d.c: -------------------------------------------------------------------------------- 1 | /* TEMPLATE GENERATED TESTCASE FILE 2 | Filename: CWE123_Write_What_Where_Condition__connect_socket_53d.c 3 | Label Definition File: CWE123_Write_What_Where_Condition.label.xml 4 | Template File: sources-sink-53d.tmpl.c 5 | */ 6 | /* 7 | * @description 8 | * CWE: 123 Write-What-Where Condition 9 | * BadSource: connect_socket Overwrite linked list pointers using a connect socket (client side) 10 | * GoodSource: Don't overwrite linked list pointers 11 | * Sink: 12 | * BadSink : Remove element from list 13 | * Flow Variant: 53 Data flow: data passed as an argument from one function through two others to a fourth; all four functions are in different source files 14 | * 15 | * */ 16 | 17 | #include "std_testcase.h" 18 | 19 | typedef struct _linkedList 20 | { 21 | struct _linkedList *next; 22 | struct _linkedList *prev; 23 | } linkedList; 24 | 25 | typedef struct _badStruct 26 | { 27 | linkedList list; 28 | } badStruct; 29 | 30 | static linkedList *linkedListPrev, *linkedListNext; 31 | 32 | #ifdef _WIN32 33 | #include 34 | #include 35 | #include 36 | #pragma comment(lib, "ws2_32") /* include ws2_32.lib when linking */ 37 | #define CLOSE_SOCKET closesocket 38 | #else /* NOT _WIN32 */ 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #define INVALID_SOCKET -1 45 | #define SOCKET_ERROR -1 46 | #define CLOSE_SOCKET close 47 | #define SOCKET int 48 | #endif 49 | 50 | #define TCP_PORT 27015 51 | #define IP_ADDRESS "127.0.0.1" 52 | 53 | /* all the sinks are the same, we just want to know where the hit originated if a tool flags one */ 54 | 55 | #ifndef OMITBAD 56 | 57 | void CWE123_Write_What_Where_Condition__connect_socket_53d_badSink(badStruct data) 58 | { 59 | /* POTENTIAL FLAW: The following removes 'a' from the list. Because of the possible overflow this 60 | * causes a "write-what-where" aka "write4". It does another write as 61 | * well. But this is the prototypical "write-what-where" at least from 62 | * the Windows perspective. 63 | * 64 | * linkedListPrev = a->list->prev WHAT 65 | * linkedListNext = a->list->next WHERE 66 | * linkedListPrev->next = linkedListNext "at the address that prev/WHERE points, write 67 | * next/WHAT" 68 | * aka "write-what-where" 69 | * linkedListNext->prev = linkedListPrev "at the address that next/WHAT points plus 4 70 | * (because prev is the second field in 'list' hence 71 | * 4 bytes away on 32b machines), write prev/WHERE" 72 | */ 73 | linkedListPrev = data.list.prev; 74 | linkedListNext = data.list.next; 75 | linkedListPrev->next = linkedListNext; 76 | linkedListNext->prev = linkedListPrev; 77 | } 78 | 79 | #endif /* OMITBAD */ 80 | 81 | #ifndef OMITGOOD 82 | 83 | /* goodG2B uses the GoodSource with the BadSink */ 84 | void CWE123_Write_What_Where_Condition__connect_socket_53d_goodG2BSink(badStruct data) 85 | { 86 | /* POTENTIAL FLAW: The following removes 'a' from the list. Because of the possible overflow this 87 | * causes a "write-what-where" aka "write4". It does another write as 88 | * well. But this is the prototypical "write-what-where" at least from 89 | * the Windows perspective. 90 | * 91 | * linkedListPrev = a->list->prev WHAT 92 | * linkedListNext = a->list->next WHERE 93 | * linkedListPrev->next = linkedListNext "at the address that prev/WHERE points, write 94 | * next/WHAT" 95 | * aka "write-what-where" 96 | * linkedListNext->prev = linkedListPrev "at the address that next/WHAT points plus 4 97 | * (because prev is the second field in 'list' hence 98 | * 4 bytes away on 32b machines), write prev/WHERE" 99 | */ 100 | linkedListPrev = data.list.prev; 101 | linkedListNext = data.list.next; 102 | linkedListPrev->next = linkedListNext; 103 | linkedListNext->prev = linkedListPrev; 104 | } 105 | 106 | #endif /* OMITGOOD */ 107 | -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE_119_122_Struct.c: -------------------------------------------------------------------------------- 1 | #include "std_testcase.h" 2 | 3 | #include 4 | 5 | typedef struct _CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_structType 6 | { 7 | wchar_t * structFirst; 8 | } CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_structType; 9 | 10 | #ifndef OMITBAD 11 | 12 | /* bad function declaration */ 13 | void CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67b_badSink(CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_structType myStruct); 14 | 15 | void CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_bad() 16 | { 17 | wchar_t * data; 18 | CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_structType myStruct; 19 | data = (wchar_t *)malloc(100*sizeof(wchar_t)); 20 | /* FLAW: Initialize data as a large buffer that is larger than the small buffer used in the sink */ 21 | wmemset(data, L'A', 100-1); /* fill with L'A's */ 22 | data[100-1] = L'\0'; /* null terminate */ 23 | myStruct.structFirst = data; 24 | CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67b_badSink(myStruct); 25 | } 26 | 27 | #endif /* OMITBAD */ 28 | 29 | #ifndef OMITGOOD 30 | 31 | /* goodG2B uses the GoodSource with the BadSink */ 32 | void CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67b_goodG2BSink(CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_structType myStruct); 33 | 34 | static void goodG2B() 35 | { 36 | wchar_t * data; 37 | CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_structType myStruct; 38 | data = (wchar_t *)malloc(100*sizeof(wchar_t)); 39 | /* FIX: Initialize data as a small buffer that as small or smaller than the small buffer used in the sink */ 40 | wmemset(data, L'A', 50-1); /* fill with L'A's */ 41 | data[50-1] = L'\0'; /* null terminate */ 42 | myStruct.structFirst = data; 43 | CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67b_goodG2BSink(myStruct); 44 | } 45 | 46 | void CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_good() 47 | { 48 | goodG2B(); 49 | } 50 | 51 | #endif /* OMITGOOD */ 52 | 53 | /* Below is the main(). It is only used when building this testcase on 54 | * its own for testing or for building a binary to use in testing binary 55 | * analysis tools. It is not used when compiling all the testcases as one 56 | * application, which is how source code analysis tools are tested. 57 | */ 58 | 59 | #ifdef INCLUDEMAIN 60 | 61 | int main(int argc, char * argv[]) 62 | { 63 | /* seed randomness */ 64 | srand( (unsigned)time(NULL) ); 65 | #ifndef OMITGOOD 66 | printLine("Calling good()..."); 67 | CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_good(); 68 | printLine("Finished good()"); 69 | #endif /* OMITGOOD */ 70 | #ifndef OMITBAD 71 | printLine("Calling bad()..."); 72 | CWE122_Heap_Based_Buffer_Overflow__c_src_wchar_t_cpy_67_bad(); 73 | printLine("Finished bad()"); 74 | #endif /* OMITBAD */ 75 | return 0; 76 | } 77 | 78 | #endif -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE_119_122_switch.c: -------------------------------------------------------------------------------- 1 | #include "std_testcase.h" 2 | 3 | #include 4 | 5 | #ifndef OMITBAD 6 | 7 | void CWE122_Heap_Based_Buffer_Overflow__c_CWE805_char_loop_15_bad() 8 | { 9 | char * data; 10 | data = NULL; 11 | switch(6) 12 | { 13 | case 6: 14 | /* FLAW: Allocate and point data to a small buffer that is smaller than the large buffer used in the sinks */ 15 | data = (char *)malloc(50*sizeof(char)); 16 | if (data == NULL) {exit(-1);} 17 | data[0] = '\0'; /* null terminate */ 18 | break; 19 | default: 20 | /* INCIDENTAL: CWE 561 Dead Code, the code below will never run */ 21 | printLine("Benign, fixed string"); 22 | break; 23 | } 24 | { 25 | size_t i; 26 | char source[100]; 27 | memset(source, 'C', 100-1); /* fill with 'C's */ 28 | source[100-1] = '\0'; /* null terminate */ 29 | /* POTENTIAL FLAW: Possible buffer overflow if source is larger than data */ 30 | for (i = 0; i < 100; i++) 31 | { 32 | data[i] = source[i]; 33 | } 34 | data[100-1] = '\0'; /* Ensure the destination buffer is null terminated */ 35 | printLine(data); 36 | free(data); 37 | } 38 | } 39 | 40 | #endif /* OMITBAD */ 41 | 42 | #ifndef OMITGOOD 43 | 44 | /* goodG2B1() - use goodsource and badsink by changing the switch to switch(5) */ 45 | static void goodG2B1() 46 | { 47 | char * data; 48 | data = NULL; 49 | switch(5) 50 | { 51 | case 6: 52 | /* INCIDENTAL: CWE 561 Dead Code, the code below will never run */ 53 | printLine("Benign, fixed string"); 54 | break; 55 | default: 56 | /* FIX: Allocate and point data to a large buffer that is at least as large as the large buffer used in the sink */ 57 | data = (char *)malloc(100*sizeof(char)); 58 | if (data == NULL) {exit(-1);} 59 | data[0] = '\0'; /* null terminate */ 60 | break; 61 | } 62 | { 63 | size_t i; 64 | char source[100]; 65 | memset(source, 'C', 100-1); /* fill with 'C's */ 66 | source[100-1] = '\0'; /* null terminate */ 67 | /* POTENTIAL FLAW: Possible buffer overflow if source is larger than data */ 68 | for (i = 0; i < 100; i++) 69 | { 70 | data[i] = source[i]; 71 | } 72 | data[100-1] = '\0'; /* Ensure the destination buffer is null terminated */ 73 | printLine(data); 74 | free(data); 75 | } 76 | } 77 | 78 | /* goodG2B2() - use goodsource and badsink by reversing the blocks in the switch */ 79 | static void goodG2B2() 80 | { 81 | char * data; 82 | data = NULL; 83 | switch(6) 84 | { 85 | case 6: 86 | /* FIX: Allocate and point data to a large buffer that is at least as large as the large buffer used in the sink */ 87 | data = (char *)malloc(100*sizeof(char)); 88 | if (data == NULL) {exit(-1);} 89 | data[0] = '\0'; /* null terminate */ 90 | break; 91 | default: 92 | /* INCIDENTAL: CWE 561 Dead Code, the code below will never run */ 93 | printLine("Benign, fixed string"); 94 | break; 95 | } 96 | { 97 | size_t i; 98 | char source[100]; 99 | memset(source, 'C', 100-1); /* fill with 'C's */ 100 | source[100-1] = '\0'; /* null terminate */ 101 | /* POTENTIAL FLAW: Possible buffer overflow if source is larger than data */ 102 | for (i = 0; i < 100; i++) 103 | { 104 | data[i] = source[i]; 105 | } 106 | data[100-1] = '\0'; /* Ensure the destination buffer is null terminated */ 107 | printLine(data); 108 | free(data); 109 | } 110 | } 111 | 112 | void CWE122_Heap_Based_Buffer_Overflow__c_CWE805_char_loop_15_good() 113 | { 114 | goodG2B1(); 115 | goodG2B2(); 116 | } 117 | 118 | #endif /* OMITGOOD */ 119 | 120 | /* Below is the main(). It is only used when building this testcase on 121 | * its own for testing or for building a binary to use in testing binary 122 | * analysis tools. It is not used when compiling all the testcases as one 123 | * application, which is how source code analysis tools are tested. 124 | */ 125 | 126 | #ifdef INCLUDEMAIN 127 | 128 | int main(int argc, char * argv[]) 129 | { 130 | /* seed randomness */ 131 | srand( (unsigned)time(NULL) ); 132 | #ifndef OMITGOOD 133 | printLine("Calling good()..."); 134 | CWE122_Heap_Based_Buffer_Overflow__c_CWE805_char_loop_15_good(); 135 | printLine("Finished good()"); 136 | #endif /* OMITGOOD */ 137 | #ifndef OMITBAD 138 | printLine("Calling bad()..."); 139 | CWE122_Heap_Based_Buffer_Overflow__c_CWE805_char_loop_15_bad(); 140 | printLine("Finished bad()"); 141 | #endif /* OMITBAD */ 142 | return 0; 143 | } -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE_119_124_class_decl.c: -------------------------------------------------------------------------------- 1 | #include "std_testcase.h" 2 | 3 | #include 4 | 5 | namespace CWE124_Buffer_Underwrite__char_alloca_memmove_82 6 | { 7 | 8 | class CWE124_Buffer_Underwrite__char_alloca_memmove_82_base 9 | { 10 | public: 11 | /* pure virtual function */ 12 | virtual void action(char * data) = 0; 13 | }; 14 | 15 | #ifndef OMITBAD 16 | 17 | class CWE124_Buffer_Underwrite__char_alloca_memmove_82_bad : public CWE124_Buffer_Underwrite__char_alloca_memmove_82_base 18 | { 19 | public: 20 | void action(char * data); 21 | }; 22 | 23 | #endif /* OMITBAD */ 24 | 25 | #ifndef OMITGOOD 26 | 27 | class CWE124_Buffer_Underwrite__char_alloca_memmove_82_goodG2B : public CWE124_Buffer_Underwrite__char_alloca_memmove_82_base 28 | { 29 | public: 30 | void action(char * data); 31 | }; 32 | 33 | #endif /* OMITGOOD */ 34 | 35 | } -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE_119_124_class_method_decl.c: -------------------------------------------------------------------------------- 1 | #ifndef OMITBAD 2 | 3 | #include "std_testcase.h" 4 | #include "CWE124_Buffer_Underwrite__char_alloca_memmove_82.h" 5 | 6 | namespace CWE124_Buffer_Underwrite__char_alloca_memmove_82 7 | { 8 | 9 | void CWE124_Buffer_Underwrite__char_alloca_memmove_82_bad::action(char * data = NULL) 10 | { 11 | { 12 | char source[100]; 13 | memset(source, 'C', 100-1); /* fill with 'C's */ 14 | source[100-1] = '\0'; /* null terminate */ 15 | /* POTENTIAL FLAW: Possibly copying data to memory before the destination buffer */ 16 | memmove(data, source, 100*sizeof(char)); 17 | /* Ensure the destination buffer is null terminated */ 18 | data[100-1] = '\0'; 19 | printLine(data); 20 | } 21 | } 22 | 23 | } -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE_119_124_fscanf.c: -------------------------------------------------------------------------------- 1 | #include "std_testcase.h" 2 | 3 | #ifndef OMITBAD 4 | 5 | void CWE124_Buffer_Underwrite__CWE839_fscanf_17_bad() 6 | { 7 | int i,j; 8 | int data; 9 | /* Initialize data */ 10 | data = -1; 11 | for(i = 0; i < 1; i++) 12 | { 13 | /* POTENTIAL FLAW: Read data from the console using fscanf() */ 14 | fscanf(stdin, "%d", &data); 15 | } 16 | for(j = 0; j < 1; j++) 17 | { 18 | { 19 | int i; 20 | int buffer[10] = { 0 }; 21 | /* POTENTIAL FLAW: Attempt to access a negative index of the array 22 | * This code does not check to see if the array index is negative */ 23 | if (data < 10) 24 | { 25 | buffer[data] = 1; 26 | /* Print the array values */ 27 | for(i = 0; i < 10; i++) 28 | { 29 | printIntLine(buffer[i]); 30 | } 31 | } 32 | else 33 | { 34 | printLine("ERROR: Array index is negative."); 35 | } 36 | } 37 | } 38 | } 39 | 40 | #endif /* OMITBAD */ 41 | 42 | #ifndef OMITGOOD 43 | 44 | /* goodB2G() - use badsource and goodsink in the for statements */ 45 | static void goodB2G() 46 | { 47 | int i,k; 48 | int data; 49 | /* Initialize data */ 50 | data = -1; 51 | for(i = 0; i < 1; i++) 52 | { 53 | /* POTENTIAL FLAW: Read data from the console using fscanf() */ 54 | fscanf(stdin, "%d", &data); 55 | } 56 | for(k = 0; k < 1; k++) 57 | { 58 | { 59 | int i; 60 | int buffer[10] = { 0 }; 61 | /* FIX: Properly validate the array index and prevent a buffer underwrite */ 62 | if (data >= 0 && data < (10)) 63 | { 64 | buffer[data] = 1; 65 | /* Print the array values */ 66 | for(i = 0; i < 10; i++) 67 | { 68 | printIntLine(buffer[i]); 69 | } 70 | } 71 | else 72 | { 73 | printLine("ERROR: Array index is out-of-bounds"); 74 | } 75 | } 76 | } 77 | } 78 | 79 | /* goodG2B() - use goodsource and badsink in the for statements */ 80 | static void goodG2B() 81 | { 82 | int h,j; 83 | int data; 84 | /* Initialize data */ 85 | data = -1; 86 | for(h = 0; h < 1; h++) 87 | { 88 | /* FIX: Use a value greater than 0, but less than 10 to avoid attempting to 89 | * access an index of the array in the sink that is out-of-bounds */ 90 | data = 7; 91 | } 92 | for(j = 0; j < 1; j++) 93 | { 94 | { 95 | int i; 96 | int buffer[10] = { 0 }; 97 | /* POTENTIAL FLAW: Attempt to access a negative index of the array 98 | * This code does not check to see if the array index is negative */ 99 | if (data < 10) 100 | { 101 | buffer[data] = 1; 102 | /* Print the array values */ 103 | for(i = 0; i < 10; i++) 104 | { 105 | printIntLine(buffer[i]); 106 | } 107 | } 108 | else 109 | { 110 | printLine("ERROR: Array index is negative."); 111 | } 112 | } 113 | } 114 | } 115 | 116 | void CWE124_Buffer_Underwrite__CWE839_fscanf_17_good() 117 | { 118 | goodB2G(); 119 | goodG2B(); 120 | } 121 | 122 | #endif /* OMITGOOD */ 123 | 124 | /* Below is the main(). It is only used when building this testcase on 125 | its own for testing or for building a binary to use in testing binary 126 | analysis tools. It is not used when compiling all the testcases as one 127 | application, which is how source code analysis tools are tested. */ 128 | 129 | #ifdef INCLUDEMAIN 130 | 131 | int main(int argc, char * argv[]) 132 | { 133 | /* seed randomness */ 134 | srand( (unsigned)time(NULL) ); 135 | #ifndef OMITGOOD 136 | printLine("Calling good()..."); 137 | CWE124_Buffer_Underwrite__CWE839_fscanf_17_good(); 138 | printLine("Finished good()"); 139 | #endif /* OMITGOOD */ 140 | #ifndef OMITBAD 141 | printLine("Calling bad()..."); 142 | CWE124_Buffer_Underwrite__CWE839_fscanf_17_bad(); 143 | printLine("Finished bad()"); 144 | #endif /* OMITBAD */ 145 | return 0; 146 | } 147 | 148 | #endif -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/CWE_119_fget.c: -------------------------------------------------------------------------------- 1 | #include "std_testcase.h" 2 | 3 | #define CHAR_ARRAY_SIZE (3 * sizeof(data) + 2) 4 | 5 | #ifndef OMITBAD 6 | 7 | void CWE121_Stack_Based_Buffer_Overflow__CWE129_fgets_01_bad() 8 | { 9 | int data; 10 | /* Initialize data */ 11 | data = -1; 12 | { 13 | char inputBuffer[CHAR_ARRAY_SIZE] = ""; 14 | /* POTENTIAL FLAW: Read data from the console using fgets() */ 15 | if (fgets(inputBuffer, CHAR_ARRAY_SIZE, stdin) != NULL) 16 | { 17 | /* Convert to int */ 18 | data = atoi(inputBuffer); 19 | } 20 | else 21 | { 22 | printLine("fgets() failed."); 23 | } 24 | } 25 | { 26 | int i; 27 | int buffer[10] = { 0 }; 28 | /* POTENTIAL FLAW: Attempt to write to an index of the array that is above the upper bound 29 | * This code does check to see if the array index is negative */ 30 | if (data >= 0) 31 | { 32 | buffer[data] = 1; 33 | /* Print the array values */ 34 | for(i = 0; i < 10; i++) 35 | { 36 | printIntLine(buffer[i]); 37 | } 38 | } 39 | else 40 | { 41 | printLine("ERROR: Array index is negative."); 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /test/testfiles/sard_test_cases/io.c: -------------------------------------------------------------------------------- 1 | #include // for PRId64 2 | #include 3 | #include 4 | #include 5 | #include "std_testcase.h" 6 | 7 | #ifndef _WIN32 8 | #include 9 | #endif 10 | 11 | void printLine (const char * line) 12 | { 13 | if(line != NULL) 14 | { 15 | printf("%s\n", line); 16 | } 17 | } 18 | 19 | void printWLine (const wchar_t * line) 20 | { 21 | if(line != NULL) 22 | { 23 | wprintf(L"%ls\n", line); 24 | } 25 | } 26 | 27 | void printIntLine (int intNumber) 28 | { 29 | printf("%d\n", intNumber); 30 | } 31 | 32 | void printShortLine (short shortNumber) 33 | { 34 | printf("%hd\n", shortNumber); 35 | } 36 | 37 | void printFloatLine (float floatNumber) 38 | { 39 | printf("%f\n", floatNumber); 40 | } 41 | 42 | void printLongLine (long longNumber) 43 | { 44 | printf("%ld\n", longNumber); 45 | } 46 | 47 | void printLongLongLine (int64_t longLongIntNumber) 48 | { 49 | printf("%lld\n", longLongIntNumber); 50 | } 51 | 52 | void printSizeTLine (size_t sizeTNumber) 53 | { 54 | printf("%zu\n", sizeTNumber); 55 | } 56 | 57 | void printHexCharLine (char charHex) 58 | { 59 | printf("%02x\n", charHex); 60 | } 61 | 62 | void printWcharLine(wchar_t wideChar) 63 | { 64 | /* ISO standard dictates wchar_t can be ref'd only with %ls, so we must make a 65 | * string to print a wchar */ 66 | wchar_t s[2]; 67 | s[0] = wideChar; 68 | s[1] = L'\0'; 69 | printf("%ls\n", s); 70 | } 71 | 72 | void printUnsignedLine(unsigned unsignedNumber) 73 | { 74 | printf("%u\n", unsignedNumber); 75 | } 76 | 77 | void printHexUnsignedCharLine(unsigned char unsignedCharacter) 78 | { 79 | printf("%02x\n", unsignedCharacter); 80 | } 81 | 82 | void printDoubleLine(double doubleNumber) 83 | { 84 | printf("%g\n", doubleNumber); 85 | } 86 | 87 | void printStructLine (const twoIntsStruct * structTwoIntsStruct) 88 | { 89 | printf("%d -- %d\n", structTwoIntsStruct->intOne, structTwoIntsStruct->intTwo); 90 | } 91 | 92 | void printBytesLine(const unsigned char * bytes, size_t numBytes) 93 | { 94 | size_t i; 95 | for (i = 0; i < numBytes; ++i) 96 | { 97 | printf("%02x", bytes[i]); 98 | } 99 | puts(""); /* output newline */ 100 | } 101 | 102 | /* Decode a string of hex characters into the bytes they represent. The second 103 | * parameter specifies the length of the output buffer. The number of bytes 104 | * actually written to the output buffer is returned. */ 105 | size_t decodeHexChars(unsigned char * bytes, size_t numBytes, const char * hex) 106 | { 107 | size_t numWritten = 0; 108 | 109 | /* We can't sscanf directly into the byte array since %02x expects a pointer to int, 110 | * not a pointer to unsigned char. Also, since we expect an unbroken string of hex 111 | * characters, we check for that before calling sscanf; otherwise we would get a 112 | * framing error if there's whitespace in the input string. */ 113 | while (numWritten < numBytes && isxdigit(hex[2 * numWritten]) && isxdigit(hex[2 * numWritten + 1])) 114 | { 115 | int byte; 116 | sscanf(&hex[2 * numWritten], "%02x", &byte); 117 | bytes[numWritten] = (unsigned char) byte; 118 | ++numWritten; 119 | } 120 | 121 | return numWritten; 122 | } 123 | 124 | /* Decode a string of hex characters into the bytes they represent. The second 125 | * parameter specifies the length of the output buffer. The number of bytes 126 | * actually written to the output buffer is returned. */ 127 | size_t decodeHexWChars(unsigned char * bytes, size_t numBytes, const wchar_t * hex) 128 | { 129 | size_t numWritten = 0; 130 | 131 | /* We can't swscanf directly into the byte array since %02x expects a pointer to int, 132 | * not a pointer to unsigned char. Also, since we expect an unbroken string of hex 133 | * characters, we check for that before calling swscanf; otherwise we would get a 134 | * framing error if there's whitespace in the input string. */ 135 | while (numWritten < numBytes && iswxdigit(hex[2 * numWritten]) && iswxdigit(hex[2 * numWritten + 1])) 136 | { 137 | int byte; 138 | swscanf(&hex[2 * numWritten], L"%02x", &byte); 139 | bytes[numWritten] = (unsigned char) byte; 140 | ++numWritten; 141 | } 142 | 143 | return numWritten; 144 | } 145 | 146 | /* The two functions always return 1 or 0, so a tool should be able to 147 | identify that uses of these functions will always return these values */ 148 | int globalReturnsTrue() 149 | { 150 | return 1; 151 | } 152 | 153 | int globalReturnsFalse() 154 | { 155 | return 0; 156 | } 157 | 158 | int globalReturnsTrueOrFalse() 159 | { 160 | return (rand() % 2); 161 | } 162 | 163 | /* The variables below are declared "const", so a tool should 164 | be able to identify that reads of these will always return their 165 | initialized values. */ 166 | const int GLOBAL_CONST_TRUE = 1; /* true */ 167 | const int GLOBAL_CONST_FALSE = 0; /* false */ 168 | const int GLOBAL_CONST_FIVE = 5; 169 | 170 | /* The variables below are not defined as "const", but are never 171 | assigned any other value, so a tool should be able to identify that 172 | reads of these will always return their initialized values. */ 173 | int globalTrue = 1; /* true */ 174 | int globalFalse = 0; /* false */ 175 | int globalFive = 5; 176 | 177 | 178 | 179 | /* define global argc and argv */ 180 | 181 | #ifdef __cplusplus 182 | extern "C" { 183 | #endif 184 | 185 | int globalArgc = 0; 186 | char** globalArgv = NULL; 187 | 188 | #ifdef __cplusplus 189 | } 190 | #endif 191 | --------------------------------------------------------------------------------