├── .github └── workflows │ ├── documentation.yml │ └── python-package.yml ├── .gitignore ├── LICENSE ├── README.md ├── cinspector ├── __init__.py ├── _gen_parser.py ├── analysis │ ├── __init__.py │ ├── call_graph.py │ └── cfg.py ├── interfaces.py ├── nodes │ ├── __init__.py │ ├── abstract_node.py │ ├── basic_node.py │ ├── edit.py │ └── node.py └── parser.py ├── docs ├── Makefile ├── _static │ └── .gitkeep ├── _templates │ └── .gitkeep ├── conf.py ├── index.rst ├── make.bat └── md │ └── introduction.md ├── pyproject.toml ├── script ├── check.sh └── rebuild.sh ├── setup.py └── test ├── test_Util.py ├── test_analysis_CFG.py ├── test_analysis_CallGraph.py ├── test_node_AssignmentExpressionNode.py ├── test_node_CompoundStatementNode.py ├── test_node_DeclarationNode.py ├── test_node_Edit.py ├── test_node_EnumSpecifierNode.py ├── test_node_FunctionDefinitionNode.py ├── test_node_IfStatementNode.py ├── test_node_ParameterDeclarationNode.py ├── test_node_ParenthesizedExpressionNode.py ├── test_node_PreprocDefNode.py ├── test_node_StructSpecifierNode.py └── test_node_tokenize.py /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Documentation 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: write 13 | 14 | jobs: 15 | deploy: 16 | environment: 17 | name: github-pages 18 | url: ${{ steps.deployment.outputs.page_url }} 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v3 22 | - uses: actions/setup-python@v4 23 | - name: Install dependencies 24 | run: | 25 | pip install tree-sitter networkx 26 | pip install sphinx sphinx_rtd_theme myst_parser 27 | - name: Generation 28 | run: | 29 | sphinx-apidoc cinspector -o docs 30 | cd docs 31 | make html 32 | - name: Setup Pages 33 | uses: actions/configure-pages@v3 34 | - name: Upload artifact 35 | uses: actions/upload-pages-artifact@v1 36 | with: 37 | path: 'docs/_build/html/' 38 | - name: Deploy to GitHub Pages 39 | id: deployment 40 | uses: actions/deploy-pages@v1 41 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "master" ] 9 | pull_request: 10 | branches: [ "master" ] 11 | 12 | jobs: 13 | yapf-format: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.11"] 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install yapf 30 | - name: Test with yapf 31 | run: | 32 | yapf -d -e docs --style=google --recursive . 33 | 34 | pytest: 35 | runs-on: ${{ matrix.os }} 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | python-version: ["3.8", "3.9", "3.10", "3.11"] 40 | os: ['ubuntu-latest', 'macos-latest'] 41 | 42 | steps: 43 | - uses: actions/checkout@v3 44 | - name: Set up Python ${{ matrix.python-version }} 45 | uses: actions/setup-python@v3 46 | with: 47 | python-version: ${{ matrix.python-version }} 48 | - name: Install dependencies 49 | run: | 50 | python -m pip install --upgrade pip 51 | python -m pip install tree-sitter==0.20.4 pytest wheel 52 | - name: Test with pytest 53 | run: | 54 | python setup.py sdist bdist_wheel 55 | pip install dist/*.tar.gz 56 | cd test 57 | pytest 58 | 59 | mypy: 60 | runs-on: ${{ matrix.os }} 61 | strategy: 62 | fail-fast: false 63 | matrix: 64 | python-version: ["3.11"] 65 | os: ['ubuntu-latest'] 66 | 67 | steps: 68 | - uses: actions/checkout@v3 69 | - name: Set up Python ${{ matrix.python-version }} 70 | uses: actions/setup-python@v3 71 | with: 72 | python-version: ${{ matrix.python-version }} 73 | - name: Install dependencies 74 | run: | 75 | python -m pip install --upgrade pip 76 | python -m pip install mypy tree-sitter==0.20.4 types-tree-sitter networkx 77 | - name: Test with mypy 78 | run: | 79 | mypy cinspector 80 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 3 | 4 | ### MacOS ### 5 | .DS_Store 6 | 7 | ### Python ### 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | # *.so -> comment since tree-sitter.so 15 | 16 | # mute testcase 17 | test/testcase 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | cover/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/* 83 | !docs/conf.py 84 | !docs/make.bat 85 | !docs/Makefile 86 | !docs/_static/ 87 | !docs/_templates/ 88 | !docs/md/ 89 | !docs/index.rst 90 | 91 | # PyBuilder 92 | .pybuilder/ 93 | target/ 94 | 95 | # Jupyter Notebook 96 | .ipynb_checkpoints 97 | 98 | # IPython 99 | profile_default/ 100 | ipython_config.py 101 | 102 | # pyenv 103 | # For a library or package, you might want to ignore these files since the code is 104 | # intended to run in multiple environments; otherwise, check them in: 105 | # .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies. 112 | #Pipfile.lock 113 | 114 | # poetry 115 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 116 | # This is especially recommended for binary packages to ensure reproducibility, and is more 117 | # commonly ignored for libraries. 118 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 119 | #poetry.lock 120 | 121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .env 133 | .venv 134 | env/ 135 | venv/ 136 | ENV/ 137 | env.bak/ 138 | venv.bak/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 169 | #.idea/ 170 | 171 | # End of https://www.toptal.com/developers/gitignore/api/python 172 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2023 Peiwei Hu 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cinspector 2 | 3 | ## 1. Introduction 4 | 5 | ### 1.1 What is cinspector? 6 | 7 | **cinspector** is a static analysis framework targeting the C-based project and supports the parse, edit, analysis of the code. Based on tree-sitter, it achieves the compilation-needless analysis, which relieves the burden of configuring environment dependencies, compilation, and so on. This also enables cinspector suitable for the situations like analyzing the decompiled pseudocode. 8 | 9 | ### 1.2 What is it good for? 10 | 11 | Any situations requiring a quick and lightweight code edit and analysis, including but not limited to: 12 | 13 | + Implement bug oracles for bug detection 14 | + Analyze the decompiler output 15 | + Inspect and discover the code properties for research requirements 16 | 17 | 18 | ## 2. Install 19 | 20 | ### 2.1 pip 21 | 22 | Currently the version in the pip repository is not frequently updated. We won't recommand this way before cinspector owns a mature version. 23 | 24 | ### 2.2 local 25 | 26 | Download this repository: 27 | 28 | ```bash 29 | git clone https://github.com/PeiweiHu/cinspector.git 30 | ``` 31 | 32 | Build and install locally: 33 | 34 | ```bash 35 | cd cinspector 36 | ./script/rebuild.sh 37 | ``` 38 | 39 | ## 3. Usage 40 | 41 | [Here](https://peiweihu.github.io/cinspector/) is the automatic generated documentation by sphinx. 42 | 43 | As a quick start, we provide a usage example that how to extract the call relationship in a source file (lets say it's foo.c) in the following. The content of foo.c is shown as below. 44 | 45 | ```c 46 | // foo.c 47 | #include "stdio.h" 48 | 49 | int callee() { 50 | int a; 51 | scanf("%d", &a); 52 | return a; 53 | } 54 | 55 | void caller() { 56 | printf("%d\n", callee()); 57 | } 58 | 59 | int main(void) { 60 | printf("%d\n", callee()); 61 | caller(); 62 | return 0; 63 | } 64 | ``` 65 | 66 | First, we leverage the `CCode` in `cinspector.interfaces` to read the content for the later analysis. `CCode` can accept any code snippet. 67 | 68 | ```python 69 | from cinspector.interfaces import CCode 70 | 71 | with open('foo.c', 'r') as r: 72 | content = r.read() 73 | 74 | cc = CCode(content) # now we get an instance of CCode 75 | ``` 76 | 77 | Now we wanna extract all function definitions in foo.c. `CCode` contains the method `get_by_type_name(type_name: str)` which can collect all nodes with the type `type_name`. The node types in cinspector are consistent with the node types in tree-sitter. You can inspect the node types with the console scipt `cinspector-parser` which is available after installing cinspector. 78 | 79 | ```bash 80 | $ cinspector-parser -f foo.c # print the ast tree 81 | 82 | ...... 83 | FunctionDefinitionNode type=function_definition start_point=(8, 0) end_point=(10, 1) 'void caller() { printf("%d\\n", callee());}' 84 | ...... 85 | 86 | ``` 87 | 88 | We can see the function definition has the type (the attribute `node_type` in BasicNode) `function_definition`, thus we can get all function defnitions in foo.c by the following code. 89 | 90 | ```python 91 | func_defs = cc.get_by_type_name('function_definition') 92 | ``` 93 | 94 | Let's sort the nodes in `func_defs` and check whether the first function definition is `callee`. 95 | 96 | ```python 97 | from cinspector.nodes import * 98 | 99 | func_defs = Util.sort_nodes(func_defs) 100 | print(func_defs[0].src) 101 | 102 | """ 103 | output: 104 | 105 | int callee() { int a; 106 | scanf("%d", &a); 107 | return a;} 108 | """ 109 | ``` 110 | 111 | Now we need to get the call within the function definition node. There are two types of nodes in cinspector, the class `Node` is their base class. One is `BasicNode` (and subclass), representing the actual syntactic element in the source code. The other is `AbstractNode` (and subclass), which is logical and mainly for code analysis (like CFG). The function definition node `FunctionDefinitionNode` is the subclass of `BasicNode`, which owns the method `children_by_type_name(name: Union[str, List[str]])` to get all the children nodes with specific type(s) recursively. Thus, we can print the call relationship of each function definition as below. 112 | 113 | ```python 114 | for _f in func_defs: 115 | callees = _f.children_by_type_name('call_expression') 116 | for _c in callees: 117 | print(f"{_f.name} invokes {_c.function}") 118 | 119 | """ 120 | output: 121 | 122 | callee invokes scanf 123 | caller invokes printf 124 | caller invokes callee 125 | main invokes printf 126 | main invokes callee 127 | main invokes caller 128 | """ 129 | ``` 130 | 131 | All done. By the way, cinspector already provides the call graph analysis. You can find it in `cinspector.analysis`. 132 | -------------------------------------------------------------------------------- /cinspector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeiweiHu/cinspector/e827cfd9fccb1c5d7c4e0169c0da0bd2006de63b/cinspector/__init__.py -------------------------------------------------------------------------------- /cinspector/_gen_parser.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from tree_sitter import Language 3 | 4 | subprocess.check_call([ 5 | 'git', 'clone', '-b', 'v0.20.2', 6 | 'https://github.com/tree-sitter/tree-sitter-c.git' 7 | ]) 8 | Language.build_library('cinspector-tree-sitter.so', ['tree-sitter-c']) 9 | -------------------------------------------------------------------------------- /cinspector/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from .call_graph import CallGraph 2 | from .cfg import CFG, BaseCFG 3 | -------------------------------------------------------------------------------- /cinspector/analysis/call_graph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Call Graph 3 | """ 4 | 5 | from typing import Iterable 6 | from networkx import DiGraph # type: ignore 7 | from cinspector.nodes import FunctionDefinitionNode, CallExpressionNode, VariadicParameterNode 8 | 9 | 10 | class CallGraph: 11 | """ Generate the call graph 12 | 13 | Accept a list of function nodes and analyze the 14 | invocation relationship between them. Note that 15 | only explicit invocations will be catched instead 16 | of indirect calls. 17 | 18 | Attribtes: 19 | funcs (Iterable[FunctionDefinitionNode]): involved functions 20 | """ 21 | 22 | def __init__(self, funcs: Iterable[FunctionDefinitionNode]) -> None: 23 | self.funcs = funcs 24 | 25 | def is_identical(self, call: CallExpressionNode, 26 | func: FunctionDefinitionNode) -> bool: 27 | """ check whether the is the invocation of 28 | 29 | We involve two checks to decide whether is the invocation of 30 | 1. same func name 31 | 2. same parameter number 32 | 33 | #TODO: It would be more precise if add parameter type check. But this 34 | requires dataflow analysis which we will implement later. 35 | """ 36 | 37 | assert (not call.is_indirect()) 38 | # name check 39 | call_name = call.function.src 40 | func_name = func.name.src 41 | name_check = call_name == func_name 42 | 43 | # parameter check 44 | call_para_num = len(call.arguments) 45 | fixed_para_num = 0 46 | variadic_para = False 47 | for _a in func.parameters.children: 48 | if isinstance(_a, VariadicParameterNode): 49 | variadic_para = True 50 | else: 51 | fixed_para_num += 1 52 | if variadic_para: 53 | para_num_check = call_para_num >= fixed_para_num 54 | else: 55 | para_num_check = call_para_num == fixed_para_num 56 | return name_check and para_num_check 57 | 58 | def analysis(self) -> DiGraph: 59 | graph = DiGraph() 60 | for _f in self.funcs: 61 | graph.add_node(_f) 62 | # start analyzing each function 63 | calls = _f.descendants_by_type_name('call_expression') 64 | for _c in calls: 65 | if _c.is_indirect(): 66 | continue 67 | for _ in self.funcs: 68 | if self.is_identical(_c, _): 69 | graph.add_edge(_f, _) 70 | return graph 71 | -------------------------------------------------------------------------------- /cinspector/analysis/cfg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Control Flow Graph-related analysis 3 | """ 4 | from networkx import DiGraph # type: ignore 5 | import networkx as nx # type: ignore 6 | from typing import Optional, Dict, Union, List 7 | from cinspector.nodes import Util, BorderNode, Node 8 | from cinspector.nodes import ForStatementNode, YForLoopNode, NForLoopNode 9 | from cinspector.nodes import WhileStatementNode, YWhileLoopNode, NWhileLoopNode 10 | from cinspector.nodes import IfStatementNode, YConditionNode, NConditionNode 11 | from cinspector.nodes import SwitchNode, FunctionDefinitionNode 12 | from cinspector.nodes import DoWhileLoopNode, DoStatementNode 13 | 14 | 15 | class BasicBlock(Util): 16 | 17 | def __init__(self, nodes: list) -> None: 18 | self.nodes = nodes 19 | 20 | 21 | class BaseCFG(Util): 22 | 23 | def __init__(self, stmts: List[Node]) -> None: 24 | self.stmts = stmts 25 | self.start: Optional[BorderNode] = None 26 | self.end: Optional[BorderNode] = None 27 | self.cfg = DiGraph() 28 | self.generate() 29 | 30 | def execution_path(self): 31 | path = list(nx.all_simple_paths(self.cfg, self.start, self.end)) 32 | # filter out start and end 33 | path = [_[1:-1] for _ in path] 34 | return path 35 | 36 | def generate(self): 37 | """CFG.generate() generate the control flow graph""" 38 | """ 39 | construct the initial state first 40 | int a, b; 41 | a = 1; 42 | b = 2; 43 | to: 44 | 45 | | 46 | int a, b; 47 | | 48 | a = 1; 49 | | 50 | b = 2; 51 | | 52 | 53 | """ 54 | stmt_lst = self.stmts 55 | stmt_lst.insert(0, BorderNode(node_type='')) 56 | stmt_lst.append(BorderNode(node_type='')) 57 | self.start, self.end = stmt_lst[0], stmt_lst[-1] 58 | for _ in range(len(stmt_lst) - 1): 59 | self.cfg.add_edge(stmt_lst[_], stmt_lst[_ + 1]) 60 | 61 | # iteratively detect branch 62 | cur = [self.start] 63 | """ 64 | record the label and corresponding statement 65 | 66 | fail: 67 | free(sth); 68 | return -1; 69 | 70 | label_map will create the mapping between "fail" and free statement 71 | """ 72 | label_map: Dict[str:Union[Node, List[Node]]] = dict() 73 | 74 | def _update_label_map(_c: Node, header: Union[Node, List[Node]]): 75 | """ 76 | The initial recorded label statement may be 77 | deconstructed, we need to track this variation. 78 | The
may also be list, e.g. [ycond, ncond] 79 | for if statement. This function should typically 80 | invoked while some nodes is deleted in self.cfg. 81 | 82 | 83 | Args: 84 | _c (Node): the deleted node 85 | header (Union[Node, List[node]]): the statement that replaces _c 86 | 87 | Returns: 88 | no return value 89 | """ 90 | update = list() 91 | for _k, _v in label_map.items(): 92 | if _v == _c: 93 | update.append(_k) 94 | for _u in update: 95 | # print(f'{_u}: {label_map[_u]} -> {header}') 96 | label_map[_u] = header 97 | 98 | while True: 99 | if not cur: 100 | break 101 | next = [] 102 | changed = 0 103 | 104 | for _c in cur: 105 | next += list(self.cfg.successors(_c)) 106 | 107 | if _c.node_type == 'compound_statement': 108 | # divide compound statement directly 109 | pred = list(self.cfg.predecessors(_c)) 110 | succ = list(self.cfg.successors(_c)) 111 | children = [ 112 | _ for _ in _c.children if not _.node_type in ['{', '}'] 113 | ] 114 | # skip empty compound statement 115 | if not children: 116 | continue 117 | # link every child 118 | for _ in range(len(children) - 1): 119 | self.cfg.add_edge(children[_], children[_ + 1]) 120 | # link the first and last child node with pred and succ node 121 | for _p in pred: 122 | self.cfg.add_edge(_p, children[0]) 123 | for _s in succ: 124 | self.cfg.add_edge(children[-1], _s) 125 | # remove the old node 126 | self.cfg.remove_node(_c) 127 | # since the graph is changed, analyse from scratch 128 | changed = 1 129 | _update_label_map(_c, children[0]) 130 | 131 | elif _c.node_type == 'labeled_statement': 132 | # record the statement corresponding to label 133 | label = _c.child_by_field_name('label').src 134 | label_statement = _c.children[-1] 135 | label_map[label] = label_statement 136 | # replace label with statement 137 | pred = list(self.cfg.predecessors(_c)) 138 | succ = list(self.cfg.successors(_c)) 139 | for _p in pred: 140 | self.cfg.add_edge(_p, label_statement) 141 | for _s in succ: 142 | self.cfg.add_edge(label_statement, _s) 143 | # remove label node 144 | self.cfg.remove_node(_c) 145 | changed = 1 146 | _update_label_map(_c, label_statement) 147 | 148 | elif _c.node_type == 'if_statement': 149 | """ 150 | Ideally: 151 | from: 152 | pred 153 | | 154 | condition 155 | / \ 156 | consequence alternative 157 | \ / 158 | succ 159 | to: 160 | pred 161 | / \ 162 | ycond ncond 163 | | | 164 | consequence alternative 165 | \ / 166 | succ 167 | 168 | if alternative doesn't exist, the edge between alternative 169 | and succ nodes will be replaced by the edge between ncond 170 | and succ node. 171 | 172 | from: 173 | pred 174 | | 175 | condition 176 | / \ 177 | consequence | 178 | \ / 179 | succ 180 | to: 181 | pred 182 | / \ 183 | ycond | 184 | | | 185 | consequence ncond 186 | \ / 187 | succ 188 | """ 189 | assert (isinstance(_c, IfStatementNode)) 190 | ycond = YConditionNode(_c.condition) 191 | ncond = NConditionNode(_c.condition) 192 | self.cfg.add_edge(ycond, _c.consequence) 193 | if _c.alternative: 194 | self.cfg.add_edge(ncond, _c.alternative) 195 | # add new edge. don't remove old edges, they will be removed while deleting old node 196 | pred = list(self.cfg.predecessors(_c)) 197 | succ = list(self.cfg.successors(_c)) 198 | for _p in pred: 199 | self.cfg.add_edge(_p, ycond) 200 | self.cfg.add_edge(_p, ncond) 201 | for _s in succ: 202 | self.cfg.add_edge(_c.consequence, _s) 203 | if _c.alternative: 204 | self.cfg.add_edge(_c.alternative, _s) 205 | else: 206 | self.cfg.add_edge(ncond, _s) 207 | # remove old node 208 | self.cfg.remove_node(_c) 209 | # since the graph is changed, analyse from scratch 210 | changed = 1 211 | _update_label_map(_c, [ycond, ncond]) 212 | 213 | elif _c.node_type == 'switch_statement': 214 | pred = list(self.cfg.predecessors(_c)) 215 | succ = list(self.cfg.successors(_c)) 216 | condition = _c.child_by_field_name('condition') 217 | case_statement_lst = _c.descendants_by_type_name( 218 | 'case_statement') 219 | _sw_node_lst = [] 220 | for _case_statement in case_statement_lst: 221 | _value = _case_statement.child_by_field_name('value') 222 | _sw_node = SwitchNode(condition, _value) 223 | _sw_node_lst.append(_sw_node) 224 | _children = [] 225 | for _ in _case_statement.children: 226 | if _.node_type in ['case', ':', 'default']: 227 | continue 228 | if _value and _.src == _value.src: 229 | continue 230 | _children.append(_) 231 | for _p in pred: 232 | self.cfg.add_edge(_p, _sw_node) 233 | # statements exist in case 234 | if _children: 235 | self.cfg.add_edge(_sw_node, _children[0]) 236 | for _ in range(len(_children) - 1): 237 | self.cfg.add_edge(_children[_], 238 | _children[_ + 1]) 239 | for _s in succ: 240 | self.cfg.add_edge(_children[-1], _s) 241 | # no statement within case 242 | else: 243 | for _s in succ: 244 | self.cfg.add_edge(_sw_node, _s) 245 | self.cfg.remove_node(_c) 246 | changed = 1 247 | _update_label_map(_c, _sw_node_lst) 248 | 249 | elif _c.node_type == 'return_statement': 250 | """ To avoid the statements after return statement (e.g. 251 | the labeled_statement) is ignored directly, we unlink 252 | the return statement with successors later. 253 | """ 254 | pass 255 | 256 | elif _c.node_type == 'for_statement': 257 | assert (isinstance(_c, ForStatementNode)) 258 | yloop = YForLoopNode(_c.initializer, _c.condition, 259 | _c.update) 260 | nloop = NForLoopNode(_c.initializer, _c.condition, 261 | _c.update) 262 | if _c.body: 263 | loop_body = _c.body 264 | self.cfg.add_edge(yloop, loop_body) 265 | else: 266 | # loop body may not exist 267 | loop_body = yloop 268 | pred = list(self.cfg.predecessors(_c)) 269 | succ = list(self.cfg.successors(_c)) 270 | for _p in pred: 271 | self.cfg.add_edge(_p, yloop) 272 | self.cfg.add_edge(_p, nloop) 273 | for _s in succ: 274 | self.cfg.add_edge(loop_body, _s) 275 | self.cfg.add_edge(nloop, _s) 276 | # remove old for statement 277 | self.cfg.remove_node(_c) 278 | changed = 1 279 | _update_label_map(_c, [yloop, nloop]) 280 | 281 | elif _c.node_type == 'do_statement': 282 | assert (isinstance(_c, DoStatementNode)) 283 | cond = DoWhileLoopNode(_c.condition) 284 | body = _c.body 285 | pred = list(self.cfg.predecessors(_c)) 286 | succ = list(self.cfg.successors(_c)) 287 | self.cfg.add_edge(body, cond) 288 | for _p in pred: 289 | self.cfg.add_edge(_p, body) 290 | for _s in succ: 291 | self.cfg.add_edge(cond, _s) 292 | self.cfg.remove_node(_c) 293 | changed = 1 294 | _update_label_map(_c, [body]) 295 | 296 | elif _c.node_type == 'while_statement': 297 | assert (isinstance(_c, WhileStatementNode)) 298 | yloop = YWhileLoopNode(_c.condition) 299 | nloop = NWhileLoopNode(_c.condition) 300 | if _c.body: 301 | loop_body = _c.body 302 | self.cfg.add_edge(yloop, loop_body) 303 | else: 304 | # loop body may not exist 305 | loop_body = yloop 306 | pred = list(self.cfg.predecessors(_c)) 307 | succ = list(self.cfg.successors(_c)) 308 | for _p in pred: 309 | self.cfg.add_edge(_p, yloop) 310 | self.cfg.add_edge(_p, nloop) 311 | for _s in succ: 312 | self.cfg.add_edge(loop_body, _s) 313 | self.cfg.add_edge(nloop, _s) 314 | # remove old for statement 315 | self.cfg.remove_node(_c) 316 | changed = 1 317 | _update_label_map(_c, [yloop, nloop]) 318 | 319 | else: 320 | assert "Undefined statement" 321 | 322 | if changed: 323 | next = [self.start] 324 | break 325 | 326 | # start analysing next nodes 327 | cur = next 328 | 329 | # link goto and label 330 | goto_lst = [] 331 | for _n in self.cfg.nodes: 332 | if _n.node_type == 'goto_statement': 333 | goto_lst.append(_n) 334 | # remove old edges 335 | succ = list(self.cfg.successors(_n)) 336 | for _s in succ: 337 | self.cfg.remove_edge(_n, _s) 338 | 339 | for _g in goto_lst: 340 | label = _g.child_by_field_name('label').src 341 | assert (label in list(label_map.keys())) 342 | if not isinstance(label_map[label], list): 343 | label_map[label] = [label_map[label]] 344 | for _ in label_map[label]: 345 | self.cfg.add_edge(_g, _) 346 | 347 | # unlink return statement and successors 348 | for _n in self.cfg.nodes: 349 | if _n.node_type == 'return_statement': 350 | succ = list(self.cfg.successors(_n)) 351 | for _s in succ: 352 | self.cfg.remove_edge(_n, _s) 353 | self.cfg.add_edge(_n, self.end) 354 | 355 | def merge(self): 356 | """ 357 | self.generate generate the DiGraph in which nodes are single statement 358 | self.merge merge the statements into the basic block 359 | """ 360 | pass 361 | 362 | 363 | class CFG(BaseCFG): 364 | 365 | def __init__(self, function_def: FunctionDefinitionNode) -> None: 366 | self.function_def = function_def 367 | stmt_lst = [ 368 | _ for _ in self.function_def.body.children 369 | if _.node_type not in ['{', '}'] 370 | ] 371 | super().__init__(stmt_lst) 372 | -------------------------------------------------------------------------------- /cinspector/interfaces.py: -------------------------------------------------------------------------------- 1 | """ Interfaces for users 2 | 3 | This file defines several interfaces to ease the 4 | use of cinspector. 5 | 6 | In particular, CProj is the interface for the whole 7 | C-based project, which usually contains some directories 8 | including header and source files. 9 | 10 | CCode is the base interface for any interfaces that 11 | represent source code, such as CFile. 12 | """ 13 | 14 | from typing import Dict, Callable 15 | from .nodes import BasicNode, Node 16 | 17 | 18 | class CProj: 19 | """ TODO 20 | """ 21 | 22 | def __init__(self, proj_path: str) -> None: 23 | self.proj_path = proj_path 24 | 25 | 26 | class CCode: 27 | 28 | def __init__(self, src) -> None: 29 | self.src = src 30 | self.node = BasicNode(self.src) 31 | 32 | def get_by_type_name(self, type_name: str) -> list: 33 | return self.node.descendants_by_type_name(type_name) 34 | 35 | def get_by_condition(self, condition: Callable[[Node], bool]) -> list: 36 | """ 37 | Access the nodes that satisfy the condition 38 | """ 39 | 40 | all_descendants = self.node.descendants() 41 | return [_ for _ in all_descendants if condition(_)] 42 | 43 | def get_by_type_name_and_query(self, type_name: str, 44 | query: Dict[str, str]) -> list: 45 | n_lst = self.get_by_type_name(type_name) 46 | return [n for n in n_lst if n.query(query)] 47 | 48 | def get_by_type_name_and_field(self, type_name: str, 49 | field: Dict[str, str]) -> list: 50 | """ 51 | Access the nodes by assigning node_type and fields. Note that 52 | the nodes that don't statisfy the type requirements, don't own 53 | the assigned FIELDS, and don't own the assigned FIELD VALUES will 54 | be filter out. 55 | 56 | Return: 57 | list containing the nodes that satisfy the type and field 58 | requirements 59 | """ 60 | 61 | n_lst = self.get_by_type_name(type_name) 62 | 63 | def own_field(n: BasicNode) -> bool: 64 | for _k, _v in field.items(): 65 | child = n.child_by_field_name(_k) 66 | if not child: 67 | return False 68 | if not _v == child.src: 69 | return False 70 | return True 71 | 72 | return [n for n in n_lst if own_field(n)] 73 | 74 | 75 | class CFile(CCode): 76 | """ TODO 77 | """ 78 | 79 | def __init__(self, file_path) -> None: 80 | self.file_path = file_path 81 | -------------------------------------------------------------------------------- /cinspector/nodes/__init__.py: -------------------------------------------------------------------------------- 1 | from .node import Node, Util, Query 2 | from .abstract_node import * 3 | from .basic_node import * 4 | from .edit import * 5 | -------------------------------------------------------------------------------- /cinspector/nodes/abstract_node.py: -------------------------------------------------------------------------------- 1 | from .node import Node 2 | 3 | 4 | class AbstractNode(Node): 5 | """ 6 | AbstractNode is the base class of abstract node classes. 7 | The so-called abstract node is the node that exists for the convenience 8 | of analysis (for example, CFG generation). It doesn't correspond 9 | to the acutal code elemetnts. 10 | """ 11 | 12 | def __init__(self, node_type=None) -> None: 13 | self.node_type = node_type 14 | 15 | 16 | class BorderNode(AbstractNode): 17 | 18 | def __init__(self, node_type=None) -> None: 19 | super().__init__(node_type) 20 | 21 | def __str__(self) -> str: 22 | return self.node_type 23 | 24 | def __repr__(self) -> str: 25 | return self.node_type 26 | 27 | 28 | class IfConditionNode(AbstractNode): 29 | 30 | def __init__(self, condition, node_type='if_condition') -> None: 31 | super().__init__(node_type) 32 | self.condition = condition 33 | self.start_point = self.condition.start_point 34 | self.end_point = self.condition.end_point 35 | 36 | def descendants_by_type_name(self, type_name): 37 | return self.condition.descendants_by_type_name(type_name) 38 | 39 | def common_entry_constraints(self): 40 | """ 41 | Many possible entry constraints exist for one condition. 42 | There are some strong constraints that are contained in every 43 | entry constraint. 44 | 45 | For example, for condition if (a < 1 && (b < 0 || b > 1)), 46 | it contains entry constraints [[a < 1, b < 0], [a < 1, b > 1]], 47 | a < 1 is a strong constraint. In other word, it is a must 48 | obeyed constraint. 49 | 50 | This function also a one-dimension list. 51 | """ 52 | common_constraints = [] 53 | e_cons = self.entry_constraints() 54 | # the comparison can be conducted by BasicNode.src or BasicNode.equal() 55 | for _e in e_cons[0]: # checking the first is enough 56 | yes = True 57 | for _cons in e_cons[1:]: 58 | _cons_src = [_.src for _ in _cons] 59 | if _e.src not in _cons_src: 60 | yes = False 61 | if yes: 62 | common_constraints.append(_e) 63 | return common_constraints 64 | 65 | def entry_constraints(self): 66 | """ 67 | output the constraints for going into the condition 68 | it returns a two-dimension list 69 | 70 | if (a || (a < 1 && a > 0)) -> (a) (a < 1, a > 0) 71 | if (b && (a < 1 || a > 0)) -> (b, a < 1) (b, a > 0) 72 | """ 73 | return self._constraints(self.condition) 74 | 75 | def _constraints(self, node): 76 | from .basic_node import ParenthesizedExpressionNode, BinaryExpressionNode 77 | # proud of elegant code :-) 78 | if isinstance(node, ParenthesizedExpressionNode): 79 | assert (len(node.children) == 1) 80 | node = node.children[0] 81 | 82 | if isinstance(node, BinaryExpressionNode): 83 | if node.symbol == '||': 84 | return self._constraints(node.left) + self._constraints( 85 | node.right) 86 | if node.symbol == '&&': 87 | """ 88 | [[a,b], [c,d]] && [[e,f]] -> [[a,b,e,f], [c,d,e,f]] 89 | """ 90 | ret = [] 91 | _right_constraints = self._constraints(node.right) 92 | for _left in self._constraints(node.left): 93 | for _right in _right_constraints: 94 | ret.append(_left + _right) 95 | return ret 96 | if node.symbol in ['>', '<', '>=', '<=', '==']: 97 | return [[node]] 98 | 99 | return [[node]] 100 | 101 | 102 | class YConditionNode(IfConditionNode): 103 | 104 | def __init__(self, condition, node_type='y_if') -> None: 105 | super().__init__(condition, node_type) 106 | 107 | def __str__(self) -> str: 108 | return '[if][Y]' + str(self.condition) 109 | 110 | def __repr__(self) -> str: 111 | return '[if][Y]' + str(self.condition) 112 | 113 | 114 | class NConditionNode(IfConditionNode): 115 | 116 | def __init__(self, condition, node_type='n_if') -> None: 117 | super().__init__(condition, node_type) 118 | 119 | def __str__(self) -> str: 120 | return '[if][N]' + str(self.condition) 121 | 122 | def __repr__(self) -> str: 123 | return '[if][N]' + str(self.condition) 124 | 125 | 126 | class ForLoopNode(AbstractNode): 127 | 128 | def __init__(self, 129 | initializer, 130 | condition, 131 | update, 132 | node_type='loop_condition') -> None: 133 | self.node_type = node_type 134 | self.initializer = initializer 135 | self.condition = condition 136 | self.update = update 137 | 138 | def descendants_by_type_name(self, name): 139 | children = [] 140 | if self.initializer: 141 | children += self.initializer.descendants_by_type_name(name) 142 | if self.condition: 143 | children += self.condition.descendants_by_type_name(name) 144 | if self.update: 145 | children += self.update.descendants_by_type_name(name) 146 | return children 147 | 148 | 149 | class YForLoopNode(ForLoopNode): 150 | 151 | def __init__(self, 152 | initializer, 153 | condition, 154 | update, 155 | node_type='y_loop') -> None: 156 | super().__init__(initializer, condition, update, node_type) 157 | 158 | def __str__(self) -> str: 159 | return f'[for][Y]({self.initializer} {self.condition}; {self.update})' 160 | 161 | def __repr__(self) -> str: 162 | return f'[for][Y]({self.initializer} {self.condition}; {self.update})' 163 | 164 | 165 | class NForLoopNode(ForLoopNode): 166 | 167 | def __init__(self, 168 | initializer, 169 | condition, 170 | update, 171 | node_type='n_loop') -> None: 172 | super().__init__(initializer, condition, update, node_type) 173 | 174 | def __str__(self) -> str: 175 | return f'[for][N]({self.initializer} {self.condition}; {self.update})' 176 | 177 | def __repr__(self) -> str: 178 | return f'[for][N]({self.initializer} {self.condition}; {self.update})' 179 | 180 | 181 | class DoWhileLoopNode(AbstractNode): 182 | """ 183 | Used to represent the condition of do-while loop 184 | """ 185 | 186 | def __init__(self, condition, node_type='loop_condition') -> None: 187 | super().__init__(node_type) 188 | self.condition = condition 189 | 190 | def __str__(self) -> str: 191 | return f'[do-while][]({self.condition})' 192 | 193 | def __repr__(self) -> str: 194 | return f'[do-while][]({self.condition})' 195 | 196 | 197 | class WhileLoopNode(AbstractNode): 198 | 199 | def __init__(self, condition, node_type='loop_condition') -> None: 200 | super().__init__(node_type) 201 | self.condition = condition 202 | 203 | def descendants_by_type_name(self, name): 204 | return self.condition.descendants_by_type_name(name) 205 | 206 | 207 | class YWhileLoopNode(WhileLoopNode): 208 | 209 | def __init__(self, condition, node_type='y_loop') -> None: 210 | super().__init__(condition, node_type) 211 | 212 | def __str__(self) -> str: 213 | return f'[while][Y]({self.condition})' 214 | 215 | def __repr__(self) -> str: 216 | return f'[while][Y]({self.condition})' 217 | 218 | 219 | class NWhileLoopNode(WhileLoopNode): 220 | 221 | def __init__(self, condition, node_type='n_loop') -> None: 222 | super().__init__(condition, node_type) 223 | 224 | def __str__(self) -> str: 225 | return f'[while][N]({self.condition})' 226 | 227 | def __repr__(self) -> str: 228 | return f'[while][N]({self.condition})' 229 | 230 | 231 | class SwitchNode(AbstractNode): 232 | 233 | def __init__(self, condition, case_value, node_type='switch_node') -> None: 234 | super().__init__(node_type) 235 | self.condition = condition 236 | self.case_value = case_value 237 | 238 | def descendants_by_type_name(self, types): 239 | rtn = [] 240 | if self.condition: 241 | rtn += self.condition.descendants_by_type_name(types) 242 | if self.case_value: 243 | rtn += self.case_value.descendants_by_type_name(types) 244 | return rtn 245 | 246 | def __str__(self) -> str: 247 | return f'switch {self.condition} case {self.case_value} ' 248 | 249 | def __repr__(self) -> str: 250 | return f'switch {self.condition} case {self.case_value} ' 251 | -------------------------------------------------------------------------------- /cinspector/nodes/basic_node.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains wrapper classes of tree-sitter nodes. In general, 3 | the wrapper classes are designed following the following rules: 4 | 5 | 1. Naming Convention. The class is named based on the type of tree-sitter 6 | node. For example, the wrapper class of tree-sitter node 'function_definition' 7 | is named FunctionDefinitionNode. 8 | 9 | 2. Attributes. The attributes within each class can by divided into two 10 | groups. The first group corresponds to the fields of tree-sitter node. For 11 | example, the node of 'function_definition' type in tree-sitter has the 12 | fields 'type', 'declarator', and 'body'. The wrapper class of 13 | 'function_definition' FunctionDefinitionNode will also contain the 14 | attributes 'type', 'declarator', and 'body'. The second group of attributes 15 | is added by cinspector to facilitate the analysis of source code. We call them 16 | additional attributes. 17 | 18 | 3. Property representation. The properties of the node classes in this file, 19 | i.e., classes inheriting from BasicNode, should be represented by BasicNode 20 | instances as much as possible. For example, the property 'name' of the class 21 | ParameterDeclarationNode is an instance of IdentifierNode instead of str. Of 22 | course, the user can also get the str format by invoking the method src(). 23 | """ 24 | 25 | from typing import List, Optional, Dict, Iterable, Union 26 | from .node import Node, Util, Query 27 | 28 | 29 | class BasicNode(Node, Util, Query): 30 | """ The ancestor of all wrapper classes of tree-sitter nodes 31 | 32 | For those tree-sitter nodes that do not have corresponding wrapper, they 33 | use BasicNode as the default wrapper. 34 | 35 | Pay attention to the difference betweeen and . 36 | Considering the way cinspector is used, every BasicNode (or its subclass) 37 | belongs to a code snippet. The stores the source code of 38 | the whole code snippet while src stores the source code of the current 39 | node. For example, if we have a code snippet: 40 | 41 | int a; 42 | int func() {return 0;} 43 | 44 | Assume func_node represents the function in the code snippet and is 45 | an instance of FunctionDefinitionNode. Then, the of func_node is 'int 46 | func() {return 0;}'. While the of func_node is the whole 47 | code snippet. 48 | 49 | Attributes: 50 | internal_src (str): the source code of the whole code snippet. 51 | internal (tree_sitter.Node): the corresponding tree-sitter node of 52 | the current wrapper class. 53 | internal_tree (tree_sitter.Tree): the corresponding tree-sitter 54 | tree of the whole code snippet, internal belongs to this tree. 55 | node_type (str): the type of the current wrapper class. 56 | ts_type (str): deprecated, type of the corresponding tree-sitter node, 57 | we design this since the wrapper class and corresponding 58 | tree-sitter node may have different type under some situations. 59 | start_point (tuple): the start position of the current node in 60 | internal_src. 61 | end_point (tuple): the end position of the current node in 62 | internal_src. 63 | child_count (int): the number of children of the current node. 64 | src (str): the source code of the current node. 65 | 66 | Properties: 67 | parent (BasicNode): the parent node of the current node. 68 | children (List[BasicNode]): the children nodes of the current node. 69 | 70 | Methods: 71 | equal(_o: 'BasicNode'): check whether the current node is equal to _o. 72 | make_wrapper(ts_node: tree-sitter.Node): make a wrapper class for the 73 | tree-sitter node. 74 | child_by_field_name(field_name: str): get the specific field of the 75 | current node. 76 | descendants(): get all descendants. 77 | descendants_by_type_name(type_name: Union[str, List[str]): get the 78 | descendants nodes belonging to the specific type. 79 | print_tree(): print the parsed tree 80 | """ 81 | 82 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 83 | """ 84 | Initialize the BasicNode 85 | 86 | Args: 87 | src (str): the source code of the WHOLE code snippet, not the 88 | current BasicNode. 89 | ts_node (tree_sitter.Node): the corresponding tree-sitter node of 90 | the current BasicNode 91 | ts_tree (tree_sitter.Tree): the corresponding tree-sitter tree of 92 | the WHOLE code snippet, this also is the tree that ts_node 93 | belongs to. 94 | 95 | Returns: 96 | None 97 | """ 98 | 99 | self.internal_src = src 100 | """ 101 | Ideally, the following condtion is true only when file content is 102 | passed to BasicNode. Other nodes such as FunctionDefinitionNode 103 | should be initialized with ts_node dictated. 104 | """ 105 | if not ts_node: 106 | ts_tree = self.get_tree(src) 107 | ts_node = ts_tree.root_node 108 | self.internal = ts_node 109 | self.internal_tree = ts_tree 110 | self.node_type = ts_node.type 111 | # ts_type: deprecated, node_type should be totally consistent with the type of tree-sitter node 112 | self.ts_type = ts_node.type 113 | self._parent = None 114 | self.start_point = ts_node.start_point 115 | self.end_point = ts_node.end_point 116 | self.child_count = ts_node.child_count 117 | self._children = None 118 | self.src = self.internal.text.decode("utf8") 119 | 120 | @property 121 | def children(self): 122 | """ 123 | DO NOT use this attribute on huge BasicNode such as ndoe 124 | of file, otherwise horrible recursion. 125 | """ 126 | if not self._children: 127 | self._children = [ 128 | self.make_wrapper(_ch) 129 | for _ch in self.internal.children 130 | # TODO: Maybe it's better to keep them for consistency. 131 | if _ch.type not in ['(', ')', ',', ';', '{', '}'] 132 | ] 133 | return self._children 134 | 135 | def __str__(self) -> str: 136 | # return f'({self.type}){self.src}' 137 | return self.src 138 | 139 | def __repr__(self) -> str: 140 | # return f'({self.__hash__}){self.src}' 141 | return self.src 142 | 143 | def equal(self, _o: 'BasicNode') -> bool: 144 | assert (isinstance(_o, BasicNode)) 145 | internal_src_eq = (self.internal_src == _o.internal_src) 146 | position_eq = (self.internal.start_point == _o.internal.start_point) 147 | position_eq = position_eq and (self.internal.end_point 148 | == _o.internal.end_point) 149 | return internal_src_eq and position_eq 150 | 151 | @property 152 | def parent(self): 153 | if not self._parent: 154 | self._parent = self.make_wrapper(self.internal.parent) 155 | return self._parent 156 | 157 | def in_front(self, node: 'BasicNode'): 158 | return self.start_point[0] < node.start_point[0] or \ 159 | (self.start_point[0] == node.start_point[0] and self.start_point[1] < node.start_point[1]) 160 | 161 | def make_wrapper(self, ts_node): 162 | if not ts_node: 163 | return None 164 | wrapper_dict = { 165 | 'assignment_expression': AssignmentExpressionNode, 166 | 'binary_expression': BinaryExpressionNode, 167 | 'call_expression': CallExpressionNode, 168 | 'cast_expression': CaseExpressionNode, 169 | 'conditional_expression': ConditionalExpressionNode, 170 | 'compound_statement': CompoundStatementNode, 171 | 'declaration': DeclarationNode, 172 | 'do_statement': DoStatementNode, 173 | 'enum_specifier': EnumSpecifierNode, 174 | 'enumerator_list': EnumeratorListNode, 175 | 'enumerator': EnumeratorNode, 176 | 'expression_statement': ExpressionStatementNode, 177 | 'field_identifier': FieldIdentifierNode, 178 | 'field_declaration_list': FieldDeclarationListNode, 179 | 'field_declaration': FieldDeclarationNode, 180 | 'function_definition': FunctionDefinitionNode, 181 | 'function_declarator': FunctionDeclaratorNode, 182 | 'for_statement': ForStatementNode, 183 | 'if_statement': IfStatementNode, 184 | 'init_declarator': InitDeclaratorNode, 185 | 'identifier': IdentifierNode, 186 | 'type_identifier': TypeIdentifierNode, 187 | 'primitive_type': TypeNode, 188 | 'number_literal': NumberLiteralNode, 189 | 'parenthesized_expression': ParenthesizedExpressionNode, 190 | 'preproc_function_def': PreprocFunctionDefNode, 191 | 'preproc_def': PreprocDefNode, 192 | 'preproc_arg': PreprocArgNode, 193 | 'parameter_declaration': ParameterDeclarationNode, 194 | 'parameter_list': ParameterListNode, 195 | 'return_statement': ReturnStatementNode, 196 | 'struct_specifier': StructSpecifierNode, 197 | 'subscript_expression': SubscriptExpressionNode, 198 | 'storage_class_specifier': StorageClassSpecifierNode, 199 | 'sized_type_specifier': TypeNode, 200 | 'macro_type_specifier': TypeNode, 201 | 'type_qualifier': TypeQualifierNode, 202 | 'type_identifier': TypeIdentifierNode, 203 | 'unary_expression': UnaryExpressionNode, 204 | 'variadic_parameter': VariadicParameterNode, 205 | 'while_statement': WhileStatementNode, 206 | } 207 | init_func = wrapper_dict[ 208 | ts_node.type] if ts_node.type in wrapper_dict.keys() else BasicNode 209 | return init_func(self.internal_src, ts_node, self.internal_tree) 210 | 211 | def child_by_field_name(self, name: str): 212 | assert (type(name) == str) 213 | return self.make_wrapper(self.internal.child_by_field_name(name)) 214 | 215 | def descendants(self): 216 | """ 217 | Depth-first traverse to collect all descendants of the 218 | current node, the current node itself will not be collected. 219 | """ 220 | 221 | node_lst = [] 222 | cursor = self.internal.walk() 223 | root_node = cursor.node 224 | while True: 225 | if cursor.node != root_node: 226 | node_lst.append(self.make_wrapper(cursor.node)) 227 | if not cursor.goto_first_child(): 228 | while not cursor.goto_next_sibling(): 229 | if not cursor.goto_parent(): 230 | return node_lst 231 | 232 | def descendants_by_type_name(self, name: Union[str, List[str]]): 233 | """ 234 | Depth-first traverse to collect all descendants that satisfy the node_type 235 | requirements. 236 | """ 237 | 238 | if type(name) == str: 239 | name = [name] 240 | 241 | return [_ for _ in self.descendants() if _.node_type in name] 242 | 243 | def tokenize(self) -> List['BasicNode']: 244 | """Tokenize the current code snippet 245 | 246 | Returns: 247 | A list (order-sensitive) of tokens 248 | 249 | """ 250 | 251 | level = 0 252 | node_lst = [] 253 | cursor = self.internal.walk() 254 | while True: 255 | # only append leaf nodes 256 | if not cursor.node.children: 257 | node_lst.append(self.make_wrapper(cursor.node)) 258 | 259 | if not cursor.goto_first_child(): 260 | while not cursor.goto_next_sibling(): 261 | if not cursor.goto_parent(): 262 | return node_lst 263 | else: 264 | level -= 1 265 | else: 266 | level += 1 267 | 268 | def print_tree(self): 269 | """ 270 | Print the parsed tree 271 | """ 272 | 273 | def _tree_nodes(): 274 | level = 0 275 | node_lst = [] 276 | cursor = self.internal.walk() 277 | while True: 278 | node_lst.append((self.make_wrapper(cursor.node), level)) 279 | if not cursor.goto_first_child(): 280 | while not cursor.goto_next_sibling(): 281 | if not cursor.goto_parent(): 282 | return node_lst 283 | else: 284 | level -= 1 285 | else: 286 | level += 1 287 | 288 | nlst = _tree_nodes() 289 | for _n in nlst: 290 | node, level = _n 291 | # prepare the format 292 | indent = " " * 4 * level 293 | t = f"type={node.node_type}" 294 | pos = f"start_point={node.start_point} end_point={node.end_point}" 295 | print(indent, type(node).__name__, t, pos, node.src.__repr__()) 296 | 297 | 298 | # currently only add commonly used type 299 | class TypeNode(BasicNode): 300 | """ 301 | TODO: remove the pointer_level and array_level, use analysis 302 | module to conclude the related property. 303 | """ 304 | 305 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 306 | super().__init__(src, ts_node, ts_tree) 307 | self.pointer_level = 0 308 | self.array_level = 0 309 | 310 | def is_pointer(self): 311 | return self.pointer_level != 0 312 | 313 | def is_array(self): 314 | return self.array_level != 0 315 | 316 | 317 | class TypeIdentifierNode(BasicNode): 318 | """ Wrapper for type_identifier node in tree-sitter 319 | """ 320 | pass 321 | 322 | 323 | class FieldIdentifierNode(BasicNode): 324 | """ Wrapper for field_identifier node in tree-sitter 325 | """ 326 | pass 327 | 328 | 329 | class FieldDeclarationListNode(BasicNode): 330 | """ Wrapper for field_declaration_list node in tree-sitter 331 | 332 | One can get all field_declaration by children() 333 | """ 334 | pass 335 | 336 | 337 | class FieldDeclarationNode(BasicNode): 338 | """ Wrapper for field_declaration node in tree-sitter 339 | """ 340 | 341 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 342 | super().__init__(src, ts_node, ts_tree) 343 | self.type: BasicNode = self.child_by_field_name('type') 344 | # declarator is None for the field declaration of anonymous struct 345 | self.declarator: Optional[BasicNode] = self.child_by_field_name( 346 | 'declarator') 347 | 348 | 349 | class StructSpecifierNode(BasicNode): 350 | """ Wrapper for struct_specifier node in Tree-sitter 351 | """ 352 | 353 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 354 | super().__init__(src, ts_node, ts_tree) 355 | # name may be None for anaonymous struct 356 | self.name: Optional[TypeIdentifierNode] = self.child_by_field_name( 357 | 'name') 358 | self.body: FieldDeclarationListNode = self.child_by_field_name('body') 359 | 360 | def _type_identifier_result(self) -> Optional[str]: 361 | return self.name.src if self.name else None 362 | 363 | 364 | class TypeQualifierNode(BasicNode): 365 | """ Wrapper for type qualifier node in tree-sitter 366 | 367 | A type qualifier is a keyword that is applied to a type, 368 | resulting in a qualified type. 369 | 370 | As of 2014 and C11, there are four type qualifiers in 371 | standard C: const (C89), volatile (C89), restrict (C99) 372 | and _Atomic (C11) 373 | """ 374 | pass 375 | 376 | 377 | class StorageClassSpecifierNode(BasicNode): 378 | """ Wrapper for storage class specifier node in tree-sitter 379 | 380 | Every variable has two properties in C language that are: data 381 | type (int, char, float, etc.) and storage class. The Storage 382 | Class of a variable decides its scope, lifetime, storage location, 383 | and default value. 384 | 385 | There are four storage classes in C language: auto, extern, 386 | static, and register. 387 | """ 388 | 389 | pass 390 | 391 | 392 | class WhileStatementNode(BasicNode): 393 | 394 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 395 | super().__init__(src, ts_node, ts_tree) 396 | self.condition = self.child_by_field_name('condition') 397 | if self.children: 398 | self.body = self.children[-1] 399 | else: 400 | self.body = None 401 | 402 | 403 | class IdentifierNode(BasicNode): 404 | 405 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 406 | super().__init__(src, ts_node, ts_tree) 407 | 408 | def __eq__(self, obj): 409 | return super().__eq__(obj) 410 | 411 | def __hash__(self): 412 | return super().__hash__() 413 | 414 | 415 | class CaseExpressionNode(BasicNode): 416 | 417 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 418 | super().__init__(src, ts_node, ts_tree) 419 | self.type = self.child_by_field_name('type') 420 | self.value = self.child_by_field_name('value') 421 | 422 | 423 | class UnaryExpressionNode(BasicNode): 424 | 425 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 426 | super().__init__(src, ts_node, ts_tree) 427 | self.argument = self.child_by_field_name('argument') 428 | 429 | def used_ids(self): 430 | ids = self.descendants_by_type_name('identifier') 431 | return ids 432 | 433 | 434 | class ConditionalExpressionNode(BasicNode): 435 | 436 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 437 | super().__init__(src, ts_node, ts_tree) 438 | self.condition = self.child_by_field_name('condition') 439 | self.consequence = self.child_by_field_name('consequence') 440 | self.alternative = self.child_by_field_name('alternative') 441 | 442 | 443 | class NumberLiteralNode(BasicNode): 444 | 445 | pass 446 | 447 | 448 | class ReturnStatementNode(BasicNode): 449 | 450 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 451 | super().__init__(src, ts_node, ts_tree) 452 | self.value = self.children[1] if len(self.children) > 1 else None 453 | 454 | 455 | class PreprocFunctionDefNode(BasicNode): 456 | 457 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 458 | super().__init__(src, ts_node, ts_tree) 459 | self.name = self.child_by_field_name('name') 460 | self.parameters = self.child_by_field_name('parameters') 461 | self.value = self.child_by_field_name('value') 462 | 463 | 464 | class ForStatementNode(BasicNode): 465 | 466 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 467 | super().__init__(src, ts_node, ts_tree) 468 | self.initializer = self.child_by_field_name('initializer') 469 | self.condition = self.child_by_field_name('condition') 470 | self.update = self.child_by_field_name('update') 471 | if self.children: 472 | self.body = self.children[-1] 473 | else: 474 | self.body = None 475 | 476 | 477 | class BinaryExpressionNode(BasicNode): 478 | 479 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 480 | super().__init__(src, ts_node, ts_tree) 481 | self.left = self.child_by_field_name('left') 482 | self.right = self.child_by_field_name('right') 483 | self.symbol = self.get_raw(self.internal_src, self.left.end_point, 484 | self.right.start_point) 485 | if self.symbol: 486 | self.symbol = self.symbol.strip() 487 | 488 | def is_logic_op(self): 489 | if self.symbol in ['&&', '||']: 490 | return True 491 | return False 492 | 493 | 494 | class DeclarationNode(BasicNode): 495 | 496 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 497 | super().__init__(src, ts_node, ts_tree) 498 | # TODO: duplicate name, comment temporarily 499 | # self.type = self.child_by_field_name('type') 500 | 501 | # a tricky solution since tree-sitter child_by_field_name 502 | # can only return the first field 503 | # for example, int a,b; returns a. 504 | self.type = self.child_by_field_name('type') 505 | self.declarator = [] 506 | for _c in self.children: 507 | if _c.node_type in [ 508 | 'pointer_declarator', 509 | 'array_declarator', 510 | 'identifier', 511 | 'init_declarator', 512 | 'function_declarator', 513 | ]: 514 | self.declarator.append(_c) 515 | 516 | def declared_identifiers(self) -> List[IdentifierNode]: 517 | ids = [] 518 | for _decl in self.declarator: 519 | unpack_type = [ 520 | 'pointer_declarator', 521 | 'array_declarator', 522 | 'init_declarator', 523 | 'function_declarator', 524 | ] 525 | while _decl.node_type in unpack_type: 526 | _decl = _decl.child_by_field_name('declarator') 527 | assert (_decl.node_type == 'identifier') 528 | ids.append(_decl) 529 | return ids 530 | 531 | 532 | class DoStatementNode(BasicNode): 533 | 534 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 535 | super().__init__(src, ts_node, ts_tree) 536 | self.body = self.child_by_field_name('body') 537 | self.condition = self.child_by_field_name('condition') 538 | 539 | 540 | class ParenthesizedExpressionNode(BasicNode): 541 | 542 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 543 | super().__init__(src, ts_node, ts_tree) 544 | 545 | def remove_parenthese(self): 546 | # children[0] and children[2] is ( and ) 547 | return self.make_wrapper(self.internal.children[1]) 548 | 549 | 550 | class IfStatementNode(BasicNode): 551 | 552 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 553 | from .abstract_node import IfConditionNode 554 | super().__init__(src, ts_node, ts_tree) 555 | self.condition = self.child_by_field_name('condition') 556 | # By AbstractNode, we assume there is a node with the type 557 | # if_condition, which is actually non-exist in tree-sitter. 558 | self.condition_abs = IfConditionNode(self.condition) 559 | self.consequence = self.child_by_field_name('consequence') 560 | self.alternative = self.child_by_field_name('alternative') 561 | 562 | def common_entry_constraints(self): 563 | return self.condition_abs.common_entry_constraints() 564 | 565 | def entry_constraints(self): 566 | return self.condition_abs.entry_constraints() 567 | 568 | 569 | class ParameterDeclarationNode(BasicNode): 570 | """ The wrapper of the parameter_declaration node in tree-sitter. 571 | 572 | Attributes: 573 | type (TypeNode): the type of the parameter. 574 | declarator (Optional[BasicNode]): the declarator of the parameter, 575 | this can be None, e.g. int func(void), or the declaration 576 | int func(int). 577 | """ 578 | 579 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 580 | super().__init__(src, ts_node, ts_tree) 581 | 582 | # fields of the tree-sitter node 583 | self.type = self.child_by_field_name('type') 584 | self.declarator = self.child_by_field_name('declarator') 585 | 586 | @property 587 | def type_qualifier(self) -> List[TypeQualifierNode]: 588 | """ 589 | get the type qualifier of the parameter 590 | """ 591 | 592 | return [_c for _c in self.children if isinstance(_c, TypeQualifierNode)] 593 | 594 | @property 595 | def storage_class_specifier(self) -> List[StorageClassSpecifierNode]: 596 | """ 597 | get the storage class specifier of the parameter 598 | """ 599 | 600 | return [ 601 | _c for _c in self.children 602 | if isinstance(_c, StorageClassSpecifierNode) 603 | ] 604 | 605 | @property 606 | def name(self) -> Optional[IdentifierNode]: 607 | """ 608 | try to analyse the name of the parameter 609 | """ 610 | declarator = self.declarator 611 | # self.declarator maybe None, e.g. int func(void) 612 | while True and declarator: 613 | # int func(int *a) 614 | if declarator.node_type == 'pointer_declarator': 615 | declarator = declarator.child_by_field_name('declarator') 616 | # mainly in declaration such as int func(int *) 617 | elif declarator.node_type == 'abstract_pointer_declarator': 618 | return None 619 | # int func(int (*a)()) 620 | elif declarator.node_type == 'function_declarator': 621 | declarator = declarator.child_by_field_name('declarator') 622 | # int func(int (*a)()) 623 | elif declarator.node_type == 'parenthesized_declarator': 624 | declarator = declarator.children[0] 625 | # int func(int a[]) 626 | elif declarator.node_type == 'array_declarator': 627 | declarator = declarator.child_by_field_name('declarator') 628 | # int func(int a) 629 | elif declarator.node_type == 'identifier': 630 | return declarator 631 | else: 632 | assert (False) 633 | return None 634 | 635 | 636 | class ParameterListNode(BasicNode): 637 | """ The wrapper of the parameter_list node in tree-sitter. 638 | 639 | ParameterListNode is a collection of ParameterDeclarationNode. 640 | You can visit the children of ParameterListNode by the attributes 641 | . 642 | """ 643 | 644 | pass 645 | 646 | 647 | class FunctionDeclaratorNode(BasicNode): 648 | """ wrapper of the function_declarator node in tree-sitter 649 | 650 | Attributes: 651 | declarator (BasicNode): declarator 652 | parameters (ParameterListNode): parameters 653 | """ 654 | 655 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 656 | super().__init__(src, ts_node, ts_tree) 657 | self.declarator: BasicNode = self.child_by_field_name('declarator') 658 | self.parameters: ParameterListNode = self.child_by_field_name( 659 | 'parameters') 660 | 661 | 662 | class FunctionDefinitionNode(BasicNode): 663 | """Wrapper class of the function_definition node in tree-sitter 664 | 665 | Attributes: 666 | type (TypeNode): the type of the function 667 | declarator (FunctionDeclaratorNode): the declarator of the function 668 | body (CompoundStatementNode): the body of the function 669 | 670 | TODO: name may be None under some cases 671 | """ 672 | 673 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 674 | super().__init__(src, ts_node, ts_tree) 675 | self.type: TypeNode = self.child_by_field_name('type') 676 | self.declarator: FunctionDeclaratorNode = self.child_by_field_name( 677 | 'declarator') 678 | self.body: CompoundStatementNode = self.child_by_field_name('body') 679 | 680 | def _identifier_result(self) -> Optional[str]: 681 | return self.name.src 682 | 683 | def __get_nest_function_declarator(self) -> FunctionDeclaratorNode: 684 | """ 685 | function_declarator may be nested, e.g. int (*func(int a))(int b). 686 | This function find the innermost one, i.e., func(int a). 687 | """ 688 | declarator = self.declarator 689 | last_function_declarator = declarator 690 | # try to find out the nest function_declarator 691 | while True: 692 | if declarator.node_type == 'function_declarator': 693 | last_function_declarator = declarator 694 | declarator = declarator.child_by_field_name('declarator') 695 | elif declarator.node_type == 'parenthesized_declarator': 696 | declarator = declarator.children[0] 697 | elif declarator.node_type == 'pointer_declarator': 698 | declarator = declarator.child_by_field_name('declarator') 699 | elif declarator.node_type == 'identifier': 700 | break 701 | else: 702 | assert (0) 703 | return last_function_declarator 704 | 705 | @property 706 | def name(self) -> IdentifierNode: 707 | """ 708 | conclude the name of the function 709 | """ 710 | 711 | return self.__get_nest_function_declarator().child_by_field_name( 712 | 'declarator') 713 | 714 | @property 715 | def parameters(self) -> ParameterListNode: 716 | """ conclude the parameters of the function 717 | 718 | The node function_declarator in tree-sitter has two fields: 719 | declarator and parameters. However, the field parameters is 720 | not strictly the real parameters of the current function. 721 | For example, for the function `int (*bar(int a))(int)`, the 722 | field parameters of the function_declarator `(*bar(int a))(int)` 723 | is `(int)`, while `int a` is the real parameter of the function. 724 | """ 725 | 726 | return self.__get_nest_function_declarator().child_by_field_name( 727 | 'parameters') 728 | 729 | @property 730 | def type_qualifier(self) -> List[TypeQualifierNode]: 731 | """ 732 | get the type qualifier of the parameter 733 | """ 734 | 735 | return [_c for _c in self.children if isinstance(_c, TypeQualifierNode)] 736 | 737 | @property 738 | def storage_class_specifier(self) -> List[StorageClassSpecifierNode]: 739 | """ 740 | get the storage class specifier of the parameter 741 | """ 742 | 743 | return [ 744 | _c for _c in self.children 745 | if isinstance(_c, StorageClassSpecifierNode) 746 | ] 747 | 748 | @property 749 | def static(self) -> bool: 750 | """ 751 | Whether the function is static 752 | """ 753 | storage_class_specifiers = [_.src for _ in self.children] 754 | return True if 'static' in storage_class_specifiers else False 755 | 756 | @property 757 | def inline(self) -> bool: 758 | """ 759 | Whether the function is inline 760 | """ 761 | storage_class_specifiers = [_.src for _ in self.children] 762 | return True if 'inline' in storage_class_specifiers else False 763 | 764 | 765 | class SubscriptExpressionNode(BasicNode): 766 | 767 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 768 | super().__init__(src, ts_node, ts_tree) 769 | self.argument = self.child_by_field_name('argument') 770 | self.index = self.child_by_field_name('index') 771 | 772 | 773 | class CallExpressionNode(BasicNode): 774 | 775 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 776 | super().__init__(src, ts_node, ts_tree) 777 | self.function = self.child_by_field_name('function') 778 | self.arguments = self.child_by_field_name( 779 | 'arguments').children # at least ( and ) as children 780 | # filter out bracket 781 | self.arguments = [ 782 | _a for _a in self.arguments if _a.src not in ['(', ')'] 783 | ] 784 | 785 | def is_indirect(self) -> bool: 786 | """ whether the invocation is the indirect call 787 | """ 788 | return not isinstance(self.function, IdentifierNode) 789 | 790 | 791 | class ExpressionStatementNode(BasicNode): 792 | 793 | pass 794 | 795 | 796 | class AssignmentExpressionNode(BasicNode): 797 | 798 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 799 | super().__init__(src, ts_node, ts_tree) 800 | self.left = self.child_by_field_name('left') 801 | self.right = self.child_by_field_name('right') 802 | 803 | @property 804 | def symbol(self) -> str: 805 | """ 806 | Return the symbol of the assignment expression, e.g., 807 | a |= b owns the symbol |=. 808 | """ 809 | 810 | symbol_start = self.left.internal.end_byte 811 | symbol_end = self.right.internal.start_byte 812 | return self.internal_src[symbol_start + 1:symbol_end].strip() 813 | 814 | 815 | class InitDeclaratorNode(BasicNode): 816 | 817 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 818 | super().__init__(src, ts_node, ts_tree) 819 | self.declarator = self.child_by_field_name('declarator') 820 | self.value = self.child_by_field_name('value') 821 | 822 | 823 | class PreprocArgNode(BasicNode): 824 | 825 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 826 | super().__init__(src, ts_node, ts_tree) 827 | # Tree-sitter parser will include the useless spaces 828 | # in preproc_arg, thus we do strip() for src. 829 | self.src = self.src.strip() 830 | 831 | 832 | class PreprocDefNode(BasicNode): 833 | 834 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 835 | super().__init__(src, ts_node, ts_tree) 836 | self.name = self.child_by_field_name('name') 837 | self.value = self.child_by_field_name('value') 838 | 839 | def __str__(self): 840 | return f'#define {self.name} {self.value}' 841 | 842 | def __repr__(self): 843 | return f'#define {self.name} {self.value}' 844 | 845 | 846 | class EnumSpecifierNode(BasicNode): 847 | """ 848 | wrapper of the tree-sitter node with the type 849 | 850 | Note that EnumSpecifierNode has different formats: 851 | 852 | 1. enum Hash { A, B}; -> the whole string except ';' is EnumSpecifierNode 853 | 2. void func(enum Hash h); -> is EnumSpecifierNode, currently 854 | this node has no filed 855 | 856 | """ 857 | 858 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 859 | super().__init__(src, ts_node, ts_tree) 860 | self.name: Optional[TypeIdentifierNode] = self.child_by_field_name( 861 | 'name') 862 | # inner property 863 | self._body: EnumeratorListNode = self.child_by_field_name('body') 864 | self.kv: dict = self._body.kv if self._body else dict() 865 | 866 | def _type_identifier_result(self) -> Optional[str]: 867 | return self.name.src if self.name else None 868 | 869 | def unsolved_value(self): 870 | """ 871 | helper function of conclude_value, outputs 872 | the symbols representing unknown values. One 873 | needs to provide the values of these symbols 874 | to perform conclude_value. 875 | """ 876 | lst = [] 877 | for _k, _v in self.kv.items(): 878 | if _v and not isinstance(_v, NumberLiteralNode): 879 | lst.append(_v) 880 | return lst 881 | 882 | def conclude_value(self, value_dic=dict()) -> dict: 883 | """ 884 | While self.kv provides the literal enum key and 885 | value, conclude_value tries to conclude the actual 886 | values that keys represent. 887 | 888 | For example, for the following enumeration: 889 | enum A { 890 | A1 = MACRO1, // #define MACRO1 1 891 | A2, 892 | }; 893 | 894 | self.kv equals to {A1: MACRO1, A2: None}. 895 | 896 | However, with the value of MACRO1 provided by the 897 | parameter value_dic, conclude_value will output the 898 | dict like {A1: 1, A2: 2}. 899 | """ 900 | 901 | def solve(val) -> Optional[int]: 902 | # the types of the values of the output dic are int and None 903 | if isinstance(val, NumberLiteralNode): 904 | return int(str(val)) 905 | elif val in value_dic.keys(): 906 | return int(value_dic[val]) 907 | else: 908 | return None 909 | 910 | # 1. conclude the value of each item 911 | # 1.1 sort the items 912 | k_lst: List[BasicNode] = [] 913 | for _k in self.kv.keys(): 914 | if not k_lst: 915 | k_lst.append(_k) 916 | continue 917 | 918 | insert_idx = -1 919 | for _id, _ in enumerate(k_lst): 920 | if not _.in_front(_k): 921 | insert_idx = _id 922 | break 923 | # when _k is behind all elements in k_lst 924 | if insert_idx == -1: 925 | insert_idx = len(k_lst) 926 | k_lst.insert(insert_idx, _k) 927 | 928 | # 1.2 conclude the value 929 | dic: Dict[BasicNode, Optional[int]] = dict() 930 | for _id, _k in enumerate(k_lst): 931 | _v = self.kv[_k] 932 | if _id == 0 and _v is None: 933 | _v = 0 934 | elif _v is not None: 935 | _v = solve(_v) 936 | # query the previous element 937 | elif _v is None: 938 | pre_ele = dic[k_lst[_id - 1]] 939 | _v = pre_ele + 1 if type(pre_ele) == int else None 940 | else: 941 | assert (False) 942 | dic[_k] = _v 943 | return dic 944 | 945 | 946 | class EnumeratorListNode(BasicNode): 947 | 948 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 949 | super().__init__(src, ts_node, ts_tree) 950 | self.enumerator = self.descendants_by_type_name('enumerator') 951 | self.kv = dict() 952 | for _e in self.enumerator: 953 | self.kv[_e.name] = _e.value 954 | 955 | 956 | class EnumeratorNode(BasicNode): 957 | 958 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 959 | super().__init__(src, ts_node, ts_tree) 960 | self.name = self.child_by_field_name('name') 961 | self.value = self.child_by_field_name('value') 962 | 963 | 964 | class VariadicParameterNode(BasicNode): 965 | 966 | pass 967 | 968 | 969 | class CompoundStatementNode(BasicNode): 970 | """ The wrapper of tree-sitter node 'compound_statement'. 971 | 972 | CompoundStatementNode is a collection of nodes. For 973 | example, it is used to represent the body of the function 974 | and loop node. 975 | 976 | Attributes: 977 | statements (list): the nodes contained in compound statement 978 | 979 | """ 980 | 981 | def __init__(self, src: str, ts_node=None, ts_tree=None) -> None: 982 | super().__init__(src, ts_node, ts_tree) 983 | self.statements = self.children 984 | -------------------------------------------------------------------------------- /cinspector/nodes/edit.py: -------------------------------------------------------------------------------- 1 | """ 2 | Edit nodes 3 | """ 4 | 5 | from __future__ import annotations 6 | from typing import Tuple, List, Optional 7 | from .basic_node import BasicNode 8 | from .node import Util 9 | 10 | 11 | class EditPos: 12 | 13 | def __init__(self, start: int, end: int, new_snippet: str) -> None: 14 | self.start = start 15 | self.end = end 16 | self.new_snippet = new_snippet 17 | 18 | 19 | def edit_str(s: str, edits: List[EditPos]) -> Optional[str]: 20 | 21 | edits = sorted(edits, key=lambda x: x.start) 22 | 23 | for i in range(len(edits)): 24 | 25 | # check whether there is overlap in the edits 26 | if i != len(edits) - 1 and edits[i].end > edits[i + 1].start: 27 | return None 28 | 29 | # check whether the start and end are within the string 30 | if edits[i].start < 0 or edits[i].end > len(s): 31 | return None 32 | 33 | # start replacing 34 | s_lst = list(s) 35 | gap_cnt = 0 36 | for edit in edits: 37 | s_lst[edit.start + gap_cnt:edit.end + gap_cnt] = list(edit.new_snippet) 38 | gap = len(edit.new_snippet) - (edit.end - edit.start) 39 | gap_cnt += gap 40 | 41 | return ''.join(s_lst) 42 | 43 | 44 | class Edit(Util): 45 | """ Edit the BasicNode 46 | 47 | This class is designed for editing the BasicNode, 48 | including adding, deleting, and replacing the children. 49 | 50 | """ 51 | 52 | def __init__(self, target: BasicNode): 53 | self.target: BasicNode = target 54 | 55 | def is_child(self, child: BasicNode) -> bool: 56 | """ 57 | whether the child node is the child of self.target, 58 | check by 1. same tree_sitter.Tree 2. start and end point 59 | """ 60 | 61 | if self.target.internal_tree != child.internal_tree: 62 | return False 63 | 64 | # we don't treat the node itself as its child 65 | if self.target.start_point == child.start_point \ 66 | and self.target.end_point == child.end_point: 67 | return False 68 | 69 | def is_infront(a1: Tuple[int, int], a2: Tuple[int, int]) -> bool: 70 | """ 71 | whether a1 is infront of a2, leverage the default 72 | tuple comparison 73 | """ 74 | 75 | return a1 <= a2 76 | 77 | return is_infront(self.target.start_point, child.start_point) \ 78 | and is_infront(child.end_point, self.target.end_point) 79 | 80 | def _remove_child_src(self, child: BasicNode) -> str: 81 | """ 82 | remove child's source code from self.target.internal_src 83 | 84 | Returns: 85 | the new source code 86 | """ 87 | 88 | internal_src = self.target.internal_src 89 | start_byte = child.internal.start_byte 90 | end_byte = child.internal.end_byte 91 | return internal_src[:start_byte] + internal_src[end_byte:] 92 | 93 | def remove_child(self, child: BasicNode): 94 | """ Remove the child from the target node 95 | 96 | Args: 97 | child: the child to be removed 98 | """ 99 | 100 | # check whether child's position is legal 101 | assert (self.is_child(child)) 102 | 103 | # remove the child from the children list 104 | new_src = self._remove_child_src(child) 105 | tree = self.target.internal_tree 106 | tree.edit( 107 | start_byte=child.internal.start_byte, 108 | old_end_byte=child.internal.end_byte, 109 | new_end_byte=child.internal.start_byte, 110 | start_point=child.internal.start_point, 111 | old_end_point=child.internal.end_point, 112 | new_end_point=child.internal.start_point, 113 | ) 114 | parser = self.get_parser() 115 | new_tree = parser.parse(bytes(new_src, 'utf8'), tree) 116 | """ 117 | relocate to the target node, since we delete the child 118 | of self.target, the start_point of self.target should 119 | be unchanged. 120 | """ 121 | old_start_point = self.target.start_point 122 | node_type = self.target.node_type 123 | # construct the BasicNode 124 | bn = BasicNode(new_src, new_tree.root_node, new_tree) 125 | target = None 126 | for _ in bn.descendants_by_type_name(node_type): 127 | if _.start_point == old_start_point: 128 | target = _ 129 | break 130 | assert (target is not None) 131 | return target 132 | -------------------------------------------------------------------------------- /cinspector/nodes/node.py: -------------------------------------------------------------------------------- 1 | """ 2 | Make a wrapper of tree-sitter node 3 | """ 4 | 5 | from __future__ import annotations 6 | import os 7 | from functools import cmp_to_key 8 | from typing import Dict, Optional, Iterable, TYPE_CHECKING 9 | if TYPE_CHECKING: 10 | from .basic_node import BasicNode 11 | from tree_sitter import Language, Parser 12 | 13 | 14 | class Util(): 15 | """ 16 | 17 | Methods: 18 | sort_nodes(nodes: Iterable, reverse: bool = False): sort the nodes by 19 | their position in internal_src. 20 | """ 21 | 22 | def get_parser(self): 23 | abs_path = os.path.abspath(__file__) 24 | dire = os.path.dirname(abs_path) 25 | C_LANGUAGE = Language(f'{dire}/../cinspector-tree-sitter.so', 'c') 26 | parser = Parser() 27 | parser.set_language(C_LANGUAGE) 28 | return parser 29 | 30 | def get_tree(self, src: str): 31 | parser = self.get_parser() 32 | tree = parser.parse(bytes(src, 'utf8')) 33 | return tree 34 | 35 | def get_cursor(self, src: str): 36 | parser = self.get_parser() 37 | tree = parser.parse(bytes(src, 'utf8')) 38 | return tree.walk() 39 | 40 | @staticmethod 41 | def sort_nodes(nodes: Iterable, reverse: bool = False) -> Iterable: 42 | """ Sort the instances of BasicNode by their position in source code 43 | 44 | Args: 45 | nodes (Iterable): nodes waiting for sorting 46 | reverse (bool=False): use descending instead of ascending 47 | 48 | Return: 49 | sorted Iterable object 50 | """ 51 | 52 | def cmp_position(node1: BasicNode, node2: BasicNode) -> int: 53 | if node1.start_point[0] < node2.start_point[0] or \ 54 | (node1.start_point[0] == node2.start_point[0] and node1.start_point[1] < node2.start_point[1]): 55 | return -1 56 | else: 57 | return 1 58 | 59 | sorted_nodes = sorted(nodes, 60 | key=cmp_to_key(cmp_position), 61 | reverse=reverse) 62 | return sorted_nodes 63 | 64 | @staticmethod 65 | def get_raw(s: str, start: tuple, end: tuple) -> Optional[str]: 66 | """ extracts from s the string fragment specified by the points start and end 67 | 68 | Args: 69 | s (str): a string 70 | start (tuple): (row, column), specify the start of the fragment 71 | end (tuple): (row, column), specify the end of the fragment 72 | 73 | Return: 74 | the extracted string, or None if it fails 75 | """ 76 | 77 | lst = s.split('\n') 78 | s_row, s_col = start 79 | e_row, e_col = end 80 | 81 | if s_row > e_row or (s_row == e_row and s_col >= e_col): 82 | return None 83 | 84 | # potential bug: corresponding line does not have enough character 85 | if s_row == e_row: 86 | return lst[s_row][s_col:e_col] 87 | elif s_row + 1 == e_row: 88 | return lst[s_row][s_col:] + '\n' + lst[e_row][:e_col] 89 | else: 90 | return lst[s_row][s_col:] \ 91 | + '\n'.join(lst[s_row+1:e_row]) \ 92 | + lst[e_row][:e_col] 93 | 94 | @staticmethod 95 | def get_node_raw(s: str, node): 96 | if not node: 97 | return None 98 | return Util.get_raw(s, node.start_point, node.end_point) 99 | 100 | @staticmethod 101 | def point2index(s: str, row: int, col: int) -> Optional[int]: 102 | """ return the character index at the specified row and column in the string s. 103 | 104 | Args: 105 | s (str): a string 106 | row (int): row, start from 0 107 | col (int): column, start from 0 108 | 109 | Return: 110 | the character index, or None if it fails 111 | """ 112 | 113 | lines = s.split('\n') 114 | if row < 0 or row >= len(lines): 115 | return None 116 | if col < 0 or col >= len(lines[row]): 117 | return None 118 | 119 | index = sum(len(line) + 1 for line in lines[:row]) 120 | return index + col 121 | 122 | 123 | class Query(): 124 | """ Access the specific nodes in the source code 125 | 126 | Query is used to access the nodes with specific properties in the 127 | source code. For example, find the enumeration with the type identifier 128 | "weekdays". To implement this, we let EnumSpecifierNode inherit from 129 | Query and implement the __type_identifier_result method, i.e., returns the 130 | field . The class in interface such as CCode will gather 131 | all the EnumSpecifierNode and check the query method to find 132 | the ideal node. 133 | 134 | Attributes: 135 | mapping: a dictionary that maps the query key to the method 136 | 137 | Methods: 138 | query: query the node with the given query 139 | """ 140 | 141 | def __init__(self): 142 | pass 143 | 144 | def query(self, query: Dict[str, str]) -> bool: 145 | """ Query the node with the given query 146 | 147 | Args: 148 | query: the query to be executed 149 | 150 | Returns: 151 | True if the node satisfies the query, otherwise False 152 | """ 153 | mapping = { 154 | 'type_identifier': self._type_identifier_result, 155 | 'identifier': self._identifier_result, 156 | } 157 | 158 | for key, value in query.items(): 159 | if not mapping[key]() == value: 160 | return False 161 | return True 162 | 163 | def _identifier_result(self) -> Optional[str]: 164 | raise NotImplementedError 165 | 166 | def _type_identifier_result(self) -> Optional[str]: 167 | raise NotImplementedError 168 | 169 | 170 | class Node(): 171 | """ The root calss of all nodes 172 | 173 | In general, there are three types of nodes. Node is 174 | the root class of all nodes while both AbstractNode 175 | and BasicNode are the direct children of Node. 176 | 177 | AbstractNode represents the logical node in the source code. 178 | It does not correspond to a exactly same element in the source code. 179 | We design AbstractNode mainly for the needs of program analysis. 180 | 181 | BasicNode is the base class of a series of nodes that 182 | correspond to the actually existing elements in the source 183 | code. 184 | """ 185 | pass 186 | -------------------------------------------------------------------------------- /cinspector/parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from cinspector.interfaces import CCode 3 | 4 | 5 | def main(): 6 | parser = argparse.ArgumentParser(description='Print the parsed tree') 7 | group = parser.add_mutually_exclusive_group(required=True) 8 | group.add_argument("-f", "--file", action="store_true") 9 | group.add_argument("-s", "--string", action="store_true") 10 | parser.add_argument("target", type=str) 11 | args = parser.parse_args() 12 | 13 | target = args.target 14 | content = target 15 | if args.file: 16 | with open(target, 'r', errors='ignore') as r: 17 | content = r.read() 18 | 19 | cc = CCode(content) 20 | cc.node.print_tree() 21 | 22 | 23 | if __name__ == '__main__': 24 | main() 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeiweiHu/cinspector/e827cfd9fccb1c5d7c4e0169c0da0bd2006de63b/docs/_static/.gitkeep -------------------------------------------------------------------------------- /docs/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeiweiHu/cinspector/e827cfd9fccb1c5d7c4e0169c0da0bd2006de63b/docs/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sphinx_rtd_theme 15 | import sys 16 | sys.path.insert(0, os.path.abspath('..')) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'cinspector' 22 | copyright = '2023, Peiwei Hu' 23 | author = 'Peiwei Hu' 24 | 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.githubpages', 33 | 'sphinx.ext.todo', 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.viewcode', 36 | 'myst_parser', 37 | 'sphinx.ext.napoleon', 38 | ] 39 | 40 | # Add any paths that contain templates here, relative to this directory. 41 | templates_path = ['_templates'] 42 | 43 | # List of patterns, relative to source directory, that match files and 44 | # directories to ignore when looking for source files. 45 | # This pattern also affects html_static_path and html_extra_path. 46 | exclude_patterns = [] 47 | 48 | 49 | # -- Options for HTML output ------------------------------------------------- 50 | 51 | # The theme to use for HTML and HTML Help pages. See the documentation for 52 | # a list of builtin themes. 53 | # 54 | html_theme = 'sphinx_rtd_theme' 55 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 56 | 57 | # Add any paths that contain custom static files (such as style sheets) here, 58 | # relative to this directory. They are copied after the builtin static files, 59 | # so a file named "default.css" will overwrite the builtin "default.css". 60 | html_static_path = ['_static'] 61 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. cinspector documentation master file, created by 2 | sphinx-quickstart on Wed Feb 8 11:34:57 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to cinspector's documentation! 7 | ====================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | Introduction 14 | Interfaces 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/md/introduction.md: -------------------------------------------------------------------------------- 1 | # cinspector 2 | 3 | A static analysis framework for C. 4 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel", "tree-sitter==0.20.4"] 3 | -------------------------------------------------------------------------------- /script/check.sh: -------------------------------------------------------------------------------- 1 | # chdir 2 | ROOT_FOLDER=$(cd "$(dirname "$0")";pwd)"/../" 3 | cd $ROOT_FOLDER 4 | # yapf check 5 | yapf -d -e docs --style=google --recursive . 6 | # mypy check 7 | mypy ./cinspector 8 | -------------------------------------------------------------------------------- /script/rebuild.sh: -------------------------------------------------------------------------------- 1 | # chdir 2 | ROOT_FOLDER=$(cd "$(dirname "$0")";pwd)"/../" 3 | cd $ROOT_FOLDER 4 | pip uninstall cinspector 5 | pip install . 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from setuptools import setup, find_packages 4 | from setuptools.command.install import install 5 | 6 | 7 | def _gen_parser(dir_path): 8 | from subprocess import check_call 9 | check_call([sys.executable, '_gen_parser.py'], 10 | cwd=os.path.join(dir_path, 'cinspector')) 11 | 12 | 13 | class CinspectorInstallCmd(install): 14 | 15 | def run(self): 16 | install.run(self) 17 | self.execute(_gen_parser, (self.install_lib,), 18 | msg="Generate the parser") 19 | 20 | 21 | with open('README.md') as f: 22 | long_description = f.read() 23 | 24 | setup(name='cinspector', 25 | version='0.0.1', 26 | author="Peiwei Hu", 27 | author_email='jlu.hpw@foxmail.com', 28 | description='A static C source code analysis framework', 29 | long_description=long_description, 30 | long_description_content_type='text/markdown', 31 | packages=find_packages(), 32 | include_package_data=True, 33 | url='https://github.com/PeiweiHu/cinspector', 34 | install_requires=[ 35 | 'networkx', 36 | 'tree-sitter==0.20.4', 37 | ], 38 | entry_points={ 39 | 'console_scripts': ['cinspector-parser = cinspector.parser:main',] 40 | }, 41 | tests_require=[ 42 | 'pytest', 43 | ], 44 | classifiers=[ 45 | "Programming Language :: Python :: 3", 46 | "Operating System :: OS Independent", 47 | ], 48 | license="WTFPL", 49 | python_requires='>=3.6', 50 | cmdclass={ 51 | 'install': CinspectorInstallCmd, 52 | }) 53 | -------------------------------------------------------------------------------- /test/test_Util.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import BasicNode, Util 3 | 4 | SRC = """ 5 | int a, b, c = 1; 6 | int d; 7 | int e; 8 | """ 9 | 10 | 11 | class TestSort: 12 | 13 | def test_A(self): 14 | cc = CCode(SRC) 15 | decls = cc.get_by_type_name('declaration') 16 | assert (len(decls) == 3) 17 | 18 | ids = [] 19 | for _d in decls: 20 | ids += _d.declared_identifiers() 21 | 22 | id_dic = dict() 23 | for _id in ids: 24 | id_dic[_id.src] = _id 25 | for _ in ['a', 'b', 'c', 'd', 'e']: 26 | assert (_ in id_dic.keys()) 27 | 28 | lst = [id_dic['b'], id_dic['e'], id_dic['c'], id_dic['a'], id_dic['d']] 29 | ascending = Util.sort_nodes(lst) 30 | assert (ascending[0].src == 'a') 31 | assert (ascending[1].src == 'b') 32 | assert (ascending[2].src == 'c') 33 | assert (ascending[3].src == 'd') 34 | assert (ascending[4].src == 'e') 35 | descending = Util.sort_nodes(lst, reverse=True) 36 | assert (descending[4].src == 'a') 37 | assert (descending[3].src == 'b') 38 | assert (descending[2].src == 'c') 39 | assert (descending[1].src == 'd') 40 | assert (descending[0].src == 'e') 41 | 42 | 43 | class TestPoint2Index: 44 | 45 | def test_A(self): 46 | s = "abcs\n12345" 47 | point = (0, 0) 48 | index = Util.point2index(s, point[0], point[1]) 49 | assert (index == 0) 50 | 51 | point = (0, 4) 52 | index = Util.point2index(s, point[0], point[1]) 53 | assert (index is None) 54 | 55 | point = (-1, 4) 56 | index = Util.point2index(s, point[0], point[1]) 57 | assert (index is None) 58 | 59 | point = (2, 4) 60 | index = Util.point2index(s, point[0], point[1]) 61 | assert (index is None) 62 | 63 | point = (1, 4) 64 | index = Util.point2index(s, point[0], point[1]) 65 | assert (index == 9) 66 | -------------------------------------------------------------------------------- /test/test_analysis_CFG.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.analysis import CFG, BaseCFG 3 | 4 | SRC1 = """ 5 | void a(int p) { 6 | int b = 1; 7 | while (b < 10) { 8 | b++; 9 | if (b == 8) { 10 | goto label1; 11 | } 12 | } 13 | for (int i = 0; i < 10; i++) { 14 | b--; 15 | } 16 | return; 17 | label1: 18 | printf("6"); 19 | return; 20 | } 21 | """ 22 | 23 | 24 | def test_A(capsys): 25 | cc = CCode(SRC1) 26 | func = cc.get_by_type_name('function_definition')[0] 27 | cfg = CFG(func) 28 | for path in cfg.execution_path(): 29 | print('=' * 10) 30 | for p in path: 31 | print(p) 32 | captured = capsys.readouterr() 33 | assert captured.out == """========== 34 | int b = 1; 35 | [while][Y]((b < 10)) 36 | b++; 37 | [if][Y](b == 8) 38 | goto label1; 39 | printf("6"); 40 | return; 41 | ========== 42 | int b = 1; 43 | [while][Y]((b < 10)) 44 | b++; 45 | [if][N](b == 8) 46 | [for][Y](int i = 0; i < 10; i++) 47 | b--; 48 | return; 49 | ========== 50 | int b = 1; 51 | [while][Y]((b < 10)) 52 | b++; 53 | [if][N](b == 8) 54 | [for][N](int i = 0; i < 10; i++) 55 | return; 56 | ========== 57 | int b = 1; 58 | [while][N]((b < 10)) 59 | [for][Y](int i = 0; i < 10; i++) 60 | b--; 61 | return; 62 | ========== 63 | int b = 1; 64 | [while][N]((b < 10)) 65 | [for][N](int i = 0; i < 10; i++) 66 | return; 67 | """ 68 | 69 | 70 | SRC2 = """ 71 | int b = 1; 72 | do { 73 | if (b == 1) { 74 | c(0); 75 | } else if (b == 2) { 76 | c(2); 77 | } else { 78 | c(3); 79 | } 80 | } while (b < 10); 81 | 82 | if (b == 1) { 83 | printf("6"); 84 | } 85 | """ 86 | 87 | 88 | def test_B(capsys): 89 | cc = CCode(SRC2) 90 | stmts = cc.node.children 91 | cfg = BaseCFG(stmts) 92 | for path in cfg.execution_path(): 93 | print('*' * 10) 94 | for p in path: 95 | print(p) 96 | captured = capsys.readouterr() 97 | assert captured.out == """********** 98 | int b = 1; 99 | [if][Y](b == 1) 100 | c(0); 101 | [do-while][]((b < 10)) 102 | [if][Y](b == 1) 103 | printf("6"); 104 | ********** 105 | int b = 1; 106 | [if][Y](b == 1) 107 | c(0); 108 | [do-while][]((b < 10)) 109 | [if][N](b == 1) 110 | ********** 111 | int b = 1; 112 | [if][N](b == 1) 113 | [if][Y](b == 2) 114 | c(2); 115 | [do-while][]((b < 10)) 116 | [if][Y](b == 1) 117 | printf("6"); 118 | ********** 119 | int b = 1; 120 | [if][N](b == 1) 121 | [if][Y](b == 2) 122 | c(2); 123 | [do-while][]((b < 10)) 124 | [if][N](b == 1) 125 | ********** 126 | int b = 1; 127 | [if][N](b == 1) 128 | [if][N](b == 2) 129 | c(3); 130 | [do-while][]((b < 10)) 131 | [if][Y](b == 1) 132 | printf("6"); 133 | ********** 134 | int b = 1; 135 | [if][N](b == 1) 136 | [if][N](b == 2) 137 | c(3); 138 | [do-while][]((b < 10)) 139 | [if][N](b == 1) 140 | """ 141 | -------------------------------------------------------------------------------- /test/test_analysis_CallGraph.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.analysis import CallGraph 3 | from cinspector.nodes import CompoundStatementNode, DeclarationNode, IfStatementNode 4 | 5 | SRC = """ 6 | void a(int p) { 7 | b(0); 8 | } 9 | 10 | void b(int p) { 11 | c(1, 2); 12 | } 13 | 14 | void c(int p1, int p2) { 15 | a(0); 16 | } 17 | 18 | void d() { 19 | b(1); 20 | c(1, 2); 21 | } 22 | 23 | void e() {} 24 | """ 25 | 26 | 27 | def get_func(name, funcs): 28 | for _ in funcs: 29 | if _.name.src == name: 30 | return _ 31 | return None 32 | 33 | 34 | class TestCallGraph: 35 | 36 | def test_A(self): 37 | cc = CCode(SRC) 38 | funcs = cc.get_by_type_name('function_definition') 39 | fa = get_func('a', funcs) 40 | fb = get_func('b', funcs) 41 | fc = get_func('c', funcs) 42 | fd = get_func('d', funcs) 43 | fe = get_func('e', funcs) 44 | assert (fa and fb and fc and fd and fe) 45 | cg = CallGraph(funcs).analysis() 46 | assert (len(cg.nodes) == 5) 47 | assert (cg.has_edge(fa, fb)) 48 | assert (cg.has_edge(fb, fc)) 49 | assert (cg.has_edge(fc, fa)) 50 | assert (cg.has_edge(fd, fb)) 51 | assert (cg.has_edge(fd, fc)) 52 | assert (cg.has_node(fe)) 53 | -------------------------------------------------------------------------------- /test/test_node_AssignmentExpressionNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import AssignmentExpressionNode, Util 3 | 4 | SRC = """ 5 | void funcA() { 6 | int a, b, c = 1; 7 | a = b; 8 | b += c; 9 | c |= a; 10 | } 11 | """ 12 | 13 | 14 | class TestAssignmentNode: 15 | 16 | def test_A(self): 17 | cc = CCode(SRC) 18 | # locate funcA 19 | assignmment_exps = cc.get_by_type_name('assignment_expression') 20 | assignmment_exps = Util.sort_nodes(assignmment_exps) 21 | assert (assignmment_exps[0].symbol == '=') 22 | assert (assignmment_exps[1].symbol == '+=') 23 | assert (assignmment_exps[2].symbol == '|=') 24 | -------------------------------------------------------------------------------- /test/test_node_CompoundStatementNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import CompoundStatementNode, DeclarationNode, IfStatementNode 3 | 4 | SRC = """ 5 | int func() { 6 | int a = 10, b = 20; 7 | if ((a > 10 && b < 20) || b == 20) { 8 | printf(); 9 | } else if (b < 0) { 10 | printf(); 11 | } else { 12 | printf(); 13 | } 14 | } 15 | """ 16 | 17 | 18 | class TestCompoundStatementNode: 19 | 20 | def test_A(self): 21 | cc = CCode(SRC) 22 | func = cc.get_by_type_name('function_definition')[0] 23 | compound_statement = func.body 24 | assert (isinstance(compound_statement, CompoundStatementNode)) 25 | inner_statement_lst = compound_statement.statements 26 | assert (len(inner_statement_lst) == 2) 27 | assert (isinstance(inner_statement_lst[0], DeclarationNode)) 28 | assert (isinstance(inner_statement_lst[1], IfStatementNode)) 29 | -------------------------------------------------------------------------------- /test/test_node_DeclarationNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import IdentifierNode, DeclarationNode, FunctionDefinitionNode, Util 3 | 4 | SRC = """ 5 | void funcA() { 6 | int a, b, c = 1; 7 | } 8 | 9 | void funcB() { 10 | struct STy ins, *ins_pointer, ins_arr[c]; 11 | } 12 | 13 | pcap_t *pcap_open_rpcap(const char *source, int snaplen, int flags, 14 | int read_timeout, struct pcap_rmtauth *auth, char *errbuf); 15 | """ 16 | 17 | 18 | class TestDeclarationNode: 19 | 20 | def test_A(self): 21 | cc = CCode(SRC) 22 | # locate funcA 23 | funcs = cc.get_by_type_name('function_definition') 24 | for _f in funcs: 25 | if _f.name.src == 'funcA': 26 | funcA = _f 27 | assert (funcA) 28 | # test 29 | declarations = funcA.descendants_by_type_name('declaration') 30 | assert (len(declarations) == 1) 31 | declaration = declarations[0] 32 | assert (isinstance(declaration, DeclarationNode)) 33 | assert (declaration.type.src == 'int') 34 | declarator = declaration.declarator 35 | assert (len(declarator) == 3) 36 | # test declared_identifiers 37 | ids = declaration.declared_identifiers() 38 | assert (len(ids) == 3) 39 | for _ in ids: 40 | assert (_.src in ['a', 'b', 'c']) 41 | 42 | def test_B(self): 43 | cc = CCode(SRC) 44 | # locate funcA 45 | funcs = cc.get_by_type_name('function_definition') 46 | for _f in funcs: 47 | if _f.name.src == 'funcB': 48 | funcB = _f 49 | assert (funcB) 50 | # test 51 | declarations = funcB.descendants_by_type_name('declaration') 52 | assert (len(declarations) == 1) 53 | declaration = declarations[0] 54 | assert (isinstance(declaration, DeclarationNode)) 55 | assert (declaration.type.src == 'struct STy') 56 | declarator = declaration.declarator 57 | assert (len(declarator) == 3) 58 | # test declared_identifiers 59 | ids = declaration.declared_identifiers() 60 | assert (len(ids) == 3) 61 | for _ in ids: 62 | assert (_.src in ['ins', 'ins_pointer', 'ins_arr']) 63 | 64 | def test_C(self): 65 | # test function declarator in declaration 66 | cc = CCode(SRC) 67 | decls = cc.get_by_type_name('declaration') 68 | decls = Util.sort_nodes(decls) 69 | pcap_decl = decls[-1] 70 | assert (pcap_decl.declared_identifiers()[0].src == 'pcap_open_rpcap') 71 | -------------------------------------------------------------------------------- /test/test_node_Edit.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import Edit, FunctionDefinitionNode, EditPos, edit_str 3 | 4 | SRC = """ 5 | void a(int p) { 6 | int b = 1; 7 | b(0); 8 | } 9 | """ 10 | 11 | 12 | class TestEdit: 13 | 14 | def test_A(self): 15 | cc = CCode(SRC) 16 | func = cc.get_by_type_name('function_definition')[0] 17 | assert (len(func.descendants_by_type_name('call_expression')) == 1) 18 | """ 19 | expression_statement - b(0); 20 | call_expression - b(0) # without ; 21 | """ 22 | call_exp = func.descendants_by_type_name('expression_statement')[0] 23 | func_edit = Edit(func) 24 | new_func = func_edit.remove_child(call_exp) 25 | assert (new_func.node_type == 'function_definition') 26 | assert (new_func.descendants_by_type_name('call_expression') == []) 27 | 28 | def test_edit_str(self): 29 | s = "012345678" 30 | edits = [] 31 | edits.append(EditPos(1, 3, "a")) 32 | edits.append(EditPos(5, 6, "bcd")) 33 | s = edit_str(s, edits) 34 | assert (s == '0a34bcd678') 35 | 36 | s = "012345678" 37 | edits = [] 38 | edits.append(EditPos(0, 30, "a")) 39 | s = edit_str(s, edits) 40 | assert (s == None) 41 | 42 | s = "012345678" 43 | edits = [] 44 | edits.append(EditPos(0, 4, "a")) 45 | edits.append(EditPos(1, 4, "a")) 46 | s = edit_str(s, edits) 47 | assert (s == None) 48 | -------------------------------------------------------------------------------- /test/test_node_EnumSpecifierNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import EnumSpecifierNode, IdentifierNode, NumberLiteralNode 3 | 4 | SRC = """ 5 | #include "stdio.h" 6 | 7 | 8 | #define MACRO1 1 9 | #define MACRO2 2 10 | 11 | enum A {A1}; 12 | 13 | enum B { 14 | B1, 15 | B2 16 | }; 17 | 18 | enum C { 19 | C1 = 3, 20 | C2, 21 | C3 22 | }; 23 | 24 | 25 | enum D { 26 | D1, 27 | D2 = -1, 28 | D3, 29 | D4 = 4, 30 | D5, 31 | }; 32 | 33 | enum E { 34 | E1, 35 | E2 = MACRO1, 36 | E3 = 3, 37 | E4 = MACRO2 38 | }; 39 | 40 | """ 41 | 42 | 43 | class TestEnumSpecifierNode: 44 | 45 | def test_A(self): 46 | cc = CCode(SRC) 47 | 48 | def conditionA(n): 49 | if n.node_type == 'enum_specifier': 50 | if n.child_by_field_name('name').src == 'A': 51 | return True 52 | return False 53 | 54 | enum = cc.get_by_condition(conditionA)[0] 55 | assert (isinstance(enum, EnumSpecifierNode)) 56 | assert (len(enum.unsolved_value()) == 0) 57 | # test EnumSpecifierNode.kv 58 | value_dic = enum.kv 59 | assert (len(value_dic) == 1) 60 | k = list(value_dic)[0] 61 | assert (k.src == 'A1') 62 | assert (value_dic[k] is None) 63 | # test EnumSpecifierNode.conclude_value() 64 | value_dic = enum.conclude_value() 65 | assert (len(value_dic) == 1) 66 | k = list(value_dic)[0] 67 | assert (k.src == 'A1') 68 | assert (value_dic[k] == 0) 69 | 70 | def test_B(self): 71 | cc = CCode(SRC) 72 | enum = cc.get_by_type_name_and_query('enum_specifier', 73 | {'type_identifier': 'B'})[0] 74 | # test EnumSpecifierNode.kv 75 | for _k, _v in enum.kv.items(): 76 | assert (_v is None) 77 | # test EnumSpecifierNode.conclude_value() 78 | for _k, _v in enum.conclude_value().items(): 79 | if _k.src == 'B1': 80 | assert (_v == 0) 81 | elif _k.src == 'B2': 82 | assert (_v == 1) 83 | 84 | def test_C(self): 85 | cc = CCode(SRC) 86 | enum = cc.get_by_type_name_and_query('enum_specifier', 87 | {'type_identifier': 'C'})[0] 88 | # test EnumSpecifierNode.kv 89 | for _k, _v in enum.kv.items(): 90 | if _k.src == 'C1': 91 | assert (isinstance(_v, NumberLiteralNode)) 92 | assert (_v.src == '3') 93 | else: 94 | assert (_v is None) 95 | # test EnumSpecifierNode.conclude_value() 96 | for _k, _v in enum.conclude_value().items(): 97 | if _k.src == 'C1': 98 | assert (_v == 3) 99 | elif _k.src == 'C2': 100 | assert (_v == 4) 101 | elif _k.src == 'C3': 102 | assert (_v == 5) 103 | else: 104 | assert (False) 105 | 106 | def test_D(self): 107 | cc = CCode(SRC) 108 | enum = cc.get_by_type_name_and_query('enum_specifier', 109 | {'type_identifier': 'D'})[0] 110 | # test EnumSpecifierNode.kv 111 | for _k, _v in enum.kv.items(): 112 | if _k.src == 'D2': 113 | assert (isinstance(_v, NumberLiteralNode)) 114 | assert (_v.src == '-1') 115 | elif _k.src == 'D4': 116 | assert (isinstance(_v, NumberLiteralNode)) 117 | assert (_v.src == '4') 118 | else: 119 | assert (_v is None) 120 | # test EnumSpecifierNode.conclude_value() 121 | for _k, _v in enum.conclude_value().items(): 122 | if _k.src == 'D1': 123 | assert (_v == 0) 124 | elif _k.src == 'D2': 125 | assert (_v == -1) 126 | elif _k.src == 'D3': 127 | assert (_v == 0) 128 | elif _k.src == 'D4': 129 | assert (_v == 4) 130 | elif _k.src == 'D5': 131 | assert (_v == 5) 132 | else: 133 | assert (False) 134 | 135 | def test_E(self): 136 | cc = CCode(SRC) 137 | enum = cc.get_by_type_name_and_query('enum_specifier', 138 | {'type_identifier': 'E'})[0] 139 | # test EnumSpecifierNode.kv 140 | for _k, _v in enum.kv.items(): 141 | if _k.src == 'E2': 142 | assert (isinstance(_v, IdentifierNode)) 143 | assert (_v.src == 'MACRO1') 144 | elif _k.src == 'E3': 145 | assert (isinstance(_v, NumberLiteralNode)) 146 | assert (_v.src == '3') 147 | elif _k.src == 'E4': 148 | assert (isinstance(_v, IdentifierNode)) 149 | assert (_v.src == 'MACRO2') 150 | else: 151 | assert (_v is None) 152 | # test unsolved_value 153 | unsolved = enum.unsolved_value() 154 | assert (len(unsolved) == 2) 155 | for _ in unsolved: 156 | assert (isinstance(_, IdentifierNode)) 157 | assert (_.src == 'MACRO1' or _.src == 'MACRO2') 158 | # test EnumSpecifierNode.conclude_value() 159 | for _k, _v in enum.conclude_value().items(): 160 | if _k.src == 'E1': 161 | assert (_v == 0) 162 | elif _k.src == 'E2': 163 | assert (_v is None) 164 | elif _k.src == 'E3': 165 | assert (_v == 3) 166 | elif _k.src == 'E4': 167 | assert (_v is None) 168 | else: 169 | assert (False) 170 | # test EnumSpecifierNode.conclude_value() with value_dic 171 | val_dic = dict() 172 | for _ in unsolved: 173 | if _.src == 'MACRO1': 174 | val_dic[_] = 1 175 | elif _.src == 'MACRO2': 176 | val_dic[_] = 2 177 | else: 178 | assert (False) 179 | for _k, _v in enum.conclude_value(val_dic).items(): 180 | if _k.src == 'E1': 181 | assert (_v == 0) 182 | elif _k.src == 'E2': 183 | assert (_v == 1) 184 | elif _k.src == 'E3': 185 | assert (_v == 3) 186 | elif _k.src == 'E4': 187 | assert (_v == 2) 188 | else: 189 | assert (False) 190 | -------------------------------------------------------------------------------- /test/test_node_FunctionDefinitionNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import FunctionDefinitionNode, ParameterListNode, IdentifierNode 3 | 4 | SRC = """ 5 | static inline int func(int a, struct st ins, struct st **ins_pointer, struct st *ins_pointer_arr[]) { 6 | return 0; 7 | } 8 | 9 | int foo(int (*func_pointer)(const static int *)) { 10 | return 0; 11 | } 12 | 13 | int (*bar(int (*cmp)(void *)))(int) { 14 | return 0; 15 | } 16 | """ 17 | 18 | 19 | class TestFunctionDefinitionNode: 20 | 21 | def test_func(self): 22 | cc = CCode(SRC) 23 | func = cc.get_by_type_name('function_definition') 24 | 25 | # test the property name 26 | func = cc.get_by_type_name_and_query('function_definition', 27 | {'identifier': 'func'})[0] 28 | assert (isinstance(func, FunctionDefinitionNode)) 29 | # test the property inline and static 30 | assert (func.inline and func.static) 31 | # test the property parameters 32 | func_parameters = func.parameters 33 | assert (isinstance(func_parameters, ParameterListNode)) 34 | para_decl_lst = func_parameters.children 35 | # number of the parameter 36 | assert (len(para_decl_lst) == 4) 37 | 38 | # the first parameter - int a 39 | para0 = para_decl_lst[0] 40 | assert (str(para0.type.src) == 'int') 41 | assert (isinstance(para0.name, IdentifierNode)) 42 | assert (str(para0.name.src) == 'a') 43 | 44 | # the second parameter - struct st ins 45 | para1 = para_decl_lst[1] 46 | assert (str(para1.type.src) == 'struct st') 47 | assert (isinstance(para1.name, IdentifierNode)) 48 | assert (str(para1.name.src) == 'ins') 49 | 50 | # the third parameter - struct st **ins_pointer 51 | para2 = para_decl_lst[2] 52 | assert (str(para2.type.src) == 'struct st') 53 | assert (isinstance(para2.name, IdentifierNode)) 54 | assert (str(para2.name.src) == 'ins_pointer') 55 | 56 | # the forth parameter - struct st *ins_pointer_arr[] 57 | para3 = para_decl_lst[3] 58 | assert (str(para3.type.src) == 'struct st') 59 | assert (isinstance(para3.name, IdentifierNode)) 60 | assert (str(para3.name.src == 'ins_pointer_arr')) 61 | 62 | def test_foo(self): 63 | cc = CCode(SRC) 64 | foo = cc.get_by_type_name_and_query('function_definition', 65 | {'identifier': 'foo'})[0] 66 | # test the property parameters 67 | # int (*func_pointer)(const static int *) 68 | para_decl1 = foo.parameters.children[0] 69 | assert (para_decl1.name.src == 'func_pointer') 70 | assert (para_decl1.src == 'int (*func_pointer)(const static int *)') 71 | 72 | def test_bar(self): 73 | cc = CCode(SRC) 74 | bar = cc.get_by_type_name_and_query('function_definition', 75 | {'identifier': 'bar'})[0] 76 | 77 | # int (*cmp)(void *) 78 | para_decl = bar.parameters.children[0] 79 | assert (para_decl.src == 'int (*cmp)(void *)') 80 | -------------------------------------------------------------------------------- /test/test_node_IfStatementNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import IfStatementNode, BasicNode, Util 3 | 4 | SRC = """ 5 | int a = 10, b = 20; 6 | if ((a > 10 && b < 20) || b == 20) { 7 | printf(); 8 | } else if (b < 0) { 9 | printf(); 10 | } else { 11 | printf(); 12 | } 13 | """ 14 | 15 | 16 | class TestIfStatementNode: 17 | 18 | def test_A(self): 19 | cc = CCode(SRC) 20 | if_stmts = cc.get_by_type_name('if_statement') 21 | assert (len(if_stmts) == 2) # and in SRC 22 | if_stmts = Util.sort_nodes(if_stmts) 23 | 24 | stmt0 = if_stmts[0] 25 | assert (isinstance(stmt0, IfStatementNode)) 26 | # test condition 27 | assert (stmt0.condition.src == '((a > 10 && b < 20) || b == 20)') 28 | # test consequence 29 | assert (stmt0.consequence.node_type == 'compound_statement') 30 | # test alternative 31 | assert (stmt0.alternative.node_type == 'if_statement') 32 | assert (isinstance(stmt0.alternative, IfStatementNode)) 33 | # test entry_constraints 34 | # the constraint should be [[a > 10, b < 20], [b == 20]] 35 | constraints = stmt0.entry_constraints() 36 | assert (len(constraints) == 2) 37 | for _ in constraints: 38 | if (len(_) == 2): 39 | assert ('a > 10' in str(_)) 40 | assert ('b < 20' in str(_)) 41 | elif (len(_) == 1): 42 | assert ('b == 20' in str(_)) 43 | else: 44 | assert (False) 45 | -------------------------------------------------------------------------------- /test/test_node_ParameterDeclarationNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import ParameterDeclarationNode, IdentifierNode, TypeQualifierNode, StorageClassSpecifierNode 3 | 4 | SRC = """ 5 | int func(int a, struct st ins, struct st **ins_pointer, struct st *ins_pointer_arr[]) { 6 | return 0; 7 | } 8 | 9 | int foo(int (*func_pointer)(const static int *)) { 10 | return 0; 11 | } 12 | 13 | int (*bar(int (*cmp)(void *)))(int) { 14 | return 0; 15 | } 16 | """ 17 | 18 | 19 | class TestParameterDeclarationNode: 20 | 21 | def test_func(self): 22 | cc = CCode(SRC) 23 | func = cc.get_by_type_name('function_definition') 24 | func = [_ for _ in func if _.name.src == 'func'][0] 25 | para_decl_lst = func.descendants_by_type_name('parameter_declaration') 26 | # number of the parameter 27 | assert (len(para_decl_lst) == 4) 28 | 29 | # the first parameter - int a 30 | para0 = para_decl_lst[0] 31 | assert (str(para0.type.src) == 'int') 32 | assert (isinstance(para0.name, IdentifierNode)) 33 | assert (str(para0.name.src) == 'a') 34 | 35 | # the second parameter - struct st ins 36 | para1 = para_decl_lst[1] 37 | assert (str(para1.type.src) == 'struct st') 38 | assert (isinstance(para1.name, IdentifierNode)) 39 | assert (str(para1.name.src) == 'ins') 40 | 41 | # the third parameter - struct st **ins_pointer 42 | para2 = para_decl_lst[2] 43 | assert (str(para2.type.src) == 'struct st') 44 | assert (isinstance(para2.name, IdentifierNode)) 45 | assert (str(para2.name.src) == 'ins_pointer') 46 | 47 | # the forth parameter - struct st *ins_pointer_arr[] 48 | para3 = para_decl_lst[3] 49 | assert (str(para3.type.src) == 'struct st') 50 | assert (isinstance(para3.name, IdentifierNode)) 51 | assert (str(para3.name.src == 'ins_pointer_arr')) 52 | 53 | def test_foo(self): 54 | cc = CCode(SRC) 55 | foo = cc.get_by_type_name('function_definition') 56 | foo = [_ for _ in foo if _.name.src == 'foo'][0] 57 | 58 | # int (*func_pointer)(const static int *) 59 | para_decl1 = foo.child_by_field_name('declarator').child_by_field_name( 60 | 'parameters').children[0] 61 | assert (para_decl1.name.src == 'func_pointer') 62 | 63 | # const static int * 64 | para_decl2 = para_decl1.child_by_field_name( 65 | 'declarator').child_by_field_name('parameters').children[0] 66 | assert (para_decl2.name is None) 67 | assert (para_decl2.type_qualifier[0].src == 'const') 68 | assert (para_decl2.storage_class_specifier[0].src == 'static') 69 | 70 | def test_bar(self): 71 | cc = CCode(SRC) 72 | bar = cc.get_by_type_name('function_definition') 73 | bar = [_ for _ in bar if _.name.src == 'bar'][0] 74 | 75 | # int (*bar(int (*cmp)(void *)))(int) 76 | para_decl_lst = bar.descendants_by_type_name('parameter_declaration') 77 | for _decl in para_decl_lst: 78 | if _decl.src == 'int (*cmp)(void *)': 79 | assert (_decl.name.src == 'cmp') 80 | elif _decl.src == 'void *': 81 | assert (_decl.name is None) 82 | elif _decl.src == 'int': 83 | assert (_decl.name is None) 84 | else: 85 | assert (0) 86 | -------------------------------------------------------------------------------- /test/test_node_ParenthesizedExpressionNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import ParenthesizedExpressionNode 3 | 4 | SRC = """ 5 | int a = 10, b = 20; 6 | if (a > 10) { 7 | printf("%d\n", a); 8 | } 9 | if ((a > 10 && b < 20) || b == 20) { 10 | printf(); 11 | } 12 | """ 13 | 14 | 15 | class TestParenthesizedExpressionNode: 16 | 17 | def test_A(self): 18 | cc = CCode(SRC) 19 | pe = cc.get_by_type_name('parenthesized_expression') 20 | assert (len(pe) == 3) 21 | for _ in pe: 22 | if _.src == '(a > 10)': 23 | assert (_.remove_parenthese().src == 'a > 10') 24 | elif _.src == '((a > 10 && b < 20) || b == 20)': 25 | assert (_.remove_parenthese().src == 26 | '(a > 10 && b < 20) || b == 20') 27 | elif _.src == '(a > 10 && b < 20)': 28 | assert (_.remove_parenthese().src == 'a > 10 && b < 20') 29 | else: 30 | assert (False) 31 | -------------------------------------------------------------------------------- /test/test_node_PreprocDefNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import PreprocDefNode, IdentifierNode, PreprocArgNode 3 | 4 | SRC = """ 5 | 6 | #define MACRO1 1 7 | #define MACRO2 MACRO1 8 | 9 | """ 10 | 11 | 12 | class TestPreprocDefNode: 13 | 14 | def test_preproc_def(self): 15 | cc = CCode(SRC) 16 | macro_lst = cc.get_by_type_name('preproc_def') 17 | assert (len(macro_lst) == 2) 18 | for _ in macro_lst: 19 | assert (isinstance(_, PreprocDefNode)) 20 | assert (isinstance(_.name, IdentifierNode)) 21 | assert (isinstance(_.value, PreprocArgNode)) 22 | if _.name.src == 'MACRO1': 23 | assert (_.value.src == '1') 24 | elif _.name.src == 'MACRO2': 25 | assert (_.value.src == 'MACRO1') 26 | else: 27 | assert (False) 28 | -------------------------------------------------------------------------------- /test/test_node_StructSpecifierNode.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | from cinspector.nodes import StructSpecifierNode, FieldDeclarationListNode, FieldDeclarationNode, BasicNode, FieldIdentifierNode 3 | from cinspector.nodes import Util 4 | 5 | SRC = """ 6 | struct st { 7 | int a; 8 | struct { 9 | int *b; 10 | int c[]; 11 | }; 12 | }; 13 | """ 14 | 15 | 16 | class TestStructSpecifierNode: 17 | 18 | def test_A(self): 19 | cc = CCode(SRC) 20 | struct = cc.get_by_type_name('struct_specifier') 21 | assert (len(struct) == 2) 22 | struct = Util.sort_nodes(struct) 23 | struct = struct[0] 24 | assert (isinstance(struct, StructSpecifierNode)) 25 | 26 | decl_lst = struct.body 27 | assert (isinstance(decl_lst, FieldDeclarationListNode)) 28 | decl_lst = decl_lst.children 29 | assert (len(decl_lst) == 2) 30 | decl_lst = Util.sort_nodes(decl_lst) 31 | int_a = decl_lst[0] 32 | assert (isinstance(int_a, FieldDeclarationNode)) 33 | assert (int_a.type.src == 'int') 34 | assert (int_a.declarator.src == 'a') 35 | assert (isinstance(int_a.declarator, FieldIdentifierNode)) 36 | struct_anonymous = decl_lst[1] 37 | assert (isinstance(struct_anonymous, FieldDeclarationNode)) 38 | assert (isinstance(struct_anonymous.type, StructSpecifierNode)) 39 | assert (struct_anonymous.declarator is None) 40 | 41 | # test Query for StructSpecifierNode 42 | struct_st = cc.get_by_type_name_and_query('struct_specifier', 43 | {'type_identifier': 'st'})[0] 44 | assert (isinstance(struct_st, StructSpecifierNode)) 45 | assert (struct_st.name.src == 'st') 46 | -------------------------------------------------------------------------------- /test/test_node_tokenize.py: -------------------------------------------------------------------------------- 1 | from cinspector.interfaces import CCode 2 | 3 | SRC = """ 4 | int a, b, c = 1; 5 | int d; 6 | int e; 7 | """ 8 | 9 | SRC1 = """ 10 | void BND_Fixup(void) { 11 | if (((byte)prefixes & 2)) { 12 | *(undefined4 *)(all_prefixes + (long)last_repnz_prefix * 4) = 0x4f2; 13 | } 14 | } 15 | """ 16 | 17 | 18 | class TestTokenize: 19 | 20 | def test_A(self): 21 | cc = CCode(SRC) 22 | tokens = cc.node.tokenize() 23 | tokens = [_.src for _ in tokens] 24 | assert tokens == [ 25 | 'int', 'a', ',', 'b', ',', 'c', '=', '1', ';', 'int', 'd', ';', 26 | 'int', 'e', ';' 27 | ] 28 | 29 | def test_B(self): 30 | cc = CCode(SRC1) 31 | tokens = cc.node.tokenize() 32 | tokens = [_.src for _ in tokens] 33 | assert tokens == [ 34 | 'void', 'BND_Fixup', '(', 'void', ')', '{', 'if', '(', '(', '(', 35 | 'byte', ')', 'prefixes', '&', '2', ')', ')', '{', '*', '(', 36 | 'undefined4', '*', ')', '(', 'all_prefixes', '+', '(', 'long', ')', 37 | 'last_repnz_prefix', '*', '4', ')', '=', '0x4f2', ';', '}', '}' 38 | ] 39 | --------------------------------------------------------------------------------