├── .gitignore ├── LICENSE ├── README.md ├── code_diff ├── __init__.py ├── ast.py ├── diff_utils.py ├── gumtree │ ├── __init__.py │ ├── chawathe.py │ ├── editmap.py │ ├── isomap.py │ ├── ops.py │ └── utils.py ├── sstubs.py └── utils.py ├── pyproject.toml ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py └── test_sstubs.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | #VSCode 132 | .vscode/ 133 | scripts/ 134 | 135 | .DS_Store 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021-2022 Cedric Richter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code Diff 2 | ------------------------------------------------ 3 | > Fast AST based code differencing in Python 4 | 5 | Software projects are constantly evolving to integrate new features or improve existing implementations. To keep track of this progress, it becomes important to track individual code changes. Code differencing provides a way 6 | to identify the smallest code change between two 7 | implementations. 8 | 9 | **code.diff** provides a fast alternative to standard code differencing techniques with a focus 10 | on AST based code differencing. As part of this library, we include a fast reimplementation of the [**GumTree**](https://github.com/GumTreeDiff/gumtree) algorithm. However, by relying on 11 | a best-effort AST parser, we are able to generate 12 | AST code changes for individual code snippets. Many 13 | programming languages including Python, Java and JavaScript are supported! 14 | 15 | 16 | ## Installation 17 | The package is tested under Python 3. 
It can be installed via: 18 | ``` 19 | pip install code-diff 20 | ``` 21 | 22 | ## Usage 23 | code.diff can compute a code difference for nearly any program code in a few lines of code: 24 | ```python 25 | import code_diff as cd 26 | 27 | # Python 28 | output = cd.difference( 29 | ''' 30 | def my_func(): 31 | print("Hello World") 32 | ''', 33 | ''' 34 | def say_helloworld(): 35 | print("Hello World") 36 | ''', 37 | lang = "python") 38 | 39 | # Output: my_func -> say_helloworld 40 | 41 | output.edit_script() 42 | 43 | # Output: 44 | # [ 45 | # Update((identifier:my_func, line 1:12 - 1:19), say_helloworld) 46 | #] 47 | 48 | 49 | # Java 50 | output = cd.difference( 51 | ''' 52 | int x = x + 1; 53 | ''', 54 | ''' 55 | int x = x / 2; 56 | ''', 57 | lang = "java") 58 | 59 | # Output: x + 1 -> x / 2 60 | 61 | output.edit_script() 62 | 63 | # Output: [ 64 | # Insert(/:/, (binary_operator, line 0:4 - 0:9), 1), 65 | # Update((integer:1, line 0:8 - 0:9), 2), 66 | # Delete((+:+, line 0:6 - 0:7)) 67 | #] 68 | 69 | 70 | ``` 71 | ## Language support 72 | code.diff supports most programming languages 73 | where an AST can be computed. To parse an AST, 74 | the underlying parser employs 75 | * [**code.tokenize:**](https://github.com/cedricrupb/code_tokenize) A frontend for 76 | tree-sitter to effectively parse and tokenize 77 | program code in Python. 78 | 79 | * [**tree-sitter:**](https://tree-sitter.github.io/tree-sitter/) A best-effort AST parser supporting 80 | many programming languages including Python, Java and JavaScript. 81 | 82 | To decide whether your code can be handled by code.diff, please review the libraries above. 83 | 84 | **GumTree:** To compute an edit script between a source and target AST, we employ a Python reimplementation of the [GumTree](https://github.com/GumTreeDiff/gumtree) algorithm. Note however that the computed scripts are heavily dependent on the AST representation of the given code. 
Therefore, an AST edit script computed with code.diff might differ significantly from the one computed by GumTree. 85 | 86 | 87 | ## Release history 88 | * 0.1.2 89 | * Fix of the release information 90 | * Fix bug in 0.1.1 release 91 | * Package now usable by installing from PyPI 92 | * 0.1.0 93 | * Initial functionality 94 | * Documentation 95 | * SStuB Testing 96 | 97 | ## Project Info 98 | The goal of this project is to provide developers with easy access to AST-based code differencing. This is currently developed as a helper library for internal research projects. Therefore, it will only be updated as needed. 99 | 100 | Feel free to open an issue if anything unexpected 101 | happens. 102 | 103 | [Cedric Richter](https://uol.de/informatik/formale-methoden/team/cedric-richter) - [@cedricrichter](https://twitter.com/cedrichter) - cedric.richter@uni-oldenburg.de 104 | 105 | Distributed under the MIT license. See ``LICENSE`` for more information. 106 | 107 | 108 | -------------------------------------------------------------------------------- /code_diff/__init__.py: -------------------------------------------------------------------------------- 1 | from code_tokenize.lang import load_from_lang_config 2 | from code_tokenize.tokens import match_type 3 | 4 | from .ast import parse_ast 5 | from .utils import cached_property 6 | from .sstubs import SStubPattern, classify_sstub 7 | from .gumtree import compute_edit_script, EditScript, Update 8 | 9 | 10 | # Main method -------------------------------------------------------- 11 | 12 | def difference(source, target, lang = "guess", **kwargs): 13 | """ 14 | Computes the smallest difference between source and target 15 | 16 | Computes the smallest code difference between the given 17 | code snippets. Difference is computed by a simulteanous 18 | walk over the ASTs of the given code snippets. Returned 19 | will be the smallest code snippet that represent 20 | the first AST node found to be different. 
# Diff Search --------------------------------------------------------
# Descend until we find a node with at least two diffs

def diff_search(source_ast, target_ast):
    """
    Locate the smallest pair of subtrees that contains the whole difference

    Starting at the given roots, the search descends as long as exactly
    one pair of same-index children is not isomorph. It stops at the first
    pair of nodes whose child counts differ, or which have zero or more
    than one differing child pair.

    Parameters
    ----------
    source_ast : ASTNode
        Root of the source tree (may be None)

    target_ast : ASTNode
        Root of the target tree (may be None)

    Returns
    -------
    tuple
        (source_node, target_node) spanning the difference, or
        (None, None) if either tree is missing or both trees are isomorph.

    """
    # A missing tree on either side means there is nothing to compare.
    # Previously only a missing source was handled; a missing target raised
    # an AttributeError inside isomorph().
    if source_ast is None or target_ast is None:
        return None, None

    if source_ast.isomorph(target_ast):
        return None, None

    source_node, target_node = source_ast, target_ast

    while True:
        source_children = source_node.children
        target_children = target_node.children

        # Children cannot be aligned by index: the change lives at this level
        if len(source_children) != len(target_children):
            return source_node, target_node

        differing = [
            (source_child, target_child)
            for source_child, target_child in zip(source_children, target_children)
            if not source_child.isomorph(target_child)
        ]

        # Zero differing children (the nodes themselves differ) or several
        # differing children: this pair is the smallest enclosing change.
        if len(differing) != 1:
            return source_node, target_node

        # Exactly one differing child pair: narrow the search to it
        source_node, target_node = differing[0]
140 | 141 | statement_diff : ASTDiff 142 | raises the AST difference to the statement level 143 | 144 | root_diff : ASTDiff 145 | raises the AST difference to the root level (of each code snippet) 146 | 147 | 148 | """ 149 | 150 | def __init__(self, config, source_ast, target_ast): 151 | self.config = config 152 | self.source_ast = source_ast 153 | self.target_ast = target_ast 154 | 155 | @cached_property 156 | def is_single_statement(self): 157 | return (is_single_statement(self.config.statement_types, self.source_ast) 158 | and is_single_statement(self.config.statement_types, self.target_ast)) 159 | 160 | @cached_property 161 | def source_text(self): 162 | return tokenize_tree(self.source_ast) 163 | 164 | @cached_property 165 | def target_text(self): 166 | return tokenize_tree(self.target_ast) 167 | 168 | def statement_diff(self): 169 | source_stmt = parent_statement(self.config.statement_types, self.source_ast) 170 | target_stmt = parent_statement(self.config.statement_types, self.target_ast) 171 | 172 | if source_stmt is None or target_stmt is None: 173 | raise ValueError("AST diff is not enclosed in a statement") 174 | 175 | return ASTDiff(self.config, source_stmt, target_stmt) 176 | 177 | def root_diff(self): 178 | return ASTDiff(self.config, ast_root(self.source_ast), ast_root(self.target_ast)) 179 | 180 | def sstub_pattern(self): 181 | if self.config.lang != "python": 182 | raise ValueError("SStuB can currently only be computed for Python code.") 183 | 184 | if (parent_statement(self.config.statement_types, self.source_ast) is None 185 | or parent_statement(self.config.statement_types, self.target_ast) is None): 186 | return SStubPattern.NO_STMT 187 | 188 | if not self.is_single_statement: 189 | return SStubPattern.MULTI_STMT 190 | 191 | return classify_sstub(*diff_search(self.source_ast, self.target_ast)) 192 | 193 | def edit_script(self): 194 | 195 | source_ast, target_ast = self.source_ast, self.target_ast 196 | 197 | if source_ast.type == 
# AST Utils -----------------------------------------------------------

def _is_statement_type(statement_types, node_type):
    """True if node_type matches any of the given statement type rules."""
    return any(match_type(rule, node_type) for rule in statement_types)


def is_single_statement(statement_types, ast):
    """
    Test whether the node lies in a statement and contains no other statement

    Parameters
    ----------
    statement_types : iterable
        Type rules handed to match_type to recognise statement nodes

    ast : ASTNode
        Node to test

    Returns
    -------
    bool
        False if the node has no enclosing statement or if any descendant
        is itself a statement, True otherwise.

    """
    if parent_statement(statement_types, ast) is None:
        return False

    # Only existence matters, so the visiting order is irrelevant; a stack
    # avoids the O(n) cost of popping from the front of a list.
    pending = list(ast.children)
    while pending:
        node = pending.pop()
        if _is_statement_type(statement_types, node.type):
            return False
        pending.extend(node.children)

    return True


def parent_statement(statement_types, ast):
    """
    Find the closest statement node at or above the given node

    Walks the parent chain starting at the node itself.

    Returns
    -------
    ASTNode
        The first node whose type matches a statement type, or None if the
        root is passed without a match.

    """
    node = ast
    while node is not None and not _is_statement_type(statement_types, node.type):
        node = node.parent

    return node


def ast_root(ast):
    """Follow parent links from the given node and return the root node."""
    node = ast
    while node.parent is not None:
        node = node.parent

    return node
def is_compatible_root(root_candidate, source_ast):
    """
    Decide whether root_candidate can serve as a root for source_ast

    A candidate is rejected if equal_text reports the same span as
    source_ast, or if its type is "block".
    """
    if equal_text(source_ast, root_candidate):
        return False

    return root_candidate.type != "block"


def equal_text(source_ast, parent_ast):
    """
    Compare the positions of two nodes

    Positions are indexed as position[0] (start) and position[1] (end),
    each indexed as [0] (row) and [1] (column).

    Returns
    -------
    bool
        False if the parent starts on an earlier row than the source or
        ends on a later row; otherwise whether start and end columns
        of both nodes agree.

    """
    source_start, source_end = source_ast.position[0], source_ast.position[1]
    parent_start, parent_end = parent_ast.position[0], parent_ast.position[1]

    # Row check first: a parent reaching beyond the source rows differs
    if parent_start[0] < source_start[0] or source_end[0] < parent_end[0]:
        return False

    return source_start[1] == parent_start[1] and source_end[1] == parent_end[1]
def default_create_node(type, children, text = None, position = None):
    """
    Create an ASTNode and annotate it with subtree metrics

    The given children get the new node as parent. Height, weight and
    hash of the new node are derived from the values already stored on
    the children.

    Parameters
    ----------
    type : str
        Syntactic type of the new node

    children : list[ASTNode]
        Child nodes, already annotated with their subtree metrics

    text : str
        Token text of the node, if any

    position
        Position information stored on the node

    Returns
    -------
    ASTNode
        The newly created, annotated node

    """
    node = ASTNode(type, text = text, position = position, children = children)

    # Set parent relation
    for child in children:
        child.parent = node

    # A node without children has height 1 and weight 1
    node.subtree_height = max([1] + [child.subtree_height + 1 for child in children])
    node.subtree_weight = 1 + sum(child.subtree_weight for child in children)

    # WL hash subtree representation: own label followed by child hashes
    label = node.text if node.text is not None else node.type
    signature = "_".join([label] + [str(child.subtree_hash) for child in children])
    node.subtree_hash = hash(signature)

    return node
class BottomUpParser:
    """
    Builds an AST bottom-up from a token stream

    Tokens that carry an ``ast_node`` attribute are turned into nodes first.
    Inner nodes are created once all of their non-comment children have
    been created. Nodes of type "comment" are ignored.

    Parameters
    ----------
    create_node_fn : callable
        Called as create_node_fn(type, children, text = ..., position = ...)
        to construct each node.

    """

    def __init__(self, create_node_fn):
        from collections import deque

        self.create_node_fn = create_node_fn
        self.waitlist = deque()  # Invariant: All children have been processed
        self.open_index = {}     # Backend nodes that are scheduled but not yet created
        self.node_index = {}     # Nodes that have been processed

    def _should_ignore(self, node):
        """True for backend nodes that are left out of the AST (comments)."""
        return node.type == "comment"

    def _add_to_waitlist(self, node):
        """Schedule a backend node unless it is ignored, scheduled or done."""
        if self._should_ignore(node): return

        node_key = _node_key(node)

        if node_key not in self.node_index and node_key not in self.open_index:
            self.open_index[node_key] = node
            self.waitlist.append(node)

    def _init_lists(self, tokens):
        """
        Create nodes for all tokens and schedule the remaining leaves

        Returns the backend root node, or None if no token carries an
        ``ast_node`` (e.g. for empty input).
        """
        # Last backend node seen; initialised so that input without any
        # AST-backed token no longer raises an UnboundLocalError.
        ast_node = None

        for token in tokens:
            if not hasattr(token, 'ast_node'): continue

            ast_node = token.ast_node
            if self._should_ignore(ast_node): continue

            self.open_index[_node_key(ast_node)] = ast_node
            self._create_node(ast_node, token.text)

        if ast_node is None: return None

        # Get to root
        root = ast_node
        while root.parent is not None:
            root = root.parent

        self._open_descandents(root)

        return root

    def _open_descandents(self, node):
        """Schedule every node below `node` that has no unprocessed child."""
        from collections import deque

        queue = deque([node])
        while queue:
            current_node = queue.popleft()

            has_opened = False
            for child in current_node.children:
                if _node_key(child) not in self.node_index:
                    has_opened = True
                    queue.append(child)

            if not has_opened:
                self._add_to_waitlist(current_node)

    def _open_parent(self, ast_node):
        """Schedule the parent once all its non-ignored children exist."""
        parent = ast_node.parent

        if all(_node_key(c) in self.node_index for c in parent.children if not self._should_ignore(c)):
            self._add_to_waitlist(parent)

    def _create_node(self, ast_node, text = None):
        """Create the node for a backend node and try to open its parent."""
        node_key = _node_key(ast_node)
        # Children that were never created (e.g. comments) are left out
        children = [self.node_index[_node_key(c)] for c in ast_node.children
                        if _node_key(c) in self.node_index]

        position = (ast_node.start_point, ast_node.end_point)
        current_node = self.create_node_fn(ast_node.type, children, text = text, position = position)
        current_node.backend = ast_node

        self.node_index[node_key] = current_node
        del self.open_index[node_key]

        if ast_node.parent: self._open_parent(ast_node)

    def __call__(self, tokens):
        """
        Parse the tokens into an AST

        Returns the created root node, or None if there is no root or the
        root could not be created.
        """
        root_node = self._init_lists(tokens)

        # Nothing to build: avoid computing a key for a missing root
        if root_node is None:
            return None

        while self.waitlist:
            self._create_node(self.waitlist.popleft())

        return self.node_index.get(_node_key(root_node))
# Diff parsing -----------------------------------------------------------------

class Hunk:
    """
    A single hunk of a unified diff

    Attributes
    ----------
    lines : list[str]
        Body lines of the hunk, each still carrying its diff marker

    added_lines : set[int]
        Indices into `lines` of added ("+") lines

    rm_lines : set[int]
        Indices into `lines` of removed ("-") lines

    header : str
        The hunk header line, if known

    """

    def __init__(self, lines, added_lines, rm_lines, header = None):
        self.lines = lines
        self.added_lines = set(added_lines)
        self.rm_lines = set(rm_lines)
        self.header = header

    def _render(self, dropped, unmarked):
        """Join the lines, skipping `dropped` indices and blanking the marker of `unmarked` ones."""
        rendered = []

        for i, line in enumerate(self.lines):
            if i in dropped: continue
            # Replace the +/- marker by a space so that columns stay aligned
            rendered.append(" " + line[1:] if i in unmarked else line)

        return "".join(rendered)

    @property
    def after(self):
        """Hunk text after the change: removed lines dropped, added lines kept."""
        return self._render(self.rm_lines, self.added_lines)

    @property
    def before(self):
        """Hunk text before the change: added lines dropped, removed lines kept."""
        return self._render(self.added_lines, self.rm_lines)

    def __repr__(self):
        body = "".join(self.lines)

        if self.header:
            return self.header + body

        return body


def _parse_hunk(lines, start, end):
    """Build a Hunk from lines[start] (header) and lines[start + 1:end] (body)."""
    hunk_lines = lines[start + 1:end]

    added_lines = [i for i, hline in enumerate(hunk_lines) if hline.startswith("+")]
    rm_lines = [i for i, hline in enumerate(hunk_lines) if hline.startswith("-")]

    return Hunk(hunk_lines, added_lines, rm_lines, header = lines[start])


# Raw string: "\d" and "\+" are regex escapes, not string escapes
hunk_pat = re.compile(r"@@ -(\d+)(,\d+)? \+(\d+)(,\d+)? @@.*")

def parse_hunks(diff):
    """
    Split a unified diff into its hunks

    Every line matching the hunk header pattern starts a new hunk which
    extends up to the next header or the end of the diff. Lines before
    the first header are ignored.

    Parameters
    ----------
    diff : str
        Diff text

    Returns
    -------
    list[Hunk]
        Hunks in order of appearance

    """
    lines = diff.splitlines(True)

    hunks = []
    start_ix = -1

    for line_ix, line in enumerate(lines):
        if not hunk_pat.match(line): continue

        # The previous hunk ends right before this header. _parse_hunk takes
        # an exclusive end, so passing line_ix keeps the last body line,
        # which was dropped before. Headers without a body are still skipped.
        if start_ix >= 0 and line_ix - start_ix > 1:
            hunks.append(_parse_hunk(lines, start_ix, line_ix))

        start_ix = line_ix

    if 0 <= start_ix < len(lines):
        hunks.append(_parse_hunk(lines, start_ix, len(lines)))

    return hunks


# Diff cleaning --------------------------------

def _has_incomplete_comment(lines):
    """True if triple-double or triple-single quotes are left unbalanced over the lines."""
    is_incomplete2 = False
    is_incomplete1 = False

    for line in lines:
        # A line with an odd number of delimiters toggles the open state
        if line.count('"""') % 2 == 1: is_incomplete2 = not is_incomplete2
        if line.count("'''") % 2 == 1: is_incomplete1 = not is_incomplete1

    return is_incomplete1 or is_incomplete2


def _determine_incomplete_comment(lines):
    """
    Select the line range on one side of the unbalanced delimiter line

    Returns a (start, end) slice range over `lines`: the part after the
    delimiter line if that line index is smaller than the number of
    lines from it to the end, otherwise the part before it.
    Expects that an unbalanced delimiter exists.
    """
    last_incomplete2 = -1
    last_incomplete1 = -1

    for i, line in enumerate(lines):
        if line.count('"""') % 2 == 1:
            last_incomplete2 = i if last_incomplete2 == -1 else -1

        if line.count("'''") % 2 == 1:
            last_incomplete1 = i if last_incomplete1 == -1 else -1

    assert last_incomplete1 != -1 or last_incomplete2 != -1

    last_incomplete = last_incomplete2 if last_incomplete2 != -1 else last_incomplete1

    dist_to_end = len(lines) - last_incomplete

    if last_incomplete < dist_to_end:
        return last_incomplete + 1, len(lines)

    return 0, last_incomplete


def clean_hunk(hunk):
    """
    Cut a hunk so that it no longer contains an unbalanced triple-quote line

    Returns the hunk itself if its delimiters are balanced. Otherwise a
    new Hunk restricted to the selected line range, with added / removed
    indices shifted accordingly and the original header.
    """
    if not _has_incomplete_comment(hunk.lines): return hunk
    start, end = _determine_incomplete_comment(hunk.lines)

    new_lines = hunk.lines[start:end]
    added_lines = [l - start for l in hunk.added_lines if start <= l < end]
    rm_lines = [l - start for l in hunk.rm_lines if start <= l < end]

    return Hunk(new_lines, added_lines, rm_lines, header = hunk.header)
---------------------------------------------------------------- 30 | 31 | def _update_leaf(source_ast, target_ast): 32 | return Update(source_ast, target_ast.text) -------------------------------------------------------------------------------- /code_diff/gumtree/chawathe.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from .ops import Update, Insert, Delete, Move 4 | from .utils import bfs_traversal, postorder_traversal 5 | 6 | # API method ---------------------------------------------------------------- 7 | 8 | def compute_chawathe_edit_script(editmap, source, target): 9 | 10 | edit_script = [] 11 | 12 | source_root, source_parent = _fake_root(source) 13 | target_root, target_parent = _fake_root(target) 14 | editmap.add(source_root, target_root) 15 | 16 | wt = WorkingTree(editmap) 17 | wt[source].mod_parent = wt[source_root] # Inject fake root only for working copy 18 | 19 | for target_node in bfs_traversal(target): 20 | 21 | if target_node == target: # Script might start in the middle of AST 22 | parent = target_root 23 | else: 24 | parent = target_node.parent 25 | 26 | if parent is None: parent = target_root 27 | 28 | source_partner = wt.partner(target_node) 29 | parent_partner = wt.partner(parent) 30 | 31 | if source_partner is None: 32 | k = wt.position(target_node) 33 | op = Insert( 34 | parent_partner.delegate, 35 | (target_node.type, target_node.text), 36 | k, 37 | -1 38 | ) 39 | edit_script.append(op) 40 | node = parent_partner.apply(op) 41 | editmap.add(node, target_node) 42 | 43 | elif target_node.parent is not None: 44 | 45 | if target_node.text is not None and source_partner.text != target_node.text: 46 | op = Update(source_partner.delegate, target_node.text) 47 | edit_script.append(op) 48 | source_partner.apply(op) 49 | 50 | partner_parent = source_partner.parent 51 | 52 | if not editmap[partner_parent.delegate, parent]: 53 | k = wt.position(target_node) 54 | op = Move( 55 | 
parent_partner.delegate, 56 | source_partner.delegate, 57 | k 58 | ) 59 | edit_script.append(op) 60 | parent_partner.apply(op) 61 | 62 | target_node.inorder = True 63 | for move in _align_children(wt.partner(target_node), target_node, wt): 64 | edit_script.append(move) 65 | 66 | for node in postorder_traversal(source): 67 | node = wt[node] 68 | partner = node.partner 69 | if partner is None: 70 | op = Delete(node.delegate) 71 | edit_script.append(op) 72 | node.apply(op) 73 | 74 | # Change root back after edit 75 | source.parent = source_parent 76 | target.parent = target_parent 77 | 78 | return edit_script 79 | 80 | 81 | # Alignment ------------------------------------------------------------------ 82 | 83 | def _longest_common_subsequence(source, target, equal_fn): 84 | 85 | lengths = [[0] * (len(target)+1) for _ in range(len(source)+1)] 86 | for i, x in enumerate(source): 87 | for j, y in enumerate(target): 88 | if equal_fn(x, y): 89 | lengths[i+1][j+1] = lengths[i][j] + 1 90 | else: 91 | lengths[i+1][j+1] = max(lengths[i+1][j], lengths[i][j+1]) 92 | 93 | result = [] 94 | 95 | # Backtrack 96 | i, j = len(source), len(target) 97 | while i > 0 and j > 0: 98 | if equal_fn(source[i - 1], target[j - 1]): 99 | result.append((source[i - 1], target[j - 1])) 100 | i -= 1 101 | j -= 1 102 | else: 103 | if lengths[i][j - 1] > lengths[i - 1][j]: 104 | j -= 1 105 | elif lengths[i][j - 1] == lengths[i - 1][j]: 106 | # Heuristic we like to select terminal nodes for LCS 107 | 108 | if source[i - 1].text is None: 109 | i -= 1 110 | else: 111 | j -= 1 112 | 113 | else: 114 | i -= 1 115 | 116 | return result[::-1] 117 | 118 | 119 | def _align_children(source, target, wt): 120 | for c in source.children: c.inorder = False 121 | for c in target.children: c.inorder = False 122 | 123 | def _partner_child(c, o, src_partner = False): 124 | p = wt.partner(c) if src_partner else c.partner 125 | if p is None: return False 126 | return p.parent == o 127 | 128 | S1 = [c for c in 
source.children if _partner_child(c, target)] 129 | S2 = [c for c in target.children if _partner_child(c, source, True)] 130 | 131 | S = _longest_common_subsequence(S1, S2, lambda x, y: wt.isomap[x.delegate, y]) 132 | 133 | SM = set() 134 | 135 | for a, b in S: 136 | a.inorder = True 137 | b.inorder = True 138 | SM.add((a, b)) 139 | 140 | for a, b in itertools.product(S1, S2): 141 | if wt.isomap[a.delegate, b] and (a, b) not in SM: 142 | k = wt.position(b) 143 | op = Move(a.delegate, source.delegate, k) 144 | yield op 145 | source.apply(op) 146 | a.inorder = True 147 | b.inorder = True 148 | 149 | # Working tree ---------------------------------------------------------------- 150 | # A tree to capture all AST modifications during edit 151 | 152 | class InsertNode: 153 | 154 | INSERT_COUNT = 0 155 | 156 | def __init__(self, type, text = None, children = None): 157 | self.type = type 158 | self.text = text 159 | 160 | self.node_id = InsertNode.INSERT_COUNT 161 | InsertNode.INSERT_COUNT += 1 162 | 163 | self.parent = None 164 | self.children = children if children is not None else [] 165 | 166 | def __repr__(self): 167 | output = {"type": self.type, "text": self.text} 168 | return "IN(%s)" % ", ".join(["%s=%s" % (k, v) for k, v in output.items() if v is not None]) 169 | 170 | 171 | def _fake_root(root): 172 | node = InsertNode("root", None, [root]) 173 | node.parent = None 174 | 175 | old_parent = root.parent 176 | root.parent = node 177 | 178 | return node, old_parent 179 | 180 | 181 | class WorkingNode: 182 | 183 | def __init__(self, src, delegate): 184 | self.src = src 185 | self.delegate = delegate 186 | 187 | self.text = self.delegate.text 188 | self.mod_parent = None 189 | self.mod_children = None 190 | 191 | self.mod_partner = None 192 | 193 | @property 194 | def parent(self): 195 | 196 | if self.mod_parent is None: 197 | self.mod_parent = self.src._access_wn(self.delegate.parent) 198 | 199 | return self.mod_parent 200 | 201 | @property 202 | def 
class WorkingTree:
    """Lazily built shadow of the source tree used while an edit script is computed.

    Source nodes are wrapped in ``WorkingNode`` objects on first access and
    cached, so modifications recorded on a wrapper are seen by every later
    lookup of the same node.
    """

    def __init__(self, isomap):
        self.isomap = isomap        # mapping queried for source/target partners
        self.node_to_wn = {}        # cache: source node -> WorkingNode

    def _access_wn(self, source_node):
        """Return the cached ``WorkingNode`` for ``source_node`` (creating it if needed).

        ``None`` is passed through, and an object that already is a
        ``WorkingNode`` is returned unchanged.
        """
        if source_node is None: return None

        if isinstance(source_node, WorkingNode):
            return source_node

        if source_node not in self.node_to_wn:
            self.node_to_wn[source_node] = WorkingNode(self, source_node)

        return self.node_to_wn[source_node]

    def __getitem__(self, key):
        # wt[node] is shorthand for wt._access_wn(node)
        return self._access_wn(key)


    def partner(self, target_node):
        """Return the working node mapped to ``target_node``, or None if unmapped.

        Only the first entry produced by ``isomap[None, target_node]`` is
        used. As a side effect the returned wrapper's ``mod_partner`` is set
        to ``target_node``.
        """
        if target_node is None: return None

        # presumably an iterator of (source, target) pairs -- verify against the isomap type
        result = self.isomap[None, target_node]
        result = next(result, None)

        if result is None: return None

        source_node = result[0]
        wn = self._access_wn(source_node)
        wn.mod_partner = target_node
        return wn

    def position(self, target_node):
        """Compute the child index at which ``target_node``'s counterpart is placed.

        Returns 0 when the node has no parent or when no sibling to its left
        is flagged ``inorder``. Otherwise the nearest ``inorder`` sibling to
        the left is located, its partner is looked up, and a 1-based count
        derived from that partner's position is returned.
        """
        parent = target_node.parent

        if parent is None: return 0

        # n ends up as the index of target_node among its siblings
        # (or the last index if it is not found).
        for n, child in enumerate(parent.children):
            if child == target_node: break

        if all(not c.inorder for c in parent.children[:n]):
            return 0

        # Walk left until an in-order sibling is found; the check above
        # guarantees that one exists among parent.children[:n].
        left_child = parent.children[n - 1]
        while not left_child.inorder:
            n -= 1
            left_child = parent.children[n - 1]

        left_partner = self.partner(left_child)

        # n is rebound here: index of left_partner among its own parent's children.
        for n, child in enumerate(left_partner.parent.children):
            if child == left_partner: break

        # NOTE(review): the index found on the partner (source) side is used to
        # slice the *target* parent's children -- confirm this mix is intended.
        return sum(1 for c in parent.children[:n] if c.inorder) + 1
def gumtree_editmap(isomap, source, target, max_size = 1000, min_dice = 0.5):
    """Extend ``isomap`` with mappings recovered for not-yet-mapped inner nodes.

    The mapping is modified in place and also returned. An empty mapping is
    returned untouched. Source nodes are visited in post-order:

    * the root is always mapped to ``target`` and ends the traversal,
    * leaves and already mapped nodes are skipped,
    * any other node is mapped to its best candidate if the candidate's dice
      score exceeds ``min_dice``.

    For every accepted pair the pairs yielded by ``_minimal_edit`` for the
    two subtrees are added as well.
    """
    if len(isomap) == 0:
        return isomap

    def _adopt_minimal_edit(src_root, dst_root):
        # _minimal_edit is consumed lazily, so pairs added here are already
        # visible to its own "is mapped" checks.
        for src, dst in _minimal_edit(isomap, src_root, dst_root, max_size):
            isomap.add(src, dst)

    for current in postorder_traversal(source):

        if current == source:
            # Root reached: it is mapped first, then its subtree is recovered.
            isomap.add(current, target)
            _adopt_minimal_edit(current, target)
            break

        is_leaf = len(current.children) == 0
        if is_leaf or (current, None) in isomap:
            continue

        candidate, dice = _select_near_candidate(current, isomap)

        if candidate is None or dice <= min_dice:
            continue

        # Inner node: subtree pairs first, the node itself afterwards.
        _adopt_minimal_edit(current, candidate)
        isomap.add(current, candidate)

    return isomap
def gumtree_isomap(source_ast, target_ast, min_height = 1):
    """Map isomorphic subtrees of ``source_ast`` to subtrees of ``target_ast``.

    Both trees are explored from the top, tallest subtrees first. Subtrees of
    equal height are compared pairwise with ``isomorph``; unique matches are
    mapped immediately (including all descendants), ambiguous ones are
    collected and resolved at the end with a selection heuristic. Exploration
    stops once the tallest open subtree is not higher than ``min_height``.

    Returns the ``NodeMapping`` holding all accepted pairs.
    """

    isomorphic_mapping = NodeMapping()   # accepted pairs
    candidate_mapping = NodeMapping()    # ambiguous pairs, decided after the loop

    # How often each subtree occurs in its own tree; a count above one
    # makes a match ambiguous.
    source_index = _index_iso_nodes(source_ast)
    target_index = _index_iso_nodes(target_ast)

    source_open = HeightPriorityHeap(source_ast)
    target_open = HeightPriorityHeap(target_ast)

    while max(source_open.max(), target_open.max()) > min_height:

        # Heights differ: expand the taller side until both heaps are level.
        # pop() is materialised with list() before children are pushed
        # back onto the same heap.
        if source_open.max() > target_open.max():
            for c in list(source_open.pop()):
                _open_node(source_open, c)
            continue

        if source_open.max() < target_open.max():
            for c in list(target_open.pop()):
                _open_node(target_open, c)
            continue

        source_candidates, target_candidates = list(source_open.pop()), list(target_open.pop())

        for source_node, target_node in itertools.product(source_candidates, target_candidates):
            # Source node and target node have the same height
            # Check if source node is isomorphic to target node

            if source_node.isomorph(target_node):
                # Check whether more candidates exist
                if (source_index[source_node] > 1
                    or target_index[target_node] > 1):
                    candidate_mapping.add(source_node, target_node)
                else:
                    # We can safely map both nodes and all descendants
                    _map_recursively(isomorphic_mapping, source_node, target_node)

        # Open all unmapped nodes
        for source_node in source_candidates:
            if ((source_node, None) not in isomorphic_mapping
                and (source_node, None) not in candidate_mapping):
                _open_node(source_open, source_node)

        for target_node in target_candidates:
            if ((None, target_node) not in isomorphic_mapping
                and (None, target_node) not in candidate_mapping):
                _open_node(target_open, target_node)

    # Select the heuristically best mapping for all isomorphic pairs
    selection_heuristic = create_default_heuristic(isomorphic_mapping)
    for source_node, target_node in _select_candidates(candidate_mapping, selection_heuristic):
        _map_recursively(isomorphic_mapping, source_node, target_node)

    return isomorphic_mapping
self.push(start_node) 94 | 95 | def __len__(self): 96 | return len(self._heap) 97 | 98 | def push(self, x, seed = 0): 99 | try: 100 | heapq.heappush(self._heap, (-x.subtree_height, x.subtree_hash, self.element_count, seed, x)) 101 | self.element_count += 1 102 | except TypeError: 103 | # Typically the type error occurs if we compare with the last element in tuple (Node) 104 | # If this happens the node is already contained in the heap and we skip this push 105 | return 106 | 107 | def max(self): 108 | if len(self) == 0: return 0 109 | return -self._heap[0][0] 110 | 111 | def pop(self): 112 | current_head = self.max() 113 | 114 | while len(self) > 0 and self.max() == current_head: 115 | yield heapq.heappop(self._heap)[-1] 116 | 117 | # Helper methods ----------------------------------------------------------- 118 | 119 | def _index_iso_nodes(ast): 120 | result = NodeCounter() 121 | for node in ast: result[node] += 1 122 | 123 | return result 124 | 125 | def _open_node(heap, node): 126 | for n, child in enumerate(node.children): 127 | heap.push(child, seed = n) 128 | 129 | def _map_recursively(mapping, source_node, target_node): 130 | mapping.add(source_node, target_node) 131 | 132 | for i, source_child in enumerate(source_node.children): 133 | target_child = target_node.children[i] 134 | assert source_node.type == target_node.type 135 | 136 | _map_recursively(mapping, source_child, target_child) 137 | 138 | # Heuristic selection ---------------------------------------------------------------- 139 | 140 | 141 | def source_distance(source_node, target_node): 142 | 143 | max_token_mover = 1000 144 | 145 | line_mover_distance = source_node.position[0][0] - target_node.position[1][0] 146 | line_mover_distance = line_mover_distance * max_token_mover 147 | 148 | if line_mover_distance == 0: 149 | token_mover_distance = min(abs(source_node.position[0][1] - target_node.position[0][1]), max_token_mover - 1) 150 | line_mover_distance += token_mover_distance 151 | 152 | return 
-line_mover_distance 153 | 154 | 155 | 156 | def create_default_heuristic(isomorphic_mapping): 157 | 158 | def _heuristic(source_node, target_node): 159 | return (subtree_dice(source_node, target_node, isomorphic_mapping), source_distance(source_node, target_node)) 160 | 161 | return _heuristic 162 | 163 | 164 | def _select_candidates(candidate_mapping, heuristic = None): 165 | if len(candidate_mapping) == 0: return 166 | 167 | candidate_pairs = [(s, t) for s, t in candidate_mapping] 168 | 169 | if heuristic is not None: 170 | candidate_pairs = sorted(candidate_pairs, 171 | key=lambda p: heuristic(*p), 172 | reverse=True) 173 | 174 | source_seen = set() 175 | target_seen = set() 176 | 177 | while len(candidate_pairs) > 0: 178 | source_node, target_node = candidate_pairs.pop(0) 179 | 180 | if source_node in source_seen: 181 | continue 182 | source_seen.add(source_node) 183 | 184 | if target_node in target_seen: 185 | continue 186 | target_seen.add(target_node) 187 | 188 | yield source_node, target_node 189 | 190 | -------------------------------------------------------------------------------- /code_diff/gumtree/ops.py: -------------------------------------------------------------------------------- 1 | import json 2 | from dataclasses import dataclass 3 | from typing import Any, Tuple 4 | 5 | @dataclass 6 | class EditOperation: 7 | target_node: Any 8 | 9 | @dataclass 10 | class Update(EditOperation): 11 | value: Any 12 | 13 | @dataclass 14 | class Insert(EditOperation): 15 | node: Tuple[str, Any] 16 | position: int 17 | insert_id: int # This is necessary to keep track of nodes (TODO: Better solution?) 
18 | 19 | @dataclass 20 | class Move(EditOperation): 21 | node: Any 22 | position: int 23 | 24 | @dataclass 25 | class Delete(EditOperation): 26 | pass 27 | 28 | # Edit script ---------------------------------------------------------------- 29 | 30 | class EditScript(list): 31 | 32 | def __init__(self, operations): 33 | super().__init__(operations) 34 | 35 | def __repr__(self): 36 | return serialize_script(self, indent = 2) 37 | 38 | 39 | # Serialization -------------------------------- 40 | 41 | 42 | def _serialize_new_node(new_node_index, node): 43 | 44 | if node.node_id not in new_node_index: 45 | new_node_index[node.node_id] = len(new_node_index) 46 | 47 | return "N%d" % new_node_index[node.node_id] 48 | 49 | def _serialize_ast_node(node): 50 | position = node.position 51 | node_text = node.type 52 | 53 | if node.text: node_text += ":" + node.text 54 | 55 | return "(%s, line %d:%d - %d:%d)" % (node_text, position[0][0], position[0][1], position[1][0], position[1][1]) 56 | 57 | 58 | def _serialize_node(new_node_index, node): 59 | 60 | if hasattr(node, 'node_id'): 61 | return _serialize_new_node(new_node_index, node) 62 | 63 | return _serialize_ast_node(node) 64 | 65 | 66 | def serialize_script(edit_script, indent = 0): 67 | 68 | sedit_script = [] 69 | new_node_index = {} 70 | 71 | for operation in edit_script: 72 | 73 | operation_name = operation.__class__.__name__ 74 | target_node_str = _serialize_node(new_node_index, operation.target_node) 75 | 76 | if operation_name == "Update": 77 | sedit_script.append("%s(%s, %s)" % (operation_name, target_node_str, operation.value)) 78 | 79 | elif operation_name == "Insert": 80 | 81 | new_node = operation.node 82 | 83 | if new_node[1] is None: 84 | new_node_index[operation.insert_id] = len(new_node_index) 85 | new_node_str = "(%s, %s)" % (new_node[0], "N%d" % new_node_index[operation.insert_id]) 86 | else: # Leaf node 87 | new_node_str = "%s:%s" % new_node 88 | 89 | sedit_script.append("%s(%s, %s, %d)" % (operation_name, 
new_node_str, target_node_str, operation.position)) 90 | 91 | elif operation_name == "Move": 92 | 93 | new_node_str = _serialize_node(new_node_index, operation.node) 94 | 95 | sedit_script.append("%s(%s, %s, %d)" % (operation_name, new_node_str, target_node_str, operation.position)) 96 | 97 | elif operation_name == "Delete": 98 | sedit_script.append("%s(%s)" % (operation_name, target_node_str)) 99 | 100 | if indent > 0: 101 | sedit_script = [" "*indent + e for e in sedit_script] 102 | return "[\n%s\n]" % (",\n").join(sedit_script) 103 | 104 | return "[%s]" % ", ".join(sedit_script) 105 | 106 | 107 | 108 | # Deserialize -------------------------------------------------------------------------------------------------------------------------------- 109 | 110 | class DASTNode: 111 | 112 | def __init__(self, type, position, text = None): 113 | self.type = type 114 | self.position = position 115 | self.text = text 116 | 117 | def __repr__(self): 118 | return "Node(%s, %s, %s)" % (self.type, str(self.text), self.position) 119 | 120 | 121 | class InsertNode: 122 | 123 | def __init__(self, node_id, type, text = None): 124 | self.node_id = node_id 125 | self.type = type 126 | self.text = text 127 | 128 | def __repr__(self): 129 | return "%s(%s, %s)" % (self.node_id, self.type, str(self.text)) 130 | 131 | 132 | def _split_args(inst): 133 | args = [] 134 | 135 | bracket_open = 0 136 | str_open = False 137 | for i, c in enumerate(inst): 138 | 139 | # Lookahead 140 | if i > 0 and i < len(inst) - 1 and c in ["(", ")", ",", "\"", "\'"]: 141 | if inst[i - 1] == ":" and inst[i - 2] == c: continue 142 | if inst[i + 1] == ":" and inst[i + 2] == c: continue 143 | 144 | if c in ["\"", "\'"]: 145 | str_open = not str_open 146 | 147 | if str_open: continue 148 | 149 | if c == "(": 150 | if bracket_open == 0: args.append(i) 151 | bracket_open += 1 152 | continue 153 | 154 | if c == ")": 155 | bracket_open -= 1 156 | if bracket_open == 0: args.append(i) 157 | continue 158 | 159 | if 
bracket_open == 1 and c == ",": 160 | args.append(i) 161 | 162 | return [inst[args[i - 1] + 1: args[i]].strip() for i in range(1, len(args))] 163 | 164 | 165 | def _deserialize_insert_node(node_registry, node_info): 166 | 167 | if "(" not in node_info or node_info in ["(:(", "):)"]: 168 | return InsertNode("T", *_parse_type(node_info)) 169 | 170 | node_type, node_id = _split_args(node_info) 171 | 172 | if node_id in node_registry: return node_registry[node_id] 173 | 174 | insert_node = InsertNode(node_id, node_type) 175 | node_registry[node_id] = insert_node 176 | 177 | return insert_node 178 | 179 | 180 | def _parse_type(node_type): 181 | if ":" in node_type: 182 | return node_type.split(":", 1) 183 | return node_type, None 184 | 185 | 186 | def _deserialize_node(node_registry, node_info): 187 | 188 | if "(" in node_info: 189 | ast_type, ast_position = _split_args(node_info) 190 | ast_type, ast_text = _parse_type(ast_type) 191 | return DASTNode(ast_type, ast_position, text = ast_text) 192 | 193 | if node_info in node_registry: 194 | return node_registry[node_info] 195 | 196 | return InsertNode(node_info, "unknown") 197 | 198 | 199 | def _deserialize_update(node_registry, inst): 200 | target_node, update = _split_args(inst) 201 | target_node = _deserialize_node(node_registry, target_node) 202 | return Update(target_node, update) 203 | 204 | 205 | def _deserialize_insert(node_registry, inst): 206 | new_node, target_node, position = _split_args(inst) 207 | 208 | new_node = _deserialize_insert_node(node_registry, new_node) 209 | target_node = _deserialize_node(node_registry, target_node) 210 | 211 | return Insert(target_node, new_node, int(position), -1) 212 | 213 | 214 | def _deserialize_delete(node_registry, inst): 215 | target_node = _split_args(inst)[0] 216 | target_node = _deserialize_node(node_registry, target_node) 217 | return Delete(target_node) 218 | 219 | 220 | def _deserialize_move(node_registry, inst): 221 | from_node, to_node, position = 
def deserialize_script(script_string):
    """Parse the multi-line text form of an edit script back into operations.

    The first and last physical lines (the enclosing brackets) are dropped;
    every remaining non-blank line must start with ``Update``, ``Insert``,
    ``Delete`` or ``Move`` and is handed to the matching deserializer. A
    script serialized on a single line therefore yields an empty list.

    Returns a list of operations.

    Raises:
        ValueError: if a line does not start with a known operation name.
    """
    instructions = script_string.split("\n")[1:-1]

    script = []
    node_registry = {}  # shared so that node labels resolve to the same object
    for instruction in instructions:
        instruction = instruction.strip()

        if not instruction:
            continue

        # Exactly one branch may fire; anything else is an error rather than
        # a silent re-use of the previously parsed operation.
        if instruction.startswith("Update"):
            op = _deserialize_update(node_registry, instruction)
        elif instruction.startswith("Insert"):
            op = _deserialize_insert(node_registry, instruction)
        elif instruction.startswith("Delete"):
            op = _deserialize_delete(node_registry, instruction)
        elif instruction.startswith("Move"):
            op = _deserialize_move(node_registry, instruction)
        else:
            raise ValueError("Unknown edit operation: %r" % instruction)

        script.append(op)

    return script
_json_serialize_node(new_node_index, operation.target_node) 284 | 285 | if operation_name == "Update": 286 | edit_ops.append([operation_name, target_node_str, operation.value]) 287 | 288 | elif operation_name == "Insert": 289 | 290 | new_node = operation.node 291 | 292 | if new_node[1] is None: 293 | new_node_index[operation.insert_id] = len(new_node_index) 294 | new_node_str = [new_node[0], "N%d" % new_node_index[operation.insert_id]] 295 | else: # Leaf node 296 | new_node_str = ["%s:%s" % new_node, "T"] 297 | 298 | edit_ops.append([operation_name, target_node_str, new_node_str, operation.position]) 299 | 300 | elif operation_name == "Move": 301 | 302 | new_node_str = _json_serialize_node(new_node_index, operation.node) 303 | 304 | edit_ops.append([operation_name, target_node_str, new_node_str, operation.position]) 305 | 306 | elif operation_name == "Delete": 307 | edit_ops.append([operation_name, target_node_str]) 308 | 309 | return json.dumps(edit_ops) 310 | 311 | 312 | # Fast deserialize ---------------------------------------------------------------------- 313 | 314 | def _json_deserialize_node(node_index, node_info): 315 | 316 | if not isinstance(node_info, list) and node_info != "T": 317 | node_id = int(node_info[1:]) 318 | return node_index[node_id] 319 | 320 | node_type, position = node_info[0], node_info[1:] 321 | node_text = None 322 | 323 | if ":" in node_type: 324 | node_type, node_text = node_type.split(":", 1) 325 | 326 | if len(position) == 4: 327 | return DASTNode(node_type, ((position[0], position[1]), (position[2], position[3])), node_text) 328 | 329 | return InsertNode(position[0], node_type, node_text) 330 | 331 | 332 | def _json_deserialize_node_constructor(node_index, cn_info): 333 | node_type, node_id = cn_info 334 | node_text = None 335 | 336 | if ":" in node_type: 337 | node_type, node_text = node_type.split(":", 1) 338 | 339 | if node_id != "T": 340 | node_id = int(node_id[1:]) 341 | node_index[node_id] = InsertNode(node_id, node_type, 
class NodeMapping:
    """Many-to-many mapping between source and target nodes.

    Supported queries (``None`` acts as a wildcard):

    * ``mapping[src, dst]``   -> bool, whether the pair is stored
    * ``mapping[src, None]``  -> iterator over ``(src, dst)`` pairs of ``src``
    * ``mapping[None, dst]``  -> iterator over ``(src, dst)`` pairs of ``dst``
    * ``mapping[None, None]`` -> iterator over all pairs
    * ``key in mapping``      -> same keys, answers "is there any match"

    A bare (non-tuple) key ``k`` is treated as ``(k, None)``.

    Queries never modify the mapping, so membership tests are safe while the
    mapping is being iterated.
    """

    def __init__(self):
        self._src_to_dst = defaultdict(set)
        self._dst_to_src = defaultdict(set)
        self._length = 0

    def __getitem__(self, key):
        if not isinstance(key, tuple): key = (key, None)

        src_key, dst_key = key

        # Reads use .get() on purpose: indexing the defaultdicts would insert
        # an empty set for every unknown key.
        if src_key is not None and dst_key is not None:
            return dst_key in self._src_to_dst.get(src_key, ())

        if src_key is None and dst_key is None:
            return self.__iter__()

        if src_key is None:
            return ((src, dst_key) for src in self._dst_to_src.get(dst_key, ()))

        return ((src_key, dst) for dst in self._src_to_dst.get(src_key, ()))

    def __iter__(self):
        """Iterate over all stored ``(src, dst)`` pairs."""

        def _iter_maps():
            for src, targets in self._src_to_dst.items():
                for dst in targets:
                    yield (src, dst)

        return _iter_maps()

    def __contains__(self, key):
        if not isinstance(key, tuple): key = (key, None)

        src_key, dst_key = key

        if src_key is not None and dst_key is not None:
            return self[src_key, dst_key]

        # Wildcard query: true if at least one pair matches.
        return next(self[src_key, dst_key], None) is not None

    def __len__(self):
        """Number of distinct pairs stored."""
        return self._length

    def add(self, src, dst):
        """Store the pair ``(src, dst)``; adding an existing pair is a no-op."""
        if not self[src, dst]:
            self._src_to_dst[src].add(dst)
            self._dst_to_src[dst].add(src)
            self._length += 1

    def __copy__(self):
        output = NodeMapping()

        for a, b in self:
            output.add(a, b)

        return output

    def __str__(self):
        approx_str = []

        for src, dst in self:
            approx_str.append("%s ≈ %s" % (str(src), str(dst)))

        return "\n".join(approx_str)
def bfs_traversal(tree):
    """Yield ``tree`` and all nodes below it in breadth-first order.

    Nodes only need a ``children`` iterable. Children are visited left to
    right, one level at a time.
    """
    # Local import so the function does not depend on the module header.
    from collections import deque

    # A deque gives O(1) pops from the front; list.pop(0) shifts the whole
    # list on every dequeue, which made the traversal quadratic.
    queue = deque([tree])

    while queue:
        node = queue.popleft()

        yield node

        queue.extend(node.children)


def dfs_traversal(tree):
    """Yield ``tree`` and all nodes below it in depth-first pre-order.

    Children are pushed on a stack in order and popped from the end, so the
    LAST child of a node is visited first.
    """
    stack = [tree]

    while stack:
        node = stack.pop()

        yield node

        stack.extend(node.children)


def postorder_traversal(tree):
    """Yield every node after all of its children (left-to-right post-order).

    Implemented iteratively: each stack entry is ``(node, index of the next
    child to descend into)``.
    """
    stack = [(tree, 0)]

    while stack:
        node, ix = stack.pop()

        if ix >= len(node.children):
            # All children handled: the node itself is due.
            yield node
        else:
            # Come back to this node for child ix + 1 once child ix is done.
            stack.append((node, ix + 1))
            stack.append((node.children[ix], 0))
class SStubPattern(Enum):
    """Edit-pattern labels; ``classify_sstub`` returns members of this enum."""

    # Coarse categories. SINGLE_STMT is also what the classifier functions in
    # this module return when they do not recognise a more specific pattern.
    MULTI_STMT = 0
    SINGLE_STMT = 1
    SINGLE_TOKEN = 2
    NO_STMT = 3

    # Functions
    WRONG_FUNCTION_NAME = 4

    SAME_FUNCTION_MORE_ARGS = 5
    SAME_FUNCTION_LESS_ARGS = 6
    SAME_FUNCTION_WRONG_CALLER = 7
    SAME_FUNCTION_SWAP_ARGS = 8

    ADD_FUNCTION_AROUND_EXPRESSION = 9
    ADD_METHOD_CALL = 10

    # Changes (single token)
    CHANGE_IDENTIFIER_USED = 11
    CHANGE_NUMERIC_LITERAL = 12
    CHANGE_BOOLEAN_LITERAL = 13

    # Change operator / operand
    CHANGE_UNARY_OPERATOR = 14
    CHANGE_BINARY_OPERATOR = 15
    CHANGE_BINARY_OPERAND = 16

    # Changes (Access)
    CHANGE_ATTRIBUTE_USED = 17
    CHANGE_KEYWORD_ARGUMENT_USED = 18
    CHANGE_CONSTANT_TYPE = 19

    ADD_ELEMENTS_TO_ITERABLE = 20
    ADD_ATTRIBUTE_ACCESS = 21

    # If condition
    MORE_SPECIFIC_IF = 22
    LESS_SPECIFIC_IF = 23

    # STRING
    CHANGE_STRING_LITERAL = 24 # This is not a sstub pattern but helpful for scanning results


# SStub classification -------------------------------

def classify_sstub(source_ast, target_ast):
    """Classify the edit that turns ``source_ast`` into ``target_ast``.

    Candidate classifier functions are collected based on the shape of the two
    nodes and then run in the order they were registered; the first result
    other than ``SStubPattern.SINGLE_STMT`` wins. If none matches, the edit is
    reported as ``CHANGE_BINARY_OPERAND`` when the source node is an operand of
    a binary/comparison/boolean operator, otherwise as ``SINGLE_STMT``.

    Both nodes are expected to expose ``type``, ``children`` and ``parent``;
    ``parent.type`` is read unconditionally, so both nodes must have a parent.

    Returns:
        A ``SStubPattern`` member.
    """
    # Assume tree is minimized to smallest edit

    classifier_fns = []

    # Leaf replaced by leaf: single-token patterns get first priority.
    if len(source_ast.children) == 0 and len(target_ast.children) == 0:
        classifier_fns.append(single_token_edit)

    # Both edits sit directly below a call to a function of the same name.
    if source_ast.parent.type == "call" and target_ast.parent.type == "call":
        source_name = _call_name(source_ast.parent)
        target_name = _call_name(target_ast.parent)

        if source_name == target_name:
            classifier_fns.append(same_function_mod)

    # The source lies in the condition of an if / elif / while.
    if (_query_path(source_ast, "if_statement", "condition")
            or _query_path(source_ast, "elif_clause", "condition")
            or _query_path(source_ast, "while_statement", "condition")):
        classifier_fns.append(change_if_statement)

    if source_ast.type in ["tuple", "list", "dictionary", "set"]:
        classifier_fns.append(change_iterable)

    if target_ast.type == "call" or target_ast.parent.type == "call":
        classifier_fns.append(add_function)

    if target_ast.type == "attribute":
        classifier_fns.append(add_attribute_access)

    # Unary operator changes short-circuit everything registered above.
    if "operator" in source_ast.type or "operator" in target_ast.type:
        if is_unary_operator_change(source_ast, target_ast):
            return SStubPattern.CHANGE_UNARY_OPERATOR

    # Now run all classifier functions
    for classifier_fn in classifier_fns:
        result = classifier_fn(source_ast, target_ast)

        # SINGLE_STMT means "this classifier found nothing specific".
        if result != SStubPattern.SINGLE_STMT:
            return result

    if is_binary_operand(source_ast, target_ast):
        return SStubPattern.CHANGE_BINARY_OPERAND

    return SStubPattern.SINGLE_STMT
------------------------------------------------------------------------- 96 | 97 | def _call_name(ast_node): 98 | function_node = ast_node.children[0] 99 | 100 | right_most = function_node 101 | while len(right_most.children) > 0: 102 | right_most = right_most.children[-1] 103 | 104 | return right_most.text 105 | 106 | 107 | 108 | def pisomorph(A, B): 109 | if A.isomorph(B): return True 110 | 111 | if A.type == "parenthesized_expression": 112 | return pisomorph(A.children[1], B) 113 | 114 | if B.type == "parenthesized_expression": 115 | return pisomorph(A, B.children[1]) 116 | 117 | return False 118 | 119 | 120 | 121 | # Binary operand ---------------------------------------------------------------- 122 | 123 | 124 | def is_binary_operand(source_ast, target_ast): 125 | 126 | for bin_op_type in ["binary_operator", "comparison_operator", "boolean_operator"]: 127 | for direction in ["left", "right"]: 128 | if (_query_path(source_ast, bin_op_type, direction, depth = 1)): 129 | return True 130 | 131 | return False 132 | 133 | 134 | 135 | # Single token edits -------------------------------- 136 | 137 | def _query_path(ast_node, type_query, edge_query = "*", depth = 1e9): 138 | 139 | last = None 140 | current = ast_node 141 | while current is not None: 142 | 143 | if current.type == type_query: 144 | 145 | if edge_query == "*": 146 | return True 147 | elif last is not None: 148 | if hasattr(current, "backend"): 149 | edge_child = current.backend.child_by_field_name(edge_query) 150 | return edge_child == last.backend 151 | 152 | last = current 153 | current = current.parent 154 | depth -= 1 155 | if depth < 0: break 156 | 157 | return False 158 | 159 | 160 | 161 | def _get_parent(ast_node, type_query, edge_query = "*", depth = 1e9): 162 | 163 | last = None 164 | current = ast_node 165 | while current is not None: 166 | 167 | if current.type == type_query: 168 | 169 | if edge_query == "*": 170 | return current 171 | elif last is not None: 172 | if hasattr(current, 
def wrong_function_name(source_ast, target_ast):
    """Return True if an identifier that names the called function was replaced.

    Both nodes must be identifiers, ``source_ast`` must lie in the ``function``
    field of an enclosing call, and it must be reachable from that field by
    repeatedly taking the last child (i.e. it is the right-most token of the
    called expression).
    """
    if source_ast.type != "identifier" or target_ast.type != "identifier":
        return False

    call_node = _get_parent(source_ast, "call", "function")
    if call_node is None:
        return False

    # Walk down the right spine of the called expression (backend nodes).
    cursor = call_node.backend.child_by_field_name("function")
    while cursor is not None and cursor != source_ast.backend:
        cursor = cursor.children[-1] if len(cursor.children) > 0 else None

    return cursor is not None


def change_numeric_literal(source_ast, target_ast):
    """Return True if both nodes are numeric literals (``integer`` or ``float``)."""
    numeric_types = ["integer", "float"]
    return source_ast.type in numeric_types and target_ast.type in numeric_types


def change_string_literal(source_ast, target_ast):
    """Return True if both nodes are of type ``string``."""
    return "string" == source_ast.type and "string" == target_ast.type


def change_boolean_literal(source_ast, target_ast):
    """Return True if both nodes are boolean literals (``true`` or ``false``)."""
    boolean_types = ["false", "true"]
    return source_ast.type in boolean_types and target_ast.type in boolean_types


def change_attribute_used(source_ast, target_ast):
    """Return True if ``source_ast`` is an identifier in the ``attribute`` field of its parent attribute node."""
    if source_ast.type != "identifier":
        return False
    return _query_path(source_ast, "attribute", "attribute", depth = 1)


def change_identifier_used(source_ast, target_ast):
    """Return True if one identifier was replaced by another outside of a declaration."""
    # Following ManySStuBs we ignore the following Method declaration, Class Declaration, Variable Declaration
    parent_type = source_ast.parent.type
    for marker in ["definition", "declaration"]:
        if marker in parent_type:
            return False

    return source_ast.type == "identifier" and target_ast.type == "identifier"


def change_binary_operator(source_ast, target_ast):
    """Return True if ``source_ast`` is the middle child (the operator token) of a binary-style operator node."""
    parent = source_ast.parent

    if parent.type not in ["binary_operator", "boolean_operator", "comparison_operator"]:
        return False

    return parent.children[1] == source_ast
232 | if source_ast.parent.type in ["binary_operator", "boolean_operator", "comparison_operator"]: 233 | bin_op = source_ast.parent 234 | return bin_op.children[1] == source_ast 235 | 236 | return False 237 | 238 | 239 | def _to_plain_constant(text): 240 | 241 | if "\'" in text: text = text[1:-1] 242 | if "\"" in text: text = text[1:-1] 243 | 244 | try: 245 | return float(text) 246 | except: 247 | try: 248 | return float(int(text)) 249 | except: 250 | return text 251 | 252 | 253 | def change_constant_type(source_ast, target_ast): 254 | 255 | if source_ast.type == "identifier": return False 256 | if target_ast.type == "identifier": return False 257 | 258 | if source_ast.type == target_ast.type: return False 259 | 260 | source_text = _to_plain_constant(source_ast.text) 261 | target_text = _to_plain_constant(target_ast.text) 262 | 263 | return source_text == target_text 264 | 265 | 266 | def change_keyword_argument_used(source_ast, target_ast): 267 | if source_ast.type == "identifier": 268 | return _query_path(source_ast, "keyword_argument", "name", depth = 1) 269 | return False 270 | 271 | 272 | def same_function_wrong_caller(source_ast, target_ast): 273 | if not source_ast.type == "identifier": return False 274 | 275 | if not _query_path(source_ast, "call", "function", depth = 2): return False 276 | 277 | return _query_path(source_ast, "attribute", "object", depth = 1) 278 | 279 | 280 | 281 | single_token_edits = { 282 | SStubPattern.WRONG_FUNCTION_NAME: wrong_function_name, 283 | SStubPattern.CHANGE_CONSTANT_TYPE: change_constant_type, 284 | SStubPattern.CHANGE_NUMERIC_LITERAL: change_numeric_literal, 285 | SStubPattern.CHANGE_BOOLEAN_LITERAL: change_boolean_literal, 286 | SStubPattern.CHANGE_ATTRIBUTE_USED: change_attribute_used, 287 | SStubPattern.CHANGE_KEYWORD_ARGUMENT_USED : change_keyword_argument_used, 288 | SStubPattern.SAME_FUNCTION_WRONG_CALLER: same_function_wrong_caller, 289 | SStubPattern.CHANGE_BINARY_OPERATOR: change_binary_operator, 290 | 
# Pattern -> predicate for leaf-to-leaf edits. Insertion order is the priority
# order: `single_token_edit` reports the first pattern whose predicate holds.
single_token_edits = {
    SStubPattern.WRONG_FUNCTION_NAME: wrong_function_name,
    SStubPattern.CHANGE_CONSTANT_TYPE: change_constant_type,
    SStubPattern.CHANGE_NUMERIC_LITERAL: change_numeric_literal,
    SStubPattern.CHANGE_BOOLEAN_LITERAL: change_boolean_literal,
    SStubPattern.CHANGE_ATTRIBUTE_USED: change_attribute_used,
    SStubPattern.CHANGE_KEYWORD_ARGUMENT_USED : change_keyword_argument_used,
    SStubPattern.SAME_FUNCTION_WRONG_CALLER: same_function_wrong_caller,
    SStubPattern.CHANGE_BINARY_OPERATOR: change_binary_operator,
    SStubPattern.CHANGE_BINARY_OPERAND: is_binary_operand,
    SStubPattern.CHANGE_IDENTIFIER_USED: change_identifier_used,
    SStubPattern.CHANGE_STRING_LITERAL: change_string_literal,
}


def single_token_edit(source_ast, target_ast):
    """Return the first pattern in ``single_token_edits`` that matches the edit.

    Predicates are tried lazily in table order. When none holds the edit is
    reported as ``SStubPattern.SINGLE_TOKEN``.
    """
    matching = (
        pattern
        for pattern, predicate in single_token_edits.items()
        if predicate(source_ast, target_ast)
    )
    return next(matching, SStubPattern.SINGLE_TOKEN)


# Same function --------------------------------


def same_function_more_args(source_ast, target_ast):
    """Return True if the target has more children and every source child reappears in it.

    Children are compared with ``pisomorph``, i.e. ignoring parentheses.
    """
    if len(source_ast.children) >= len(target_ast.children):
        return False

    return all(
        any(pisomorph(candidate, arg) for candidate in target_ast.children)
        for arg in source_ast.children
    )


def same_function_less_args(source_ast, target_ast):
    """Return True if the target has fewer children and each of them already exists in the source."""
    if len(source_ast.children) <= len(target_ast.children):
        return False

    return all(
        any(pisomorph(candidate, arg) for candidate in source_ast.children)
        for arg in target_ast.children
    )


def same_function_swap_args(source_ast, target_ast):
    """Return True if the two child lists differ by exactly one swap of two positions."""
    before, after = source_ast.children, target_ast.children

    if len(before) != len(after):
        return False

    # Positions at which the children do not correspond to each other.
    changed = [
        index
        for index, (old, new) in enumerate(zip(before, after))
        if not pisomorph(old, new)
    ]

    if len(changed) != 2:
        return False

    first, second = changed
    return (pisomorph(before[first], after[second])
                and pisomorph(before[second], after[first]))


# Pattern -> predicate for edits of an argument list; tried in this order by
# `same_function_mod`.
same_function_edits = {
    SStubPattern.SAME_FUNCTION_MORE_ARGS: same_function_more_args,
    SStubPattern.SAME_FUNCTION_LESS_ARGS: same_function_less_args,
    SStubPattern.SAME_FUNCTION_SWAP_ARGS: same_function_swap_args,
}
def same_function_mod(source_ast, target_ast):
    """Classify an edit between two argument lists of the same function.

    Returns the first pattern of ``same_function_edits`` whose predicate
    holds. If either node is not an ``argument_list`` or no predicate holds,
    ``SStubPattern.SINGLE_STMT`` is returned.
    """
    both_argument_lists = (source_ast.type == "argument_list"
                                and target_ast.type == "argument_list")

    if both_argument_lists:
        for pattern, predicate in same_function_edits.items():
            if predicate(source_ast, target_ast):
                return pattern

    return SStubPattern.SINGLE_STMT



# If statement ----------------------------------------------------------------


def _condition_extended_with(source_ast, target_ast, keyword):
    """Return True if the target is a boolean operator using ``keyword`` that contains the source."""
    if target_ast.type != "boolean_operator":
        return False

    # The child at index 1 of a boolean operator is the `and` / `or` token.
    if target_ast.children[1].type != keyword:
        return False

    return any(pisomorph(child, source_ast) for child in target_ast.children)


def more_specific_if(source_ast, target_ast):
    """Return True if the target is ``<source> and <something>`` (in either order)."""
    return _condition_extended_with(source_ast, target_ast, "and")


def less_specific_if(source_ast, target_ast):
    """Return True if the target is ``<source> or <something>`` (in either order)."""
    return _condition_extended_with(source_ast, target_ast, "or")


def change_if_statement(source_ast, target_ast):
    """Classify an edit of a condition as more specific, less specific or unspecific (``SINGLE_STMT``)."""
    checks = (
        (SStubPattern.MORE_SPECIFIC_IF, more_specific_if),
        (SStubPattern.LESS_SPECIFIC_IF, less_specific_if),
    )

    for pattern, predicate in checks:
        if predicate(source_ast, target_ast):
            return pattern

    return SStubPattern.SINGLE_STMT

# Change iterable ----------------------------------------------------------------

def add_elements_to_iterable(source_ast, target_ast):
    """Return True if the target has more children and still contains every child of the source."""
    if len(source_ast.children) >= len(target_ast.children):
        return False

    return all(
        any(pisomorph(candidate, element) for candidate in target_ast.children)
        for element in source_ast.children
    )


def change_iterable(source_ast, target_ast):
    """Return ``ADD_ELEMENTS_TO_ITERABLE`` if elements were added, else ``SINGLE_STMT``."""
    elements_added = add_elements_to_iterable(source_ast, target_ast)
    return SStubPattern.ADD_ELEMENTS_TO_ITERABLE if elements_added else SStubPattern.SINGLE_STMT
def add_function_around_expression(source_ast, target_ast):
    """Return True if the target's last child is an argument list that contains the source.

    This is the shape of ``x`` -> ``f(x)``. Additional arguments next to the
    wrapped expression are accepted.
    """
    if not len(target_ast.children):
        return False

    trailing = target_ast.children[-1]
    if trailing.type != "argument_list":
        return False

    # It seems that adding arguments together with a function seems to be okay (see PySStuBs dataset)
    #if len(argument_list.children) != 3: return False

    return any(pisomorph(argument, source_ast) for argument in trailing.children)


def add_function(source_ast, target_ast):
    """Classify a call introduced by the edit.

    Wrapping in a function is checked before an added method call; if neither
    applies ``SStubPattern.SINGLE_STMT`` is returned.
    """
    checks = (
        (SStubPattern.ADD_FUNCTION_AROUND_EXPRESSION, add_function_around_expression),
        (SStubPattern.ADD_METHOD_CALL, add_method_call),
    )

    for pattern, predicate in checks:
        if predicate(source_ast, target_ast):
            return pattern

    return SStubPattern.SINGLE_STMT


# ADD METHOD ----------------------------------------------------------------

def add_method_call(source_ast, target_ast):
    """Return True if the target's first child is an attribute or call whose own first child is the source."""
    if not len(target_ast.children):
        return False

    head = target_ast.children[0]

    return head.type in ["attribute", "call"] and pisomorph(head.children[0], source_ast)

# ADD attribute -------------------------------------------------------------


def add_attribute_access(source_ast, target_ast):
    """Return ``ADD_ATTRIBUTE_ACCESS`` if the first child of the target is the source, else ``SINGLE_STMT``."""
    wraps_source = pisomorph(target_ast.children[0], source_ast)
    return SStubPattern.ADD_ATTRIBUTE_ACCESS if wraps_source else SStubPattern.SINGLE_STMT


# Change unary operator ----------------------------------------------------

def is_unary_operator(node):
    """Return True if ``node`` is an operator node with exactly two children (operator and operand)."""
    return "operator" in node.type and len(node.children) == 2


def is_unary_operator_change(source_ast, target_ast):
    """Return True if a unary operator was added or removed.

    That is the case when one of the nodes is a unary operator that has the
    other node among its children. The source is examined first.
    """
    for operator_node, other in ((source_ast, target_ast), (target_ast, source_ast)):
        if not is_unary_operator(operator_node):
            continue

        for child in operator_node.children:
            if pisomorph(child, other):
                return True

    return False
def cached_property(fnc):
    """Decorator that turns a method into a read-only property computed once per instance.

    Results are kept in a dict stored on the instance as ``_cache`` (created
    on first access), keyed by the name of the wrapped function. All cached
    properties of an instance share that dict.
    """
    slot = fnc.__name__

    def load_from_cache(self):
        if not hasattr(self, "_cache"):
            self._cache = {}

        if slot in self._cache:
            return self._cache[slot]

        # First access: compute, remember, and hand back the value.
        value = self._cache[slot] = fnc(self)
        return value

    return property(load_from_cache)
"Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Programming Language :: Python :: 3.13", 31 | "Programming Language :: Python :: 3 :: Only", 32 | ] 33 | 34 | dependencies = ["code_tokenize", "apted"] 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/cedricrupb/code_diff" 38 | "Bug Reports" = "https://github.com/cedricrupb/code_diff/issues" 39 | "Source" = "https://github.com/cedricrupb/code_diff" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | code-tokenize >= 0.1.0 2 | apted >= 1.0.3 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | ong_description_content_type = text/markdown -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open("README.md", "r") as f: 4 | long_description = f.read() 5 | 6 | setup( 7 | name = 'code_diff', 8 | packages = ['code_diff', 'code_diff.gumtree'], 9 | version = '0.1.3', 10 | license='MIT', 11 | description = 'Fast AST based code differencing in Python', 12 | long_description = long_description, 13 | long_description_content_type="text/markdown", 14 | author = 'Cedric Richter', 15 | author_email = 'cedricr.upb@gmail.com', 16 | url = 'https://github.com/cedricrupb/code_diff', 17 | download_url = 'https://github.com/cedricrupb/code_diff/archive/refs/tags/v0.1.3.tar.gz', 18 | keywords = ['code', 'differencing', 'AST', 'program', 'language processing'], 19 | install_requires=[ 20 | 'code-tokenize>=0.2.1', 21 | 'apted' 22 | ], 23 | classifiers=[ 24 | 'Development Status :: 3 - Alpha', 25 | 'Intended Audience :: 
Developers', 26 | 'Topic :: Software Development :: Build Tools', 27 | 'License :: OSI Approved :: MIT License', 28 | 'Programming Language :: Python :: 3', 29 | 'Programming Language :: Python :: 3.6', 30 | 'Programming Language :: Python :: 3.7', 31 | 'Programming Language :: Python :: 3.8', 32 | 'Programming Language :: Python :: 3.9', 33 | 'Programming Language :: Python :: 3.10', 34 | 'Programming Language :: Python :: 3.11', 35 | 'Programming Language :: Python :: 3.12', 36 | 'Programming Language :: Python :: 3.13', 37 | ], 38 | ) -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cedricrupb/code_diff/e8c6a8fdc953d8e6db414d31c4ca90bd5ceaf2b4/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_sstubs.py: -------------------------------------------------------------------------------- 1 | import code_diff as cd 2 | 3 | from code_diff.diff_utils import parse_hunks 4 | from code_diff import SStubPattern 5 | 6 | # Util -------------------------------------------------------------- 7 | 8 | def compute_diff_sstub(diff): 9 | hunks = parse_hunks(diff) 10 | hunk = hunks[0] 11 | diff = cd.difference(hunk.before, hunk.after, lang = "python") 12 | return diff.sstub_pattern() 13 | 14 | 15 | 16 | # Wrong Function name ---------------------------------------------- 17 | 18 | def test_wrong_function_name_1(): 19 | 20 | test = """ 21 | @@ -0,0 +0,0 @@ test 22 | 23 | - test() 24 | + test2() 25 | 26 | """ 27 | 28 | assert compute_diff_sstub(test) == SStubPattern.WRONG_FUNCTION_NAME 29 | 30 | 31 | def test_wrong_function_name_2(): 32 | 33 | test = """ 34 | @@ -0,0 +0,0 @@ test 35 | 36 | - test.call() 37 | + test.call_async() 38 | 39 | """ 40 | 41 | assert compute_diff_sstub(test) == SStubPattern.WRONG_FUNCTION_NAME 42 | 43 | 44 | def 
test_wrong_function_name_3(): 45 | 46 | test = """ 47 | @@ -0,0 +0,0 @@ test 48 | 49 | - test.call_async('Hello World', x, x / 2) 50 | + test.call('Hello World', x, x / 2) 51 | 52 | """ 53 | 54 | assert compute_diff_sstub(test) == SStubPattern.WRONG_FUNCTION_NAME 55 | 56 | 57 | def test_wrong_function_name_4(): 58 | 59 | test = """ 60 | @@ -0,0 +0,0 @@ test 61 | 62 | - test_call.call('Hello World', x, x / 2) 63 | + test.call('Hello World', x, x / 2) 64 | 65 | """ 66 | 67 | assert compute_diff_sstub(test) != SStubPattern.WRONG_FUNCTION_NAME 68 | 69 | 70 | def test_wrong_function_name_5(): 71 | 72 | test = """ 73 | @@ -0,0 +0,0 @@ test 74 | 75 | - test.x.call('Hello World', x, x / 2) 76 | + test.y.call('Hello World', x, x / 2) 77 | 78 | """ 79 | 80 | assert compute_diff_sstub(test) != SStubPattern.WRONG_FUNCTION_NAME 81 | 82 | 83 | 84 | # Same Function more args ------------------------------------------- 85 | 86 | def test_same_function_more_args_1(): 87 | 88 | test = """ 89 | @@ -0,0 +0,0 @@ test 90 | 91 | - test() 92 | + test(x) 93 | 94 | """ 95 | 96 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_MORE_ARGS 97 | 98 | 99 | def test_same_function_more_args_2(): 100 | 101 | test = """ 102 | @@ -0,0 +0,0 @@ test 103 | 104 | - test(x) 105 | + test(x, y) 106 | 107 | """ 108 | 109 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_MORE_ARGS 110 | 111 | 112 | def test_same_function_more_args_3(): 113 | 114 | test = """ 115 | @@ -0,0 +0,0 @@ test 116 | 117 | - test(x, y) 118 | + test(x, y + 1) 119 | 120 | """ 121 | 122 | assert compute_diff_sstub(test) != SStubPattern.SAME_FUNCTION_MORE_ARGS 123 | 124 | 125 | def test_same_function_more_args_4(): 126 | 127 | test = """ 128 | @@ -0,0 +0,0 @@ test 129 | 130 | - test(x) 131 | + test(x, y + 1) 132 | 133 | """ 134 | 135 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_MORE_ARGS 136 | 137 | 138 | def test_same_function_more_args_5(): 139 | 140 | test = """ 141 | @@ -0,0 +0,0 @@ test 
142 | 143 | - test(x + 1) 144 | + test(x, y + 1) 145 | 146 | """ 147 | 148 | assert compute_diff_sstub(test) != SStubPattern.SAME_FUNCTION_MORE_ARGS 149 | 150 | 151 | def test_same_function_more_args_6(): 152 | 153 | test = """ 154 | @@ -0,0 +0,0 @@ test 155 | 156 | - test.call(x) 157 | + test.call(x, y) 158 | 159 | """ 160 | 161 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_MORE_ARGS 162 | 163 | 164 | def test_same_function_more_args_7(): 165 | 166 | test = """ 167 | @@ -0,0 +0,0 @@ test 168 | 169 | - test.call(x) 170 | + test.call(x, y, z, d, a, call()) 171 | 172 | """ 173 | 174 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_MORE_ARGS 175 | 176 | 177 | def test_same_function_more_args_8(): 178 | 179 | test = """ 180 | @@ -0,0 +0,0 @@ test 181 | 182 | - test.call1(x) 183 | + test.call(x, y, z, d, a, call()) 184 | 185 | """ 186 | 187 | assert compute_diff_sstub(test) != SStubPattern.SAME_FUNCTION_MORE_ARGS 188 | 189 | # Same Function less args ------------------------------------------- 190 | 191 | def test_same_function_less_args_1(): 192 | 193 | test = """ 194 | @@ -0,0 +0,0 @@ test 195 | 196 | - test(x) 197 | + test() 198 | 199 | """ 200 | 201 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_LESS_ARGS 202 | 203 | 204 | def test_same_function_less_args_2(): 205 | 206 | test = """ 207 | @@ -0,0 +0,0 @@ test 208 | 209 | - test(x, y) 210 | + test(x) 211 | 212 | """ 213 | 214 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_LESS_ARGS 215 | 216 | 217 | 218 | def test_same_function_less_args_3(): 219 | 220 | test = """ 221 | @@ -0,0 +0,0 @@ test 222 | 223 | - test(x, y + 1) 224 | + test(x, y) 225 | 226 | """ 227 | 228 | assert compute_diff_sstub(test) != SStubPattern.SAME_FUNCTION_LESS_ARGS 229 | 230 | 231 | def test_same_function_less_args_4(): 232 | 233 | test = """ 234 | @@ -0,0 +0,0 @@ test 235 | 236 | - test(x, y + 1) 237 | + test(x) 238 | 239 | """ 240 | 241 | assert compute_diff_sstub(test) == 
SStubPattern.SAME_FUNCTION_LESS_ARGS 242 | 243 | 244 | def test_same_function_less_args_5(): 245 | 246 | test = """ 247 | @@ -0,0 +0,0 @@ test 248 | 249 | - test(x, y + 1) 250 | + test(x + 1) 251 | 252 | """ 253 | 254 | assert compute_diff_sstub(test) != SStubPattern.SAME_FUNCTION_LESS_ARGS 255 | 256 | 257 | def test_same_function_less_args_6(): 258 | 259 | test = """ 260 | @@ -0,0 +0,0 @@ test 261 | 262 | - test.call(x, y) 263 | + test.call(x) 264 | 265 | """ 266 | 267 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_LESS_ARGS 268 | 269 | 270 | def test_same_function_less_args_7(): 271 | 272 | test = """ 273 | @@ -0,0 +0,0 @@ test 274 | 275 | - test.call(x, y, z, d, a, call()) 276 | + test.call(x) 277 | 278 | """ 279 | 280 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_LESS_ARGS 281 | 282 | # Same Function wrong caller ------------------------------------------- 283 | 284 | def test_same_function_wrong_caller_1(): 285 | 286 | test = """ 287 | @@ -0,0 +0,0 @@ test 288 | 289 | - test.call() 290 | + test1.call() 291 | 292 | """ 293 | 294 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_WRONG_CALLER 295 | 296 | 297 | def test_same_function_wrong_caller_2(): 298 | 299 | test = """ 300 | @@ -0,0 +0,0 @@ test 301 | 302 | - test.x.call() 303 | + test.y.call() 304 | 305 | """ 306 | 307 | assert compute_diff_sstub(test) != SStubPattern.SAME_FUNCTION_WRONG_CALLER 308 | 309 | 310 | def test_same_function_wrong_caller_3(): 311 | 312 | test = """ 313 | @@ -0,0 +0,0 @@ test 314 | 315 | - call() 316 | + test.call() 317 | 318 | """ 319 | 320 | assert compute_diff_sstub(test) != SStubPattern.SAME_FUNCTION_WRONG_CALLER 321 | 322 | 323 | # Same Function swap args ------------------------------------------- 324 | 325 | def test_same_function_swap_args_1(): 326 | 327 | test = """ 328 | @@ -0,0 +0,0 @@ test 329 | 330 | - test.call(x, y) 331 | + test.call(y, x) 332 | 333 | """ 334 | 335 | assert compute_diff_sstub(test) == 
SStubPattern.SAME_FUNCTION_SWAP_ARGS 336 | 337 | 338 | def test_same_function_swap_args_2(): 339 | 340 | test = """ 341 | @@ -0,0 +0,0 @@ test 342 | 343 | - test.call1(x, y) 344 | + test.call(y, x) 345 | 346 | """ 347 | 348 | assert compute_diff_sstub(test) != SStubPattern.SAME_FUNCTION_SWAP_ARGS 349 | 350 | 351 | def test_same_function_swap_args_3(): 352 | 353 | test = """ 354 | @@ -0,0 +0,0 @@ test 355 | 356 | - test.call(x, y, z) 357 | + test.call(y, x, z) 358 | 359 | """ 360 | 361 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_SWAP_ARGS 362 | 363 | 364 | def bin_swaps(x): 365 | for i in range(len(x) - 1): 366 | for j in range(i + 1, len(x)): 367 | result = list(x) 368 | result[i], result[j] = result[j], result[i] 369 | yield result 370 | 371 | 372 | def test_same_function_swap_args_auto(): 373 | 374 | args = ["a", "b", "c", "d + 1", "0 if a != 0 else 1"] 375 | 376 | for l in range(2, len(args)): 377 | perm = tuple(args[:l]) 378 | 379 | for p in bin_swaps(perm): 380 | 381 | test = """ 382 | @@ -0,0 +0,0 @@ test 383 | 384 | - test.call(%s) 385 | + test.call(%s) 386 | 387 | """ % (", ".join(perm), ", ".join(p)) 388 | 389 | assert compute_diff_sstub(test) == SStubPattern.SAME_FUNCTION_SWAP_ARGS 390 | 391 | 392 | # Add function around expression ------------------------------------------- 393 | 394 | def test_add_function_around_expression_1(): 395 | 396 | test = """ 397 | @@ -0,0 +0,0 @@ test 398 | 399 | - result = x 400 | + result = int(x) 401 | 402 | """ 403 | 404 | assert compute_diff_sstub(test) == SStubPattern.ADD_FUNCTION_AROUND_EXPRESSION 405 | 406 | 407 | def test_add_function_around_expression_2(): 408 | 409 | test = """ 410 | @@ -0,0 +0,0 @@ test 411 | 412 | - result = x + 1 413 | + result = int(x) + 1 414 | 415 | """ 416 | 417 | assert compute_diff_sstub(test) == SStubPattern.ADD_FUNCTION_AROUND_EXPRESSION 418 | 419 | 420 | def test_add_function_around_expression_3(): 421 | 422 | test = """ 423 | @@ -0,0 +0,0 @@ test 424 | 425 | - result 
= x + 1 426 | + result = int(x + 1) 427 | 428 | """ 429 | 430 | assert compute_diff_sstub(test) == SStubPattern.ADD_FUNCTION_AROUND_EXPRESSION 431 | 432 | # Add method call -------------------------------------------------------- 433 | 434 | 435 | def test_add_method_call_1(): 436 | 437 | test = """ 438 | @@ -0,0 +0,0 @@ test 439 | 440 | - result = x 441 | + result = x.get() 442 | 443 | """ 444 | 445 | assert compute_diff_sstub(test) == SStubPattern.ADD_METHOD_CALL 446 | 447 | 448 | def test_add_method_call_2(): 449 | 450 | test = """ 451 | @@ -0,0 +0,0 @@ test 452 | 453 | - result = x.get() 454 | + result = x.get().return() 455 | 456 | """ 457 | 458 | assert compute_diff_sstub(test) == SStubPattern.ADD_METHOD_CALL 459 | 460 | 461 | def test_add_method_call_3(): 462 | 463 | test = """ 464 | @@ -0,0 +0,0 @@ test 465 | 466 | - result = x.y 467 | + result = x.y.get() 468 | 469 | """ 470 | 471 | assert compute_diff_sstub(test) == SStubPattern.ADD_METHOD_CALL 472 | 473 | 474 | def test_add_method_call_4(): 475 | 476 | test = """ 477 | @@ -0,0 +0,0 @@ test 478 | 479 | - result = x.get() 480 | + result = x.return().get() 481 | 482 | """ 483 | 484 | assert compute_diff_sstub(test) == SStubPattern.ADD_METHOD_CALL 485 | 486 | 487 | def test_add_method_call_5(): 488 | 489 | test = """ 490 | @@ -0,0 +0,0 @@ test 491 | 492 | - result = x.get() 493 | + result = x.return.get() 494 | 495 | """ 496 | 497 | assert compute_diff_sstub(test) != SStubPattern.ADD_METHOD_CALL 498 | 499 | 500 | 501 | def test_add_method_call_6(): 502 | 503 | test = """ 504 | @@ -0,0 +0,0 @@ test 505 | 506 | - result = x.return().get() 507 | + result = x.get() 508 | 509 | """ 510 | 511 | assert compute_diff_sstub(test) != SStubPattern.ADD_METHOD_CALL 512 | 513 | # Change identifier -------------------------------------------------------- 514 | 515 | def test_change_identifier_used_1(): 516 | 517 | test = """ 518 | @@ -0,0 +0,0 @@ test 519 | 520 | - result = x 521 | + result = y 522 | 523 | """ 524 | 525 | 
def test_change_identifier_used_2():
    """Changing the value passed to a keyword argument is an identifier change."""

    test = """
@@ -0,0 +0,0 @@ test

- result = test(path = path)
+ result = test(path = path2)

    """

    assert compute_diff_sstub(test) == SStubPattern.CHANGE_IDENTIFIER_USED


# This test used to be a second `test_change_identifier_used_2`. The later
# definition replaced the earlier one in the module namespace, so the test
# above was never collected. It now has its own (previously unused) number.
def test_change_identifier_used_6():
    """Changing the NAME of a keyword argument is not an identifier change."""

    test = """
@@ -0,0 +0,0 @@ test

- result = test(path = path)
+ result = test(path2 = path)

    """

    assert compute_diff_sstub(test) != SStubPattern.CHANGE_IDENTIFIER_USED


def test_change_identifier_used_3():
    """Changing the called function name is not an identifier change."""

    test = """
@@ -0,0 +0,0 @@ test

- result = test(path = path)
+ result = test2(path = path)

    """

    assert compute_diff_sstub(test) != SStubPattern.CHANGE_IDENTIFIER_USED


def test_change_identifier_used_4():
    """Changing the receiver of a method call is not an identifier change."""

    test = """
@@ -0,0 +0,0 @@ test

- result = test.x(a, b, c)
+ result1 = test.x(a, b, c)

    """.replace("+ result1 = test.x(a, b, c)", "+ result = test1.x(a, b, c)")

    assert compute_diff_sstub(test) != SStubPattern.CHANGE_IDENTIFIER_USED


def test_change_identifier_used_5():
    """Changing the assignment target is an identifier change."""

    test = """
@@ -0,0 +0,0 @@ test

- result = test.x(a, b, c)
+ result1 = test.x(a, b, c)

    """

    assert compute_diff_sstub(test) == SStubPattern.CHANGE_IDENTIFIER_USED


# Change numeric literal ----------------------------------------------------

def test_change_numeric_literal_1():
    """Replacing one integer by another is a numeric literal change."""

    test = """
@@ -0,0 +0,0 @@ test

- result = 0
+ result = 1

    """

    assert compute_diff_sstub(test) == SStubPattern.CHANGE_NUMERIC_LITERAL


def test_change_numeric_literal_2():
    """The literal may be an operand of a larger expression."""

    test = """
@@ -0,0 +0,0 @@ test

- result = x + 1
+ result = x + 5

    """

    assert compute_diff_sstub(test) == SStubPattern.CHANGE_NUMERIC_LITERAL
compute_diff_sstub(test) == SStubPattern.CHANGE_NUMERIC_LITERAL 619 | 620 | 621 | def test_change_numeric_literal_3(): 622 | 623 | test = """ 624 | @@ -0,0 +0,0 @@ test 625 | 626 | - result = x + 1 627 | + result = x + 5.0 628 | 629 | """ 630 | 631 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_NUMERIC_LITERAL 632 | 633 | 634 | def test_change_numeric_literal_4(): 635 | 636 | test = """ 637 | @@ -0,0 +0,0 @@ test 638 | 639 | - result = x + 1 640 | + result = x + 1.0 641 | 642 | """ 643 | 644 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_NUMERIC_LITERAL 645 | 646 | 647 | def test_change_numeric_literal_5(): 648 | 649 | test = """ 650 | @@ -0,0 +0,0 @@ test 651 | 652 | - result = x + 1 653 | + result = x + a 654 | 655 | """ 656 | 657 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_NUMERIC_LITERAL 658 | 659 | 660 | # Change boolean literal ---------------------------------------------------- 661 | 662 | def test_change_boolean_literal_1(): 663 | 664 | test = """ 665 | @@ -0,0 +0,0 @@ test 666 | 667 | - if True: 668 | + if False: 669 | pass 670 | 671 | """ 672 | 673 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BOOLEAN_LITERAL 674 | 675 | 676 | def test_change_boolean_literal_2(): 677 | 678 | test = """ 679 | @@ -0,0 +0,0 @@ test 680 | 681 | - if True and x < 0: 682 | + if False and x < 0: 683 | pass 684 | 685 | """ 686 | 687 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BOOLEAN_LITERAL 688 | 689 | 690 | def test_change_boolean_literal_3(): 691 | 692 | test = """ 693 | @@ -0,0 +0,0 @@ test 694 | 695 | - if False and x < 0: 696 | + if True and x < 0: 697 | pass 698 | 699 | """ 700 | 701 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BOOLEAN_LITERAL 702 | 703 | 704 | def test_change_boolean_literal_4(): 705 | 706 | test = """ 707 | @@ -0,0 +0,0 @@ test 708 | 709 | - if False and x < 0: 710 | + if x / 2 == 0 and x < 0: 711 | pass 712 | 713 | """ 714 | 715 | assert compute_diff_sstub(test) != 
SStubPattern.CHANGE_BOOLEAN_LITERAL 716 | 717 | # Change unary operator ---------------------------------------------------- 718 | 719 | def test_change_unary_operator_1(): 720 | 721 | test = """ 722 | @@ -0,0 +0,0 @@ test 723 | 724 | - if x: 725 | + if not x: 726 | pass 727 | 728 | """ 729 | 730 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_UNARY_OPERATOR 731 | 732 | 733 | def test_change_unary_operator_2(): 734 | 735 | test = """ 736 | @@ -0,0 +0,0 @@ test 737 | 738 | - result = x 739 | + result = -x 740 | pass 741 | 742 | """ 743 | 744 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_UNARY_OPERATOR 745 | 746 | 747 | def test_change_unary_operator_3(): 748 | 749 | test = """ 750 | @@ -0,0 +0,0 @@ test 751 | 752 | - result = x 753 | + result = +x 754 | pass 755 | 756 | """ 757 | 758 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_UNARY_OPERATOR 759 | 760 | 761 | def test_change_unary_operator_4(): 762 | 763 | test = """ 764 | @@ -0,0 +0,0 @@ test 765 | 766 | - if not x: 767 | + if x: 768 | pass 769 | 770 | """ 771 | 772 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_UNARY_OPERATOR 773 | 774 | 775 | def test_change_unary_operator_5(): 776 | 777 | test = """ 778 | @@ -0,0 +0,0 @@ test 779 | 780 | - result = -x 781 | + result = x 782 | pass 783 | 784 | """ 785 | 786 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_UNARY_OPERATOR 787 | 788 | 789 | def test_change_unary_operator_6(): 790 | 791 | test = """ 792 | @@ -0,0 +0,0 @@ test 793 | 794 | - result = +x 795 | + result = x 796 | pass 797 | 798 | """ 799 | 800 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_UNARY_OPERATOR 801 | 802 | 803 | def test_change_unary_operator_7(): 804 | 805 | test = """ 806 | @@ -0,0 +0,0 @@ test 807 | 808 | - if x and y: 809 | + if not x and y: 810 | pass 811 | 812 | """ 813 | 814 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_UNARY_OPERATOR 815 | 816 | 817 | def test_change_unary_operator_8(): 818 | 819 | test = """ 820 | 
@@ -0,0 +0,0 @@ test 821 | 822 | - if x and y: 823 | + if not (x and y): 824 | pass 825 | 826 | """ 827 | 828 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_UNARY_OPERATOR 829 | 830 | 831 | 832 | # Change binary operator ---------------------------------------------------- 833 | 834 | def test_change_binary_operator_1(): 835 | 836 | test = """ 837 | @@ -0,0 +0,0 @@ test 838 | 839 | - if x and y: 840 | + if x or y: 841 | pass 842 | 843 | """ 844 | 845 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 846 | 847 | 848 | def test_change_binary_operator_2(): 849 | 850 | test = """ 851 | @@ -0,0 +0,0 @@ test 852 | 853 | - if x or y: 854 | + if x and y: 855 | pass 856 | 857 | """ 858 | 859 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 860 | 861 | 862 | def test_change_binary_operator_3(): 863 | 864 | test = """ 865 | @@ -0,0 +0,0 @@ test 866 | 867 | - if x + y: 868 | + if x or y: 869 | pass 870 | 871 | """ 872 | 873 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 874 | 875 | 876 | def test_change_binary_operator_4(): 877 | 878 | test = """ 879 | @@ -0,0 +0,0 @@ test 880 | 881 | - if x and y: 882 | + if x - y: 883 | pass 884 | 885 | """ 886 | 887 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 888 | 889 | 890 | def test_change_binary_operator_5(): 891 | 892 | test = """ 893 | @@ -0,0 +0,0 @@ test 894 | 895 | - if x + y: 896 | + if x - y: 897 | pass 898 | 899 | """ 900 | 901 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 902 | 903 | 904 | def test_change_binary_operator_6(): 905 | 906 | test = """ 907 | @@ -0,0 +0,0 @@ test 908 | 909 | - if x + y: 910 | + if x % y: 911 | pass 912 | 913 | """ 914 | 915 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 916 | 917 | 918 | def test_change_binary_operator_7(): 919 | 920 | test = """ 921 | @@ -0,0 +0,0 @@ test 922 | 923 | - if x + y: 924 | + if x / y: 925 | pass 926 | 927 | 
""" 928 | 929 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 930 | 931 | 932 | def test_change_binary_operator_8(): 933 | 934 | test = """ 935 | @@ -0,0 +0,0 @@ test 936 | 937 | - if x + y < 5: 938 | + if x + y <= 5: 939 | pass 940 | 941 | """ 942 | 943 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 944 | 945 | 946 | def test_change_binary_operator_9(): 947 | 948 | test = """ 949 | @@ -0,0 +0,0 @@ test 950 | 951 | - if x + y < 5 and is_t: 952 | + if x + y <= 5 and is_t: 953 | pass 954 | 955 | """ 956 | 957 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERATOR 958 | 959 | 960 | # Change binary operand ----------------------------------------------------- 961 | 962 | 963 | def test_change_binary_operand_1(): 964 | 965 | test = """ 966 | @@ -0,0 +0,0 @@ test 967 | 968 | - if x and y: 969 | + if x and z: 970 | pass 971 | 972 | """ 973 | 974 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERAND 975 | 976 | 977 | def test_change_binary_operand_2(): 978 | 979 | test = """ 980 | @@ -0,0 +0,0 @@ test 981 | 982 | - if x and y: 983 | + if x and z <= 1: 984 | pass 985 | 986 | """ 987 | 988 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERAND 989 | 990 | 991 | def test_change_binary_operand_3(): 992 | 993 | test = """ 994 | @@ -0,0 +0,0 @@ test 995 | 996 | - if x and y: 997 | + if x > 8 and z <= 1: 998 | pass 999 | 1000 | """ 1001 | 1002 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_BINARY_OPERAND 1003 | 1004 | 1005 | def test_change_binary_operand_4(): 1006 | 1007 | test = """ 1008 | @@ -0,0 +0,0 @@ test 1009 | 1010 | - result = result + graphA / 2 1011 | + result = result + graphB / 2 1012 | 1013 | """ 1014 | 1015 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_BINARY_OPERAND 1016 | 1017 | 1018 | 1019 | # Change attribute used ---------------------------------------------------------------- 1020 | 1021 | 1022 | def test_change_attribute_used_1(): 
1023 | 1024 | test = """ 1025 | @@ -0,0 +0,0 @@ test 1026 | 1027 | - result = person.name 1028 | + result = person.age 1029 | 1030 | """ 1031 | 1032 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_ATTRIBUTE_USED 1033 | 1034 | 1035 | def test_change_attribute_used_2(): 1036 | 1037 | test = """ 1038 | @@ -0,0 +0,0 @@ test 1039 | 1040 | - result = (x + y).name 1041 | + result = (x + y).age 1042 | 1043 | """ 1044 | 1045 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_ATTRIBUTE_USED 1046 | 1047 | 1048 | def test_change_attribute_used_3(): 1049 | 1050 | test = """ 1051 | @@ -0,0 +0,0 @@ test 1052 | 1053 | - result = person.name.name 1054 | + result = person.age.age 1055 | 1056 | """ 1057 | 1058 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_ATTRIBUTE_USED 1059 | 1060 | 1061 | def test_change_attribute_used_4(): 1062 | 1063 | test = """ 1064 | @@ -0,0 +0,0 @@ test 1065 | 1066 | - result = person.name.name 1067 | + result = person.age.name 1068 | 1069 | """ 1070 | 1071 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_ATTRIBUTE_USED 1072 | 1073 | 1074 | 1075 | # Change keyword argument used ---------------------------------------------------------- 1076 | 1077 | def test_change_keyword_argument_used_1(): 1078 | 1079 | test = """ 1080 | @@ -0,0 +0,0 @@ test 1081 | 1082 | - result = Person(name = 5) 1083 | + result = Person(age = 5) 1084 | 1085 | """ 1086 | 1087 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_KEYWORD_ARGUMENT_USED 1088 | 1089 | 1090 | def test_change_keyword_argument_used_2(): 1091 | 1092 | test = """ 1093 | @@ -0,0 +0,0 @@ test 1094 | 1095 | - result = Person(path = path) 1096 | + result = Person(paths = path) 1097 | 1098 | """ 1099 | 1100 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_KEYWORD_ARGUMENT_USED 1101 | 1102 | 1103 | def test_change_keyword_argument_used_3(): 1104 | 1105 | test = """ 1106 | @@ -0,0 +0,0 @@ test 1107 | 1108 | - result = Person(path = path) 1109 | + result = Person(path = paths) 
1110 | 1111 | """ 1112 | 1113 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_KEYWORD_ARGUMENT_USED 1114 | 1115 | 1116 | def test_change_keyword_argument_used_4(): 1117 | 1118 | test = """ 1119 | @@ -0,0 +0,0 @@ test 1120 | 1121 | - result = Person(path = path) 1122 | + result = Person(path = path, path2 = path) 1123 | 1124 | """ 1125 | 1126 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_KEYWORD_ARGUMENT_USED 1127 | 1128 | 1129 | def test_change_keyword_argument_used_5(): 1130 | 1131 | test = """ 1132 | @@ -0,0 +0,0 @@ test 1133 | 1134 | - result = Person(path = path, path = path) 1135 | + result = Person(path = path, path2 = path) 1136 | 1137 | """ 1138 | 1139 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_KEYWORD_ARGUMENT_USED 1140 | 1141 | 1142 | def test_change_keyword_argument_used_6(): 1143 | 1144 | test = """ 1145 | @@ -0,0 +0,0 @@ test 1146 | 1147 | - result = Person(path2 = path, path = path) 1148 | + result = Person(path = path, path2 = path) 1149 | 1150 | """ 1151 | 1152 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_KEYWORD_ARGUMENT_USED 1153 | 1154 | # Change constant type used -------------------------------------------------------------- 1155 | 1156 | def test_change_constant_type_used_1(): 1157 | 1158 | test = """ 1159 | @@ -0,0 +0,0 @@ test 1160 | 1161 | - result = 3 1162 | + result = 3.0 1163 | 1164 | """ 1165 | 1166 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_CONSTANT_TYPE 1167 | 1168 | 1169 | def test_change_constant_type_used_2(): 1170 | 1171 | test = """ 1172 | @@ -0,0 +0,0 @@ test 1173 | 1174 | - result = 3 1175 | + result = 3.1 1176 | 1177 | """ 1178 | 1179 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_CONSTANT_TYPE 1180 | 1181 | 1182 | def test_change_constant_type_used_3(): 1183 | 1184 | test = """ 1185 | @@ -0,0 +0,0 @@ test 1186 | 1187 | - result = 3 1188 | + result = '3' 1189 | 1190 | """ 1191 | 1192 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_CONSTANT_TYPE 1193 
| 1194 | 1195 | def test_change_constant_type_used_4(): 1196 | 1197 | test = """ 1198 | @@ -0,0 +0,0 @@ test 1199 | 1200 | - result = "3" 1201 | + result = '3' 1202 | 1203 | """ 1204 | 1205 | assert compute_diff_sstub(test) != SStubPattern.CHANGE_CONSTANT_TYPE 1206 | 1207 | 1208 | def test_change_constant_type_used_5(): 1209 | 1210 | test = """ 1211 | @@ -0,0 +0,0 @@ test 1212 | 1213 | - result = 3.0 1214 | + result = '3' 1215 | 1216 | """ 1217 | 1218 | assert compute_diff_sstub(test) == SStubPattern.CHANGE_CONSTANT_TYPE 1219 | 1220 | # Add elements to iterable ---------------------------------------------------------------- 1221 | 1222 | def test_add_elements_to_iterable_1(): 1223 | test = """ 1224 | @@ -0,0 +0,0 @@ test 1225 | 1226 | - result = () 1227 | + result = (1,) 1228 | 1229 | """ 1230 | 1231 | assert compute_diff_sstub(test) == SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1232 | 1233 | 1234 | def test_add_elements_to_iterable_2(): 1235 | test = """ 1236 | @@ -0,0 +0,0 @@ test 1237 | 1238 | - result = (1,) 1239 | + result = (1,2,) 1240 | 1241 | """ 1242 | 1243 | assert compute_diff_sstub(test) == SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1244 | 1245 | 1246 | def test_add_elements_to_iterable_3(): 1247 | test = """ 1248 | @@ -0,0 +0,0 @@ test 1249 | 1250 | - result = (1,) 1251 | + result = (1,2, x + 1) 1252 | 1253 | """ 1254 | 1255 | assert compute_diff_sstub(test) == SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1256 | 1257 | def test_add_elements_to_iterable_4(): 1258 | test = """ 1259 | @@ -0,0 +0,0 @@ test 1260 | 1261 | - result = (1,) 1262 | + result = (1,2, x + 1, fn()) 1263 | 1264 | """ 1265 | 1266 | assert compute_diff_sstub(test) == SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1267 | 1268 | 1269 | def test_add_elements_to_iterable_5(): 1270 | test = """ 1271 | @@ -0,0 +0,0 @@ test 1272 | 1273 | - result = (1,) 1274 | + result = [1,2, x + 1, fn()] 1275 | 1276 | """ 1277 | 1278 | assert compute_diff_sstub(test) != SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1279 | 1280 | 1281 | def 
test_add_elements_to_iterable_6(): 1282 | test = """ 1283 | @@ -0,0 +0,0 @@ test 1284 | 1285 | - result = [1,2,] 1286 | + result = [1,2, x + 1, fn()] 1287 | 1288 | """ 1289 | 1290 | assert compute_diff_sstub(test) == SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1291 | 1292 | 1293 | def test_add_elements_to_iterable_7(): 1294 | test = """ 1295 | @@ -0,0 +0,0 @@ test 1296 | 1297 | - result = [1,2,] 1298 | + result = [1, x + 1, fn(), 2] 1299 | 1300 | """ 1301 | 1302 | assert compute_diff_sstub(test) == SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1303 | 1304 | 1305 | def test_add_elements_to_iterable_8(): 1306 | test = """ 1307 | @@ -0,0 +0,0 @@ test 1308 | 1309 | - result = [1,2,] 1310 | + result = [1, x + 1, fn()] 1311 | 1312 | """ 1313 | 1314 | assert compute_diff_sstub(test) != SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1315 | 1316 | 1317 | def test_add_elements_to_iterable_9(): 1318 | test = """ 1319 | @@ -0,0 +0,0 @@ test 1320 | 1321 | - result = {1,2,} 1322 | + result = {1,2, x + 1, fn()} 1323 | 1324 | """ 1325 | 1326 | assert compute_diff_sstub(test) == SStubPattern.ADD_ELEMENTS_TO_ITERABLE 1327 | 1328 | # Add attribute access --------------------------------------------------------------------- 1329 | 1330 | def test_add_attribute_access_1(): 1331 | test = """ 1332 | @@ -0,0 +0,0 @@ test 1333 | 1334 | - result = say_hello_to(person) 1335 | + result = say_hello_to(person.name) 1336 | 1337 | """ 1338 | 1339 | assert compute_diff_sstub(test) == SStubPattern.ADD_ATTRIBUTE_ACCESS 1340 | 1341 | 1342 | def test_add_attribute_access_2(): 1343 | test = """ 1344 | @@ -0,0 +0,0 @@ test 1345 | 1346 | - result = person.age 1347 | + result = person.parent.age 1348 | 1349 | """ 1350 | 1351 | assert compute_diff_sstub(test) == SStubPattern.ADD_ATTRIBUTE_ACCESS 1352 | 1353 | 1354 | # More specific if ------------------------------------------------------------------------ 1355 | 1356 | def test_more_specific_if_1(): 1357 | test = """ 1358 | @@ -0,0 +0,0 @@ test 1359 | 1360 | - if x: 1361 | + if 
x and y: 1362 | pass 1363 | 1364 | """ 1365 | 1366 | assert compute_diff_sstub(test) == SStubPattern.MORE_SPECIFIC_IF 1367 | 1368 | 1369 | def test_more_specific_if_2(): 1370 | test = """ 1371 | @@ -0,0 +0,0 @@ test 1372 | 1373 | - if isinstance(x): 1374 | + if isinstance(x, y): 1375 | pass 1376 | 1377 | """ 1378 | 1379 | assert compute_diff_sstub(test) != SStubPattern.MORE_SPECIFIC_IF 1380 | 1381 | 1382 | def test_more_specific_if_3(): 1383 | test = """ 1384 | @@ -0,0 +0,0 @@ test 1385 | 1386 | - if x: 1387 | + if not x: 1388 | pass 1389 | 1390 | """ 1391 | 1392 | assert compute_diff_sstub(test) != SStubPattern.MORE_SPECIFIC_IF 1393 | 1394 | 1395 | def test_more_specific_if_4(): 1396 | test = """ 1397 | @@ -0,0 +0,0 @@ test 1398 | 1399 | - if x and test(): 1400 | + if x and test() or test2(): 1401 | pass 1402 | 1403 | """ 1404 | 1405 | assert compute_diff_sstub(test) != SStubPattern.MORE_SPECIFIC_IF 1406 | 1407 | # Less specific if ------------------------------------------------------------------------ 1408 | 1409 | def test_less_specific_if_1(): 1410 | test = """ 1411 | @@ -0,0 +0,0 @@ test 1412 | 1413 | - if x: 1414 | + if x or y: 1415 | pass 1416 | 1417 | """ 1418 | 1419 | assert compute_diff_sstub(test) == SStubPattern.LESS_SPECIFIC_IF 1420 | 1421 | 1422 | def test_less_specific_if_2(): 1423 | test = """ 1424 | @@ -0,0 +0,0 @@ test 1425 | 1426 | - if isinstance(x, y): 1427 | + if isinstance(x): 1428 | pass 1429 | 1430 | """ 1431 | 1432 | assert compute_diff_sstub(test) != SStubPattern.LESS_SPECIFIC_IF 1433 | 1434 | 1435 | def test_less_specific_if_3(): 1436 | test = """ 1437 | @@ -0,0 +0,0 @@ test 1438 | 1439 | - if not x: 1440 | + if x: 1441 | pass 1442 | 1443 | """ 1444 | 1445 | assert compute_diff_sstub(test) != SStubPattern.LESS_SPECIFIC_IF 1446 | 1447 | 1448 | def test_less_specific_if_4(): 1449 | test = """ 1450 | @@ -0,0 +0,0 @@ test 1451 | 1452 | - if x and test(): 1453 | + if x and test() or test2(): 1454 | pass 1455 | 1456 | """ 1457 | 1458 | assert 
compute_diff_sstub(test) == SStubPattern.LESS_SPECIFIC_IF 1459 | 1460 | 1461 | # Real world tests ---------------------------------------------------------------- 1462 | 1463 | def test_real_world_1(): 1464 | 1465 | test = """ 1466 | @@ -16,7 +16,7 @@ def test_databases(): 1467 | bench2 = Benchmark(statement, setup, name='list with xrange', 1468 | description='Xrange', start_date=datetime(2013, 3, 9)) 1469 | 1470 | - dbHandler = BenchmarkDb.get_instance('bench.db') 1471 | + dbHandler = BenchmarkDb('bench.db') 1472 | """ 1473 | 1474 | assert compute_diff_sstub(test) == SStubPattern.SINGLE_STMT 1475 | 1476 | 1477 | 1478 | def test_real_world_2(): 1479 | 1480 | test = """ 1481 | @@ -146,7 +146,7 @@ class DatetimeWidget(DateWidget): 1482 | if default in (year, month, day, hour, minute): 1483 | return default 1484 | 1485 | - if self.ampm is True and hour != 12: 1486 | + if self.ampm is True and int(hour)!=12: 1487 | ampm = self.request.get(self.name + '-ampm', default) 1488 | if ampm == 'PM': 1489 | hour = str(12+int(hour)) 1490 | """ 1491 | 1492 | assert compute_diff_sstub(test) == SStubPattern.ADD_FUNCTION_AROUND_EXPRESSION 1493 | 1494 | 1495 | def test_real_world_3(): 1496 | 1497 | test = """ 1498 | @@ -59,7 +59,8 @@ class UrlRewriteFilter(object): 1499 | if ext in CONTENT_TYPES: 1500 | # Use the content type specified by the extension 1501 | return (path, CONTENT_TYPES[ext]) 1502 | - elif http_accept is None: 1503 | + elif http_accept is None or http_accept == '*/*': 1504 | + # TODO: This probably isn't the best place to handle "Accept: */*" 1505 | # No extension or Accept header specified, use default 1506 | return (path_info, DEFAULT_CONTENT_TYPE) 1507 | else: 1508 | 1509 | """ 1510 | 1511 | assert compute_diff_sstub(test) == SStubPattern.LESS_SPECIFIC_IF --------------------------------------------------------------------------------