├── .github ├── scripts │ └── build.sh └── workflows │ └── build.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── examples ├── __init__.py ├── control_flow_example.py ├── cyclomatic_complexity_example.py └── program_graph_example.py ├── python_graphs ├── __init__.py ├── analysis │ ├── __init__.py │ ├── program_graph_analysis.py │ ├── program_graph_analysis_test.py │ └── run_program_graph_analysis.py ├── control_flow.py ├── control_flow_graphviz.py ├── control_flow_graphviz_test.py ├── control_flow_test.py ├── control_flow_test_components.py ├── control_flow_visualizer.py ├── cyclomatic_complexity.py ├── cyclomatic_complexity_test.py ├── data_flow.py ├── data_flow_test.py ├── instruction.py ├── instruction_test.py ├── program_graph.py ├── program_graph_dataclasses.py ├── program_graph_graphviz.py ├── program_graph_graphviz_test.py ├── program_graph_test.py ├── program_graph_test_components.py ├── program_graph_visualizer.py ├── program_utils.py └── unparser_patch.py ├── requirements.txt └── setup.py /.github/scripts/build.sh: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2022 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | #!/usr/bin/env bash 16 | 17 | # Exit when any command fails. 
18 | set -e 19 | 20 | PYTHON_VERSION=${PYTHON_VERSION:-3.7} 21 | 22 | pip install --upgrade setuptools pip 23 | pip install --upgrade pylint pytest pytest-pylint pytest-runner 24 | sudo apt install libgraphviz-dev 25 | python setup.py develop 26 | python -m pytest # Run the tests. 27 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: python_graphs 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [3.8, 3.9] 11 | 12 | steps: 13 | - name: Checkout the repository 14 | uses: actions/checkout@v2 15 | 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | 21 | # Build using the build.sh script. 22 | - name: Run build script 23 | shell: bash 24 | run: ./.github/scripts/build.sh 25 | env: 26 | PYTHON_VERSION: ${{ matrix.python-version }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | scratch 3 | 4 | .DS_Store 5 | __MACOSX 6 | 7 | *~ 8 | __pycache__ 9 | .pytest_cache 10 | python_graphs.egg-info 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement (CLA). 
You (or your employer) retain the copyright to your 10 | contribution; this simply gives us permission to use and redistribute your 11 | contributions as part of the project. Head over to 12 | https://cla.developers.google.com/ to see your current agreements on file or 13 | to sign a new one. 14 | 15 | You generally only need to submit a CLA once, so if you've already submitted one 16 | (even if it was for a different project), you probably don't need to do it 17 | again. 18 | 19 | ## Code Reviews 20 | 21 | All submissions, including submissions by project members, require review. For 22 | external contributions, we use GitHub pull requests for this purpose. Consult 23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 24 | information on using pull requests. 25 | 26 | ## Community Guidelines 27 | 28 | This project follows 29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity.
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python_graphs 2 | 3 | This package is for computing graph representations of Python programs for 4 | machine learning applications. It includes the following modules: 5 | 6 | * `control_flow` For computing control flow graphs statically from Python 7 | programs. 8 | * `data_flow` For computing data flow analyses of Python programs. 9 | * `program_graph` For computing graphs statically to represent arbitrary 10 | Python programs or functions. 11 | * `cyclomatic_complexity` For computing the cyclomatic complexity of a Python function. 12 | 13 | 14 | ## Installation 15 | 16 | To install python_graphs with pip, run: `pip install python_graphs`. 17 | 18 | To install python_graphs from source, run: `python setup.py develop`. 
19 | 20 | ## Common Tasks 21 | 22 | **Generate a control flow graph from a function `fn`:** 23 | 24 | ```python 25 | from python_graphs import control_flow 26 | graph = control_flow.get_control_flow_graph(fn) 27 | ``` 28 | 29 | **Generate a program graph from a function `fn`:** 30 | 31 | ```python 32 | from python_graphs import program_graph 33 | graph = program_graph.get_program_graph(fn) 34 | ``` 35 | 36 | **Compute the cyclomatic complexity of a function `fn`:** 37 | 38 | ```python 39 | from python_graphs import control_flow 40 | from python_graphs import cyclomatic_complexity 41 | graph = control_flow.get_control_flow_graph(fn) 42 | value = cyclomatic_complexity.cyclomatic_complexity(graph) 43 | ``` 44 | 45 | --- 46 | 47 | This is not an officially supported Google product. 48 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google-research/python-graphs/0201fb090b9224dfb1a9a05ce836e9ef6da8ccc9/examples/__init__.py -------------------------------------------------------------------------------- /examples/control_flow_example.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""Example generating a control flow graph from a Python function.

Generates an image visualizing the control flow graph for each of the functions
in control_flow_test_components.py. Saves the resulting images to the directory
`out`.

Usage:
python -m examples.control_flow_example
"""

import inspect
import os

from absl import app

from python_graphs import control_flow
from python_graphs import control_flow_graphviz
from python_graphs import control_flow_test_components as tc
from python_graphs import program_utils


def plot_control_flow_graph(fn, path):
  """Renders the control flow graph of `fn` to an image file.

  Args:
    fn: The function whose control flow graph is drawn.
    path: Destination path handed to `control_flow_graphviz.render`.
  """
  cfg = control_flow.get_control_flow_graph(fn)
  # The function's source text is passed along as `include_src`.
  fn_source = program_utils.getsource(fn)
  control_flow_graphviz.render(cfg, include_src=fn_source, path=path)


def main(argv) -> None:
  """Plots every function found in control_flow_test_components.

  Args:
    argv: Command line arguments supplied by `app.run`; ignored.
  """
  del argv  # Unused

  # The images below are written under `out`, so it has to exist first.
  os.makedirs('out', exist_ok=True)

  # One image per module-level function of the test components module.
  test_functions = inspect.getmembers(tc, predicate=inspect.isfunction)
  for name, fn in test_functions:
    plot_control_flow_graph(fn, f'out/{name}_cfg.png')
  print('Done. See the `out` directory for the results.')


if __name__ == '__main__':
  app.run(main)

# --------------------------------------------------------------------------------
# /examples/cyclomatic_complexity_example.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Example computing the cyclomatic complexity of various Python functions.

For each of the functions in control_flow_test_components.py, this computes and
prints the function's cyclomatic complexity.

Usage:
python -m examples.cyclomatic_complexity_example
"""

import inspect

from absl import app

from python_graphs import control_flow
from python_graphs import control_flow_test_components as tc
from python_graphs import cyclomatic_complexity


def main(argv) -> None:
  """Prints `<name>: <complexity>` for each test component function.

  Args:
    argv: Command line arguments supplied by `app.run`; ignored.
  """
  del argv  # Unused

  test_functions = inspect.getmembers(tc, predicate=inspect.isfunction)
  for name, fn in test_functions:
    # The label is emitted before any analysis runs, so it is already on the
    # output line if building the graph or computing the metric raises.
    print(f'{name}: ', end='')
    print(
        cyclomatic_complexity.cyclomatic_complexity(
            control_flow.get_control_flow_graph(fn)))


if __name__ == '__main__':
  app.run(main)

# --------------------------------------------------------------------------------
# /examples/program_graph_example.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Example generating a complete program graph from a Python function.

Generates an image visualizing the complete program graph for each function
in program_graph_test_components.py. Saves the resulting images to the directory
`out`.

Usage:
python -m examples.program_graph_example
"""

import inspect
import os

from absl import app
from python_graphs import program_graph
from python_graphs import program_graph_graphviz
from python_graphs import program_graph_test_components as tc


def main(argv) -> None:
  """Renders the program graph of every test component function.

  Args:
    argv: Command line arguments supplied by `app.run`; ignored.
  """
  del argv  # Unused

  # The images below are written under `out`, so it has to exist first.
  os.makedirs('out', exist_ok=True)

  # One image per module-level function of the test components module.
  test_functions = inspect.getmembers(tc, predicate=inspect.isfunction)
  for name, fn in test_functions:
    program_graph_graphviz.render(
        program_graph.get_program_graph(fn),
        path=f'out/{name}-program-graph.png')
  print('Done. See the `out` directory for the results.')


if __name__ == '__main__':
  app.run(main)

# --------------------------------------------------------------------------------
# /python_graphs/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/google-research/python-graphs/0201fb090b9224dfb1a9a05ce836e9ef6da8ccc9/python_graphs/__init__.py
# --------------------------------------------------------------------------------
# /python_graphs/analysis/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/google-research/python-graphs/0201fb090b9224dfb1a9a05ce836e9ef6da8ccc9/python_graphs/analysis/__init__.py
# --------------------------------------------------------------------------------
# /python_graphs/analysis/program_graph_analysis.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functions to analyze program graphs.

Computes properties such as the height of a program graph's AST.
"""

import gast as ast
import networkx as nx


def num_nodes(graph):
  """Counts the nodes of a ProgramGraph."""
  nodes = graph.all_nodes()
  return len(nodes)


def num_edges(graph):
  """Counts the edges of a ProgramGraph."""
  edges = graph.edges
  return len(edges)


def ast_height(ast_node):
  """Measures how tall the AST rooted at the given node is.

  Args:
    ast_node: An AST node.

  Returns:
    The number of nodes on the longest downward path that starts at ast_node.
    A node without children therefore has a height of 1.
  """
  child_heights = (
      ast_height(child) for child in ast.iter_child_nodes(ast_node))
  # `default=0` covers leaves, which have no child heights to take a max over.
  return 1 + max(child_heights, default=0)


def graph_ast_height(graph):
  """Measures the height of the AST that a ProgramGraph converts to.

  Args:
    graph: A ProgramGraph.

  Returns:
    The height of the AST produced by `graph.to_ast()`. A single-node AST has a
    height of 1.
  """
  root = graph.to_ast()
  return ast_height(root)


def degrees(graph):
  """Collects the degree of every node in a ProgramGraph.

  Args:
    graph: A ProgramGraph.

  Returns:
    An (unsorted) list with one entry per node: the number of neighbors
    reported by `graph.neighbors` (in-degree plus out-degree).
  """
  node_degrees = []
  for node in graph.all_nodes():
    node_degrees.append(len(graph.neighbors(node)))
  return node_degrees


def in_degrees(graph):
  """Collects the in-degree of every node in a ProgramGraph.

  Args:
    graph: A ProgramGraph.

  Returns:
    An (unsorted) list with one entry per node: the number of its incoming
    neighbors.
  """
  node_in_degrees = []
  for node in graph.all_nodes():
    node_in_degrees.append(len(graph.incoming_neighbors(node)))
  return node_in_degrees


def out_degrees(graph):
  """Collects the out-degree of every node in a ProgramGraph.

  Args:
    graph: A ProgramGraph.

  Returns:
    An (unsorted) list with one entry per node: the number of its outgoing
    neighbors.
  """
  node_out_degrees = []
  for node in graph.all_nodes():
    node_out_degrees.append(len(graph.outgoing_neighbors(node)))
  return node_out_degrees


def _program_graph_to_nx(program_graph, directed=False):
  """Builds a NetworkX graph with the same node ids and edges.

  Args:
    program_graph: A ProgramGraph.
    directed: Whether to build a directed (`nx.DiGraph`) rather than an
      undirected (`nx.Graph`) graph.

  Returns:
    A NetworkX graph that can be analyzed by the networkx module.
  """
  # Adjacency mapping keyed by node id: {0: [1]} is an edge from node 0 to
  # node 1. Every node gets a key, so nodes without edges are kept as well.
  adjacency = {
      node.id: [
          successor.id
          for successor in program_graph.outgoing_neighbors(node)
      ]
      for node in program_graph.all_nodes()
  }
  graph_cls = nx.DiGraph if directed else nx.Graph
  return graph_cls(adjacency)


def diameter(graph):
  """Computes the diameter of a ProgramGraph, ignoring edge direction.

  Note: this is very slow for large graphs.

  Args:
    graph: A ProgramGraph.

  Returns:
    The diameter of the graph. A single-node graph has diameter 0. The graph is
    treated as an undirected graph.

  Raises:
    networkx.exception.NetworkXError: Raised if the graph is not connected.
  """
  undirected = _program_graph_to_nx(graph, directed=False)
  return nx.algorithms.distance_measures.diameter(undirected)


def max_betweenness(graph):
  """Finds the largest node betweenness centrality in a ProgramGraph.

  Note: this is very slow for large graphs.

  Args:
    graph: A ProgramGraph.

  Returns:
    The maximum betweenness centrality value among all nodes in the graph. The
    graph is treated as an undirected graph.
  """
  undirected = _program_graph_to_nx(graph, directed=False)
  centrality_by_node = nx.algorithms.centrality.betweenness_centrality(
      undirected)
  return max(centrality_by_node.values())

# --------------------------------------------------------------------------------
# /python_graphs/analysis/program_graph_analysis_test.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for program_graph_analysis.py."""

from absl.testing import absltest
import gast as ast
import networkx as nx

from python_graphs import program_graph
from python_graphs.analysis import program_graph_analysis as pga


def _constant_one():
  """Returns a fresh gast Constant node for the literal 1."""
  return ast.Constant(value=1, kind=None)


def _name(identifier, ctx):
  """Returns a gast Name node for `identifier` with the given context."""
  return ast.Name(id=identifier, ctx=ctx, annotation=None, type_comment=None)


def _plus_one(left):
  """Returns a gast BinOp node representing `<left> + 1`."""
  return ast.BinOp(left=left, op=ast.Add(), right=_constant_one())


def _module(statement):
  """Wraps a single statement node in a gast Module."""
  return ast.Module(body=[statement], type_ignores=[])


def _foo_module(return_value):
  """Returns the Module for `def foo(n): return <return_value>`."""
  parameters = ast.arguments(
      args=[_name('n', ast.Param())],
      posonlyargs=[],
      vararg=None,
      kwonlyargs=[],
      kw_defaults=[],
      kwarg=None,
      defaults=[])
  function_def = ast.FunctionDef(
      name='foo',
      args=parameters,
      body=[ast.Return(value=return_value)],
      decorator_list=[],
      returns=None,
      type_comment=None)
  return _module(function_def)


class ProgramGraphAnalysisTest(absltest.TestCase):

  def setUp(self):
    super().setUp()
    self.singleton = self.create_singleton_graph()
    self.disconnected = self.create_disconnected_graph()
    self.cycle_3 = self.create_cycle_3()
    self.chain_4 = self.create_chain_4()
    self.wide_tree = self.create_wide_tree()

  def create_singleton_graph(self):
    """Builds a graph made of a single node and no edges."""
    graph = program_graph.ProgramGraph()
    only_node = program_graph.make_node_from_syntax('singleton_node')
    graph.add_node(only_node)
    graph.root_id = only_node.id
    return graph

  def create_disconnected_graph(self):
    """Builds a graph of two nodes that are not joined by any edge."""
    graph = program_graph.ProgramGraph()
    nodes = [program_graph.make_node_from_syntax(name) for name in ('a', 'b')]
    for node in nodes:
      graph.add_node(node)
    graph.root_id = nodes[0].id
    return graph

  def create_cycle_3(self):
    """Builds the directed 3-cycle A -> B -> C -> A, rooted at A."""
    graph = program_graph.ProgramGraph()
    a = program_graph.make_node_from_syntax('A')
    b = program_graph.make_node_from_ast_value('B')
    c = program_graph.make_node_from_syntax('C')
    for node in (a, b, c):
      graph.add_node(node)
    for source, destination in ((a, b), (b, c), (c, a)):
      graph.add_new_edge(source, destination)
    graph.root_id = a.id
    return graph

  def create_chain_4(self):
    """Builds the directed chain A -> B -> C -> D, rooted at A."""
    graph = program_graph.ProgramGraph()
    a = program_graph.make_node_from_syntax('A')
    b = program_graph.make_node_from_ast_value('B')
    c = program_graph.make_node_from_syntax('C')
    d = program_graph.make_node_from_ast_value('D')
    for node in (a, b, c, d):
      graph.add_node(node)
    for source, destination in ((a, b), (b, c), (c, d)):
      graph.add_new_edge(source, destination)
    graph.root_id = a.id
    return graph

  def create_wide_tree(self):
    """Builds a tree whose root has 4 children, every one of them a leaf."""
    graph = program_graph.ProgramGraph()
    root = program_graph.make_node_from_syntax('root')
    graph.add_node(root)
    graph.root_id = root.id
    for leaf_value in range(4):
      leaf = program_graph.make_node_from_ast_value(leaf_value)
      graph.add_node(leaf)
      graph.add_new_edge(root, leaf)
    return graph

  def ids_from_cycle_3(self):
    """Returns the IDs of the 3-cycle's nodes as a triplet in cycle order."""
    cycle = self.cycle_3
    root = cycle.root
    successor = cycle.outgoing_neighbors(root)[0]
    predecessor = cycle.incoming_neighbors(root)[0]
    return root.id, successor.id, predecessor.id

  def test_num_nodes_returns_expected(self):
    expected_counts = (
        (self.singleton, 1),
        (self.disconnected, 2),
        (self.cycle_3, 3),
        (self.chain_4, 4),
        (self.wide_tree, 5),
    )
    for graph, expected in expected_counts:
      self.assertEqual(pga.num_nodes(graph), expected)

  def test_num_edges_returns_expected(self):
    expected_counts = (
        (self.singleton, 0),
        (self.disconnected, 0),
        (self.cycle_3, 3),
        (self.chain_4, 3),
        (self.wide_tree, 4),
    )
    for graph, expected in expected_counts:
      self.assertEqual(pga.num_edges(graph), expected)

  def test_ast_height_returns_expected_for_constructed_expression_ast(self):
    # The expression "1".
    # Height 3: Module -> Expr -> Constant.
    tree = _module(ast.Expr(value=_constant_one()))
    self.assertEqual(pga.ast_height(tree), 3)

    # The expression "1 + 1".
    # Height 4: Module -> Expr -> BinOp -> Constant.
    tree = _module(ast.Expr(value=_plus_one(_constant_one())))
    self.assertEqual(pga.ast_height(tree), 4)

    # The expression "a + 1".
    # Height 5: Module -> Expr -> BinOp -> Name -> Load.
    tree = _module(ast.Expr(value=_plus_one(_name('a', ast.Load()))))
    self.assertEqual(pga.ast_height(tree), 5)

    # The expression "a.b + 1".
    # Height 6: Module -> Expr -> BinOp -> Attribute -> Name -> Load.
    attribute = ast.Attribute(
        value=_name('a', ast.Load()), attr='b', ctx=ast.Load())
    tree = _module(ast.Expr(value=_plus_one(attribute)))
    self.assertEqual(pga.ast_height(tree), 6)

  def test_ast_height_returns_expected_for_constructed_function_ast(self):
    # The function declaration "def foo(n): return".
    # Height 5: Module -> FunctionDef -> arguments -> Name -> Param.
    self.assertEqual(pga.ast_height(_foo_module(None)), 5)

    # The function declaration "def foo(n): return n + 1".
    # Height 6: Module -> FunctionDef -> Return -> BinOp -> Name -> Load.
    tree = _foo_module(_plus_one(_name('n', ast.Load())))
    self.assertEqual(pga.ast_height(tree), 6)

  def test_ast_height_returns_expected_for_parsed_ast(self):
    expected_heights = (
        # Height 3: Module -> Expr -> Constant.
        ('1', 3),
        # Height 6: Module -> Expr -> BinOp -> Attribute -> Name -> Load.
        ('a.b + 1', 6),
        # Height 6: Module -> FunctionDef -> Return -> BinOp -> Name -> Load.
        ('def foo(n): return n + 1', 6),
    )
    for source, expected in expected_heights:
      self.assertEqual(pga.ast_height(ast.parse(source)), expected)

    # Height 9: Module -> FunctionDef -> If -> Return -> BinOp -> Call
    #           -> BinOp -> Name -> Load.
    # Adding whitespace before "def foo" causes an IndentationError in parse().
    recursive_tree = ast.parse("""def foo(n):
  if n <= 0:
    return 0
  else:
    return 1 + foo(n - 1)
""")
    self.assertEqual(pga.ast_height(recursive_tree), 9)

  def test_graph_ast_height_returns_expected(self):
    # Height 6: Module -> FunctionDef -> Return -> BinOp -> Name -> Load.
    def foo1(n):
      return n + 1

    self.assertEqual(
        pga.graph_ast_height(program_graph.get_program_graph(foo1)), 6)

    # Height 9: Module -> FunctionDef -> If -> Return -> BinOp -> Call
    #           -> BinOp -> Name -> Load.
    def foo2(n):
      if n <= 0:
        return 0
      else:
        return 1 + foo2(n - 1)

    self.assertEqual(
        pga.graph_ast_height(program_graph.get_program_graph(foo2)), 9)

  def test_degrees_returns_expected(self):
    expected_degrees = (
        (self.singleton, [0]),
        (self.disconnected, [0, 0]),
        (self.cycle_3, [2, 2, 2]),
        (self.chain_4, [1, 2, 2, 1]),
        (self.wide_tree, [4, 1, 1, 1, 1]),
    )
    for graph, expected in expected_degrees:
      self.assertCountEqual(pga.degrees(graph), expected)

  def test_in_degrees_returns_expected(self):
    expected_degrees = (
        (self.singleton, [0]),
        (self.disconnected, [0, 0]),
        (self.cycle_3, [1, 1, 1]),
        (self.chain_4, [0, 1, 1, 1]),
        (self.wide_tree, [0, 1, 1, 1, 1]),
    )
    for graph, expected in expected_degrees:
      self.assertCountEqual(pga.in_degrees(graph), expected)

  def test_out_degrees_returns_expected(self):
    expected_degrees = (
        (self.singleton, [0]),
        (self.disconnected, [0, 0]),
        (self.cycle_3, [1, 1, 1]),
        (self.chain_4, [1, 1, 1, 0]),
        (self.wide_tree, [4, 0, 0, 0, 0]),
    )
    for graph, expected in expected_degrees:
      self.assertCountEqual(pga.out_degrees(graph), expected)

  def test_diameter_returns_expected_if_connected(self):
    expected_diameters = (
        (self.singleton, 0),
        (self.cycle_3, 1),
        (self.chain_4, 3),
        (self.wide_tree, 2),
    )
    for graph, expected in expected_diameters:
      self.assertEqual(pga.diameter(graph), expected)

  def test_diameter_throws_exception_if_disconnected(self):
    self.assertRaises(
        nx.exception.NetworkXError, pga.diameter, self.disconnected)

  def test_program_graph_to_nx_undirected_has_correct_edges(self):
    id_a, id_b, id_c = self.ids_from_cycle_3()
    nx_graph = pga._program_graph_to_nx(self.cycle_3, directed=False)
    self.assertCountEqual(nx_graph.nodes(), [id_a, id_b, id_c])
    # Without direction, every node of the cycle neighbors the other two.
    expected_adj = {
        id_a: {id_b: {}, id_c: {}},
        id_b: {id_a: {}, id_c: {}},
        id_c: {id_a: {}, id_b: {}},
    }
    self.assertEqual(nx_graph.adj, expected_adj)

  def test_program_graph_to_nx_directed_has_correct_edges(self):
    id_a, id_b, id_c = self.ids_from_cycle_3()
    nx_digraph = pga._program_graph_to_nx(self.cycle_3, directed=True)
    self.assertCountEqual(nx_digraph.nodes(), [id_a, id_b, id_c])
    # With direction, each node only points at its successor in the cycle.
    expected_adj = {
        id_a: {id_b: {}},
        id_b: {id_c: {}},
        id_c: {id_a: {}},
    }
    self.assertEqual(nx_digraph.adj, expected_adj)

  def test_max_betweenness_returns_expected(self):
    for graph in (self.singleton, self.disconnected, self.cycle_3):
      self.assertAlmostEqual(pga.max_betweenness(graph), 0)

    # Middle nodes are in 2 shortest paths, normalizer = (4-1)*(4-2)/2 = 3
    self.assertAlmostEqual(pga.max_betweenness(self.chain_4), 2 / 3)

    # Root is in 6 shortest paths, normalizer = (5-1)*(5-2)/2 = 6
    self.assertAlmostEqual(pga.max_betweenness(self.wide_tree), 6 / 6)


if __name__ == '__main__':
  absltest.main()

# --------------------------------------------------------------------------------
# /python_graphs/analysis/run_program_graph_analysis.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Runs the program graph analysis for datasets of programs.

Analyzes each dataset of programs, producing plots for properties such as the
AST height.
"""

import inspect
import math

from absl import app
from absl import logging
import matplotlib.pyplot as plt
import numpy as np
from python_graphs import control_flow_test_components as cftc
from python_graphs import program_graph
from python_graphs import program_graph_test_components as pgtc
from python_graphs.analysis import program_graph_analysis
import six
from six.moves import range



TARGET_NUM_BINS = 15  # A reasonable constant number of histogram bins.
MAX_NUM_BINS = 20  # The maximum number of bins reasonable on a histogram.


def test_components():
  """Generates functions from two sets of test components.

  Yields:
    All functions in the program graph and control flow test components files.
  """
  for module in (pgtc, cftc):
    for unused_name, fn in inspect.getmembers(
        module, predicate=inspect.isfunction):
      yield fn


# Despite its name this generator is a data source, not a test. Opt out of
# pytest collection in case it is ever imported into a test module.
test_components.__test__ = False


def _source_preview(function, max_chars=100):
  """Returns at most `max_chars` characters of source text for a log message.

  Args:
    function: Either a source string or a callable.
    max_chars: The maximum number of characters to return.

  Returns:
    A prefix of the source string, of the callable's source as reported by
    inspect.getsource, or of repr(function) when no source can be retrieved.
  """
  if isinstance(function, str):
    return function[:max_chars]
  try:
    return inspect.getsource(function)[:max_chars]
  except (OSError, TypeError):
    # getsource raises OSError when the source is unavailable and TypeError
    # for objects (e.g. builtins) that have no Python source at all.
    return repr(function)[:max_chars]


def get_graph_generator(function_generator):
  """Generates ProgramGraph objects from functions.

  Programs for which get_program_graph raises SyntaxError or RuntimeError are
  logged and skipped.

  Args:
    function_generator: An iterable of programs to convert. main() passes the
      function objects yielded by test_components(); source strings are also
      handled when logging failures.

  Yields:
    ProgramGraph objects for the functions.
  """
  for index, function in enumerate(function_generator):
    try:
      graph = program_graph.get_program_graph(function)
    except SyntaxError:
      # get_program_graph can fail for programs with different string encodings.
      # The preview helper is used because slicing a function object directly
      # would raise TypeError from inside this handler.
      logging.info('SyntaxError in get_program_graph for function index %d. '
                   'First 100 chars of function source:\n%s',
                   index, _source_preview(function))
    except RuntimeError:
      # get_program_graph can fail for programs that are only return statements.
      logging.info('RuntimeError in get_program_graph for function index %d. '
                   'First 100 chars of function source:\n%s',
                   index, _source_preview(function))
    else:
      yield graph


def get_percentiles(data, percentiles, integer_valued=True):
  """Returns a dict of percentiles of the data.

  Args:
    data: An unsorted list of datapoints.
    percentiles: A list of ints or floats in the range [0, 100] representing the
      percentiles to compute.
    integer_valued: Whether or not the values are all integers. If so,
      interpolate to the nearest datapoint (instead of computing a fractional
      value between the two nearest datapoints).

  Returns:
    A dict mapping each element of percentiles to the computed result.
  """
  # Ensure integer datapoints for cleaner binning if necessary.
  method = 'nearest' if integer_valued else 'linear'
  try:
    results = np.percentile(data, percentiles, method=method)
  except TypeError:
    # NumPy releases before 1.22 only accept the older `interpolation` keyword,
    # which newer releases deprecate in favor of `method`.
    results = np.percentile(data, percentiles, interpolation=method)
  return dict(zip(percentiles, results))


def analyze_graph(graph, identifier):
  """Performs various analyses on a graph.

  Args:
    graph: A ProgramGraph to analyze.
    identifier: A unique identifier for this graph (for later aggregation).

  Returns:
    A pair (identifier, result_dict), where result_dict contains the results of
    analyses run on the graph.
  """
  num_nodes = program_graph_analysis.num_nodes(graph)
  num_edges = program_graph_analysis.num_edges(graph)
  ast_height = program_graph_analysis.graph_ast_height(graph)

  degree_percentiles = [10, 25, 50, 75, 90]
  degrees = get_percentiles(program_graph_analysis.degrees(graph),
                            degree_percentiles)
  in_degrees = get_percentiles(program_graph_analysis.in_degrees(graph),
                               degree_percentiles)
  out_degrees = get_percentiles(program_graph_analysis.out_degrees(graph),
                                degree_percentiles)

  diameter = program_graph_analysis.diameter(graph)
  max_betweenness = program_graph_analysis.max_betweenness(graph)

  # TODO(kshi): Turn this into a protobuf and fix everywhere else in this file.
  # Eventually this should be parallelized (currently takes ~6 hours to run).
  result_dict = {
      'num_nodes': num_nodes,
      'num_edges': num_edges,
      'ast_height': ast_height,
      'degrees': degrees,
      'in_degrees': in_degrees,
      'out_degrees': out_degrees,
      'diameter': diameter,
      'max_betweenness': max_betweenness,
  }

  return (identifier, result_dict)


def create_bins(values, integer_valued=True, log_x=False):
  """Creates appropriate histogram bins.

  Args:
    values: The values to be plotted in a histogram.
    integer_valued: Whether the values are all integers.
    log_x: Whether to plot the x-axis using a log scale.

  Returns:
    An object (sequence, integer, or 'auto') that can be used as the 'bins'
    keyword argument to plt.hist(). If there are no values to plot, or all of
    the values are identical, then 'auto' is returned. 'auto' is also returned
    for a log-scale plot when none of the values is positive.
  """
  if not values:
    return 'auto'  # No data to plot; let pyplot handle this case.
  min_value = min(values)
  max_value = max(values)
  if min_value == max_value:
    return 'auto'  # All values are identical; let pyplot handle this case.

  if log_x:
    # log10 is undefined for values <= 0 and would turn every bin edge into
    # NaN, so the lowest edge is taken from the smallest positive value.
    positive_values = [value for value in values if value > 0]
    if not positive_values:
      return 'auto'  # Nothing can be placed on a log axis.
    return np.logspace(np.log10(min(positive_values)), np.log10(max_value + 1),
                       num=(TARGET_NUM_BINS + 1))
  elif integer_valued:
    # The minimum integer width resulting in at most MAX_NUM_BINS bins.
    bin_width = math.ceil((max_value - min_value + 1) / MAX_NUM_BINS)
    # Place bin boundaries between integers.
    return np.arange(min_value - 0.5, max_value + bin_width + 0.5, bin_width)
  else:
    return TARGET_NUM_BINS


def _set_log_scale(set_scale, axis):
  """Switches one pyplot axis to a log scale that clips non-positive values.

  Args:
    set_scale: Either plt.xscale or plt.yscale.
    axis: 'x' or 'y', matching set_scale.
  """
  try:
    set_scale('log', nonpositive='clip')
  except (TypeError, ValueError):
    # Older Matplotlib releases only know the per-axis keyword names
    # `nonposx` / `nonposy`, which newer releases no longer accept.
    set_scale('log', **{'nonpos' + axis: 'clip'})


def create_histogram(values, title, percentiles=False, integer_valued=True,
                     log_x=False, log_y=False):
  """Returns a histogram of integer values computed from a dataset.

  Args:
    values: A list of integer values to plot, or if percentiles is True, then
      each value is a dict mapping some chosen percentiles in [0, 100] to the
      corresponding data value.
    title: The figure title.
    percentiles: Whether to plot multiple histograms for percentiles.
    integer_valued: Whether the values are all integers, which affects how the
      data is partitioned into bins.
    log_x: Whether to plot the x-axis using a log scale.
    log_y: Whether to plot the y-axis using a log scale.

  Returns:
    A histogram figure. The caller owns the figure and should close it once
    it is no longer needed.
  """
  figure = plt.figure()

  if percentiles:
    # Overlay one semi-transparent histogram per percentile key.
    for percentile in sorted(values[0].keys()):
      new_values = [percentile_dict[percentile]
                    for percentile_dict in values]
      bins = create_bins(new_values, integer_valued=integer_valued, log_x=log_x)
      plt.hist(new_values, bins=bins, alpha=0.5, label='{}%'.format(percentile))
    plt.legend(loc='upper right')
  else:
    bins = create_bins(values, integer_valued=integer_valued, log_x=log_x)
    plt.hist(values, bins=bins)

  if log_x:
    _set_log_scale(plt.xscale, 'x')
  if log_y:
    _set_log_scale(plt.yscale, 'y')
  plt.title(title)
  return figure


def save_histogram(all_results, result_key, dataset_name, path_root,
                   percentiles=False, integer_valued=True,
                   log_x=False, log_y=False):
  """Saves a histogram image to disk.

  Args:
    all_results: A list of dicts containing all analysis results for each graph.
    result_key: The key in the result dicts specifying what data to plot.
    dataset_name: The name of the dataset, which appears in the figure title and
      the image filename.
    path_root: The directory to save the histogram image in. It must already
      exist.
    percentiles: Whether the data has multiple percentiles to plot.
    integer_valued: Whether the values are all integers, which affects how the
      data is partitioned into bins.
    log_x: Whether to plot the x-axis using a log scale.
    log_y: Whether to plot the y-axis using a log scale.
  """
  values = [result[result_key] for result in all_results]
  title = '{} distribution for {}'.format(result_key, dataset_name)
  figure = create_histogram(values, title, percentiles=percentiles,
                            integer_valued=integer_valued,
                            log_x=log_x, log_y=log_y)
  path = '{}/{}-{}.png'.format(path_root, result_key, dataset_name)
  try:
    figure.savefig(path)
  finally:
    # pyplot keeps every figure alive until it is closed explicitly; release
    # it here since nothing else holds a reference to it.
    plt.close(figure)
  logging.info('Saved image %s', path)


def main(argv):
  """Analyzes every dataset and writes its histograms below /tmp."""
  del argv  # Unused.
  import os  # pylint: disable=g-import-not-at-top

  dataset_pairs = [
      (test_components(), 'test_components'),
  ]
  path_root = '/tmp/program_graph_analysis'
  # savefig does not create missing directories.
  os.makedirs(path_root, exist_ok=True)

  for function_generator, dataset_name in dataset_pairs:
    logging.info('Analyzing graphs in dataset %s...', dataset_name)
    graph_generator = get_graph_generator(function_generator)
    all_results = []
    for index, graph in enumerate(graph_generator):
      identifier = '{}-{}'.format(dataset_name, index)
      # Discard the identifiers (not needed until this is parallelized).
      all_results.append(analyze_graph(graph, identifier)[1])

    if all_results:
      logging.info('Creating plots for dataset %s...', dataset_name)
      for result_key in ['num_nodes', 'num_edges']:
        save_histogram(all_results, result_key, dataset_name, path_root,
                       percentiles=False, integer_valued=True, log_x=True)
      for result_key in ['ast_height', 'diameter']:
        save_histogram(all_results, result_key, dataset_name, path_root,
                       percentiles=False, integer_valued=True)
      for result_key in ['max_betweenness']:
        save_histogram(all_results, result_key, dataset_name, path_root,
                       percentiles=False, integer_valued=False)
      for result_key in ['degrees', 'in_degrees', 'out_degrees']:
        save_histogram(all_results, result_key, dataset_name, path_root,
                       percentiles=True, integer_valued=True)
    else:
      logging.warning('Dataset %s is empty.', dataset_name)


if __name__ == '__main__':
  app.run(main)

# --------------------------------------------------------------------------------
# /python_graphs/control_flow_graphviz.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Graphviz render for control flow graphs."""

from absl import logging  # pylint: disable=unused-import
import astunparse
import gast as ast
import pygraphviz

# A literal backslash followed by "l", appended to each label line. The
# backslash is escaped explicitly because '\l' is an invalid escape sequence
# that newer Python versions warn about; the resulting value is unchanged.
LEFT_ALIGN = '\\l'


def render(graph, include_src=None, path='/tmp/graph.png'):
  """Draws the control flow graph to an image file using the `dot` layout.

  Args:
    graph: The control flow graph to draw.
    include_src: Optional source text to show in an extra box-shaped node.
    path: Where to write the image.
  """
  g = to_graphviz(graph, include_src=include_src)
  g.draw(path, prog='dot')


def trim(line, max_length=30):
  """Shortens `line` to at most `max_length` characters, ending in '...'."""
  if len(line) <= max_length:
    return line
  return line[:max_length - 3] + '...'


def unparse(node):
  """Returns the trimmed source of an AST node, formatted for a label.

  Each source line is trimmed independently, surrounding whitespace is
  removed, and the remaining newlines are replaced with LEFT_ALIGN.

  Args:
    node: The AST node to convert back to source.

  Returns:
    The label text for the node.
  """
  source = astunparse.unparse(node)
  trimmed_source = '\n'.join(trim(line) for line in source.split('\n'))
  return trimmed_source.strip().replace('\n', LEFT_ALIGN)


def write_as_str(write):
  """Returns `write` as text, unparsing it first if it is an AST node."""
  if isinstance(write, ast.AST):
    return unparse(write)
  else:
    return write


def get_label_for_instruction(instruction):
  """Returns the label line for a single instruction.

  Args:
    instruction: The instruction to describe.

  Returns:
    '<written names> <- <source>' when the instruction has an explicit source,
    otherwise the unparsed source of the instruction's AST node.
  """
  if instruction.source is not None:
    line = ', '.join(instruction.get_write_names())
    line += ' <- ' + instruction.source
    return line
  else:
    return unparse(instruction.node)


def get_label(block):
  """Gets the source code for a control flow basic block.

  Args:
    block: The basic block whose instructions should be listed.

  Returns:
    One label line per instruction with a non-blank label, each terminated by
    LEFT_ALIGN. A block without any such instruction yields LEFT_ALIGN alone.
  """
  lines = []
  for control_flow_node in block.control_flow_nodes:
    instruction = control_flow_node.instruction
    line = get_label_for_instruction(instruction)
    if line.strip():
      lines.append(line)

  return LEFT_ALIGN.join(lines) + LEFT_ALIGN


def to_graphviz(graph, include_src=None):
  """Converts a control flow graph into a pygraphviz graph.

  Every basic block becomes a node labeled with its source. Exits from the
  middle of a block are drawn dashed, exits from the end of a block solid.

  Args:
    graph: The control flow graph to convert.
    include_src: Optional source text to add as an extra box-shaped node.

  Returns:
    A directed, non-strict pygraphviz.AGraph.
  """
  g = pygraphviz.AGraph(strict=False, directed=True)
  for block in graph.blocks:
    node_attrs = {}
    label = get_label(block)
    # Block labels are only shown when they start with '<'.
    if block.label is not None and block.label.startswith('<'):
      node_attrs['style'] = 'bold'
      # get_label returns a bare LEFT_ALIGN for a block without instructions.
      # Compare against it exactly: str.rstrip(LEFT_ALIGN) strips a character
      # set and would also treat code made only of 'l' and '\' as empty.
      if label == LEFT_ALIGN:
        label = block.label + LEFT_ALIGN
      else:
        label = block.label + LEFT_ALIGN + label
    node_attrs['label'] = label
    node_attrs['fontname'] = 'Courier New'
    node_attrs['fontsize'] = 10.0

    node_id = id(block)
    g.add_node(node_id, **node_attrs)
    for next_node in block.next:
      next_node_id = id(next_node)
      if next_node in block.exits_from_middle:
        g.add_edge(node_id, next_node_id, style='dashed')
      if next_node in block.exits_from_end:
        g.add_edge(node_id, next_node_id, style='solid')

  if include_src is not None:
    # Use fresh attributes so the source node neither inherits the styling of
    # the last block nor fails when the graph has no blocks at all.
    src_attrs = {
        'label': include_src.replace('\n', LEFT_ALIGN),
        'fontname': 'Courier New',
        'fontsize': 10.0,
        'shape': 'box',
    }
    g.add_node(id(include_src), **src_attrs)

  return g

# --------------------------------------------------------------------------------
# /python_graphs/control_flow_graphviz_test.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for control_flow_graphviz.py."""

import inspect

from absl.testing import absltest
from python_graphs import control_flow
from python_graphs import control_flow_graphviz
from python_graphs import control_flow_test_components as tc


class ControlFlowGraphvizTest(absltest.TestCase):

  def test_to_graphviz_for_all_test_components(self):
    # Smoke test: converting any test component's graph must not raise.
    components = inspect.getmembers(tc, predicate=inspect.isfunction)
    for unused_name, component in components:
      cfg = control_flow.get_control_flow_graph(component)
      control_flow_graphviz.to_graphviz(cfg)

  def test_get_label_multi_op_expression(self):
    cfg = control_flow.get_control_flow_graph(tc.multi_op_expression)
    expression_block = cfg.get_block_by_source('1 + 2 * 3')
    actual_label = control_flow_graphviz.get_label(expression_block).strip()
    self.assertEqual(actual_label, 'return (1 + (2 * 3))\\l')


if __name__ == '__main__':
  absltest.main()

# --------------------------------------------------------------------------------
# /python_graphs/control_flow_test.py:
# --------------------------------------------------------------------------------
# Copyright (C) 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | 15 | """Tests for control_flow.py.""" 16 | 17 | import inspect 18 | 19 | from absl import logging # pylint: disable=unused-import 20 | from absl.testing import absltest 21 | import gast as ast 22 | from python_graphs import control_flow 23 | from python_graphs import control_flow_test_components as tc 24 | from python_graphs import instruction as instruction_module 25 | from python_graphs import program_utils 26 | import six 27 | 28 | 29 | class ControlFlowTest(absltest.TestCase): 30 | 31 | def get_block(self, graph, selector): 32 | if isinstance(selector, control_flow.BasicBlock): 33 | return selector 34 | elif isinstance(selector, six.string_types): 35 | return graph.get_block_by_source(selector) 36 | 37 | def assertSameBlock(self, graph, selector1, selector2): 38 | block1 = self.get_block(graph, selector1) 39 | block2 = self.get_block(graph, selector2) 40 | self.assertEqual(block1, block2) 41 | 42 | def assertExitsTo(self, graph, selector1, selector2): 43 | block1 = self.get_block(graph, selector1) 44 | block2 = self.get_block(graph, selector2) 45 | self.assertTrue(block1.exits_to(block2)) 46 | 47 | def assertNotExitsTo(self, graph, selector1, selector2): 48 | block1 = self.get_block(graph, selector1) 49 | block2 = self.get_block(graph, selector2) 50 | self.assertFalse(block1.exits_to(block2)) 51 | 52 | def assertRaisesTo(self, graph, selector1, selector2): 53 | block1 = self.get_block(graph, selector1) 54 | block2 = self.get_block(graph, selector2) 55 | self.assertTrue(block1.raises_to(block2)) 56 | 57 | def assertNotRaisesTo(self, graph, selector1, selector2): 58 | block1 = self.get_block(graph, selector1) 59 | block2 = self.get_block(graph, selector2) 60 | self.assertFalse(block1.raises_to(block2)) 61 | 62 | def test_control_flow_straight_line_code(self): 63 | graph = control_flow.get_control_flow_graph(tc.straight_line_code) 64 | self.assertSameBlock(graph, 'x = 1', 'y = x + 2') 65 | self.assertSameBlock(graph, 'x = 1', 'z = y * 3') 66 | 
self.assertSameBlock(graph, 'x = 1', 'return z') 67 | 68 | def test_control_flow_simple_if_statement(self): 69 | graph = control_flow.get_control_flow_graph(tc.simple_if_statement) 70 | x1_block = 'x = 1' 71 | y2_block = 'y = 2' 72 | xy_block = 'x > y' 73 | y3_block = 'y = 3' 74 | return_block = 'return y' 75 | self.assertSameBlock(graph, x1_block, y2_block) 76 | self.assertSameBlock(graph, x1_block, xy_block) 77 | self.assertExitsTo(graph, xy_block, y3_block) 78 | self.assertExitsTo(graph, xy_block, return_block) 79 | self.assertExitsTo(graph, y3_block, return_block) 80 | self.assertNotExitsTo(graph, y3_block, x1_block) 81 | self.assertNotExitsTo(graph, return_block, x1_block) 82 | self.assertNotExitsTo(graph, return_block, y3_block) 83 | 84 | def test_control_flow_simple_for_loop(self): 85 | graph = control_flow.get_control_flow_graph(tc.simple_for_loop) 86 | x1_block = 'x = 1' 87 | iter_block = 'range' 88 | target_block = 'y' 89 | body_block = 'y + 3' 90 | return_block = 'return z' 91 | self.assertSameBlock(graph, x1_block, iter_block) 92 | self.assertExitsTo(graph, iter_block, target_block) 93 | self.assertExitsTo(graph, target_block, body_block) 94 | self.assertNotExitsTo(graph, body_block, return_block) 95 | self.assertExitsTo(graph, target_block, return_block) 96 | 97 | def test_control_flow_simple_while_loop(self): 98 | graph = control_flow.get_control_flow_graph(tc.simple_while_loop) 99 | x1_block = 'x = 1' 100 | test_block = 'x < 2' 101 | body_block = 'x += 3' 102 | return_block = 'return x' 103 | 104 | self.assertExitsTo(graph, x1_block, test_block) 105 | self.assertExitsTo(graph, test_block, body_block) 106 | self.assertExitsTo(graph, body_block, test_block) 107 | self.assertNotExitsTo(graph, body_block, return_block) 108 | self.assertExitsTo(graph, test_block, return_block) 109 | 110 | def test_control_flow_break_in_while_loop(self): 111 | graph = control_flow.get_control_flow_graph(tc.break_in_while_loop) 112 | # This is just one block since there's 
no edge from the while loop end 113 | # back to the while loop test, and so the 'x = 1' line can be merged with 114 | # the test. 115 | x1_and_test_block = 'x < 2' 116 | body_block = 'x += 3' 117 | return_block = 'return x' 118 | 119 | self.assertExitsTo(graph, x1_and_test_block, body_block) 120 | self.assertExitsTo(graph, body_block, return_block) 121 | self.assertNotExitsTo(graph, body_block, x1_and_test_block) 122 | self.assertExitsTo(graph, x1_and_test_block, return_block) 123 | 124 | def test_control_flow_nested_while_loops(self): 125 | graph = control_flow.get_control_flow_graph(tc.nested_while_loops) 126 | x1_block = 'x = 1' 127 | outer_test_block = 'x < 2' 128 | y3_block = 'y = 3' 129 | inner_test_block = 'y < 4' 130 | y5_block = 'y += 5' 131 | x6_block = 'x += 6' 132 | return_block = 'return x' 133 | 134 | self.assertExitsTo(graph, x1_block, outer_test_block) 135 | self.assertExitsTo(graph, outer_test_block, y3_block) 136 | self.assertExitsTo(graph, outer_test_block, return_block) 137 | self.assertExitsTo(graph, y3_block, inner_test_block) 138 | self.assertExitsTo(graph, inner_test_block, y5_block) 139 | self.assertExitsTo(graph, inner_test_block, x6_block) 140 | self.assertExitsTo(graph, y5_block, inner_test_block) 141 | self.assertExitsTo(graph, x6_block, outer_test_block) 142 | 143 | def test_control_flow_exception_handling(self): 144 | graph = control_flow.get_control_flow_graph(tc.exception_handling) 145 | self.assertSameBlock(graph, 'before_stmt0', 'before_stmt1') 146 | self.assertExitsTo(graph, 'before_stmt1', 'try_block') 147 | self.assertNotExitsTo(graph, 'before_stmt0', 'except_block1') 148 | self.assertNotExitsTo(graph, 'before_stmt1', 'final_block_stmt0') 149 | self.assertRaisesTo(graph, 'try_block', 'error_type') 150 | self.assertRaisesTo(graph, 'error_type', 'except_block2_stmt0') 151 | self.assertExitsTo(graph, 'except_block1', 'after_stmt0') 152 | 153 | self.assertRaisesTo(graph, 'after_stmt0', 'except_block2_stmt0') 154 | 
self.assertNotRaisesTo(graph, 'try_block', 'except_block2_stmt0') 155 | 156 | def test_control_flow_try_with_loop(self): 157 | graph = control_flow.get_control_flow_graph(tc.try_with_loop) 158 | self.assertSameBlock(graph, 'for_body0', 'for_body1') 159 | self.assertSameBlock(graph, 'except_body0', 'except_body1') 160 | 161 | self.assertExitsTo(graph, 'before_stmt0', 'iterator') 162 | self.assertExitsTo(graph, 'iterator', 'target') 163 | self.assertExitsTo(graph, 'target', 'for_body0') 164 | self.assertExitsTo(graph, 'for_body1', 'target') 165 | self.assertExitsTo(graph, 'target', 'after_stmt0') 166 | 167 | self.assertRaisesTo(graph, 'iterator', 'except_body0') 168 | self.assertRaisesTo(graph, 'target', 'except_body0') 169 | self.assertRaisesTo(graph, 'for_body1', 'except_body0') 170 | 171 | def test_control_flow_break_in_finally(self): 172 | graph = control_flow.get_control_flow_graph(tc.break_in_finally) 173 | 174 | # The exception handlers are tried sequentially until one matches. 175 | self.assertRaisesTo(graph, 'try0', 'Exception0') 176 | self.assertExitsTo(graph, 'Exception0', 'Exception1') 177 | self.assertExitsTo(graph, 'Exception1', 'finally_stmt0') 178 | # If the finally block were to finish and the exception hadn't matched, then 179 | # the exception would exit to the FunctionDef's raise_block. However, the 180 | # break statement prevents the finally from finishing and so the exception 181 | # is lost when the break statement is reached. 182 | # TODO(dbieber): Add the following assert. 183 | # raise_block = graph.get_raise_block('break_in_finally') 184 | # self.assertNotExitsFromEndTo(graph, 'finally_stmt1', raise_block) 185 | # The finally block can of course still raise an exception of its own, so 186 | # the following is still true: 187 | # TODO(dbieber): Add the following assert. 188 | # self.assertRaisesTo(graph, 'finally_stmt1', raise_block) 189 | 190 | # An exception in the except handlers could flow to the finally block. 
191 | self.assertRaisesTo(graph, 'Exception0', 'finally_stmt0') 192 | self.assertRaisesTo(graph, 'exception0_stmt0', 'finally_stmt0') 193 | self.assertRaisesTo(graph, 'Exception1', 'finally_stmt0') 194 | 195 | # The break statement flows to after0, rather than to the loop header. 196 | self.assertNotExitsTo(graph, 'finally_stmt1', 'target0') 197 | self.assertExitsTo(graph, 'finally_stmt1', 'after0') 198 | 199 | def test_control_flow_for_loop_with_else(self): 200 | graph = control_flow.get_control_flow_graph(tc.for_with_else) 201 | self.assertExitsTo(graph, 'target', 'for_stmt0') 202 | self.assertSameBlock(graph, 'for_stmt0', 'condition') 203 | 204 | # If break is encountered, then the else clause is skipped. 205 | self.assertExitsTo(graph, 'condition', 'after_stmt0') 206 | 207 | # The else clause executes if the loop completes without reaching the break. 208 | self.assertExitsTo(graph, 'target', 'else_stmt0') 209 | self.assertNotExitsTo(graph, 'target', 'after_stmt0') 210 | 211 | def test_control_flow_lambda(self): 212 | graph = control_flow.get_control_flow_graph(tc.create_lambda) 213 | self.assertNotExitsTo(graph, 'before_stmt0', 'args') 214 | self.assertNotExitsTo(graph, 'before_stmt0', 'output') 215 | 216 | def test_control_flow_generator(self): 217 | graph = control_flow.get_control_flow_graph(tc.generator) 218 | self.assertExitsTo(graph, 'target', 'yield_statement') 219 | self.assertSameBlock(graph, 'yield_statement', 'after_stmt0') 220 | 221 | def test_control_flow_inner_fn_while_loop(self): 222 | graph = control_flow.get_control_flow_graph(tc.fn_with_inner_fn) 223 | self.assertExitsTo(graph, 'x = 10', 'True') 224 | self.assertExitsTo(graph, 'True', 'True') 225 | self.assertSameBlock(graph, 'True', 'True') 226 | 227 | def test_control_flow_example_class(self): 228 | graph = control_flow.get_control_flow_graph(tc.ExampleClass) 229 | self.assertSameBlock(graph, 'method_stmt0', 'method_stmt1') 230 | 231 | def test_control_flow_return_outside_function(self): 232 
| with self.assertRaises(RuntimeError) as error: 233 | control_flow.get_control_flow_graph('return x') 234 | self.assertContainsSubsequence(str(error.exception), 235 | 'outside of a function frame') 236 | 237 | def test_control_flow_continue_outside_loop(self): 238 | control_flow.get_control_flow_graph('for i in j: continue') 239 | with self.assertRaises(RuntimeError) as error: 240 | control_flow.get_control_flow_graph('if x: continue') 241 | self.assertContainsSubsequence(str(error.exception), 242 | 'outside of a loop frame') 243 | 244 | def test_control_flow_break_outside_loop(self): 245 | control_flow.get_control_flow_graph('for i in j: break') 246 | with self.assertRaises(RuntimeError) as error: 247 | control_flow.get_control_flow_graph('if x: break') 248 | self.assertContainsSubsequence(str(error.exception), 249 | 'outside of a loop frame') 250 | 251 | def test_control_flow_for_all_test_components(self): 252 | for unused_name, fn in inspect.getmembers(tc, predicate=inspect.isfunction): 253 | control_flow.get_control_flow_graph(fn) 254 | 255 | def test_control_flow_for_all_test_components_ast_to_instruction(self): 256 | """All INSTRUCTION_AST_NODES in an AST correspond to one Instruction. 257 | 258 | This assumes that a simple statement can't contain another simple statement. 259 | However, Yield nodes are the exception to this as they are contained within 260 | Expr nodes. 261 | 262 | We omit Yield nodes from INSTRUCTION_AST_NODES despite them being listed 263 | as simple statements in the Python docs. 
264 | """ 265 | for unused_name, fn in inspect.getmembers(tc, predicate=inspect.isfunction): 266 | node = program_utils.program_to_ast(fn) 267 | graph = control_flow.get_control_flow_graph(node) 268 | for n in ast.walk(node): 269 | if not isinstance(n, instruction_module.INSTRUCTION_AST_NODES): 270 | continue 271 | control_flow_nodes = list(graph.get_control_flow_nodes_by_ast_node(n)) 272 | self.assertLen(control_flow_nodes, 1, ast.dump(n)) 273 | 274 | def test_control_flow_reads_and_writes_appear_once(self): 275 | """Asserts each read and write in an Instruction is unique in the graph. 276 | 277 | Note that in the case of AugAssign, the same Name AST node is used once as 278 | a read and once as a write. 279 | """ 280 | for unused_name, fn in inspect.getmembers(tc, predicate=inspect.isfunction): 281 | reads = set() 282 | writes = set() 283 | node = program_utils.program_to_ast(fn) 284 | graph = control_flow.get_control_flow_graph(node) 285 | for instruction in graph.get_instructions(): 286 | # Check that all reads are unique. 287 | for read in instruction.get_reads(): 288 | if isinstance(read, tuple): 289 | read = read[1] 290 | self.assertIsInstance(read, ast.Name, 'Unexpected read type.') 291 | self.assertNotIn(read, reads, 292 | instruction_module.access_name(read)) 293 | reads.add(read) 294 | 295 | # Check that all writes are unique. 296 | for write in instruction.get_writes(): 297 | if isinstance(write, tuple): 298 | write = write[1] 299 | if isinstance(write, six.string_types): 300 | continue 301 | self.assertIsInstance(write, ast.Name) 302 | self.assertNotIn(write, writes, 303 | instruction_module.access_name(write)) 304 | writes.add(write) 305 | 306 | 307 | if __name__ == '__main__': 308 | absltest.main() 309 | -------------------------------------------------------------------------------- /python_graphs/control_flow_test_components.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Google Inc. 
# pylint: disable=missing-docstring
# pylint: disable=pointless-statement,undefined-variable
# pylint: disable=unused-variable,unused-argument
# pylint: disable=bare-except,lost-exception,unreachable
# pylint: disable=keyword-arg-before-vararg
# Fixture: three assignments followed by a return, with no branching.
def straight_line_code():
  x = 1
  y = x + 2
  z = y * 3
  return z


# Fixture: a single `if` without an else branch.
def simple_if_statement():
  x = 1
  y = 2
  if x > y:
    y = 3
  return y


# Fixture: a single `for` loop over a range() call.
def simple_for_loop():
  x = 1
  for y in range(x + 2):
    z = y + 3
  return z


# Fixture: a `for` loop whose target is a tuple, with a conditional break.
def tuple_in_for_loop():
  a, b = 0, 1
  for a, b in [(1, 2), (2, 3)]:
    if a > b:
      break
  return b - a


# Fixture: a single `while` loop.
def simple_while_loop():
  x = 1
  while x < 2:
    x += 3
  return x


# Fixture: a `while` loop whose body ends in an unconditional break.
def break_in_while_loop():
  x = 1
  while x < 2:
    x += 3
    break
  return x


# Fixture: one `while` loop nested inside another.
def nested_while_loops():
  x = 1
  while x < 2:
    y = 3
    while y < 4:
      y += 5
    x += 6
  return x


# Fixture: a try with two typed handlers and one bare handler.
def multiple_excepts():
  try:
    x = 1
  except ValueError:
    x = 2
    x = 3
  except RuntimeError:
    x = 4
  except:
    x = 5
  return x


# Fixture: try / except / finally built from placeholder names.
def try_finally():
  header0
  try:
    try0
    try1
  except Exception0 as value0:
    exception0_stmt0
  finally:
    finally_stmt0
    finally_stmt1
  after0


# Fixture: a try/except nested inside the try body of a try/except/finally.
def exception_handling():
  try:
    before_stmt0
    before_stmt1
    try:
      try_block
    except error_type as value:
      except_block1
    after_stmt0
    after_stmt1
  except:
    except_block2_stmt0
    except_block2_stmt1
  finally:
    final_block_stmt0
    final_block_stmt1
  end_block_stmt0
  end_block_stmt1


# Fixture: positional, defaulted, *varargs and **kwargs parameters.
def fn_with_args(a, b=10, *varargs, **kwargs):
  body_stmt0
  body_stmt1
  return


# Fixture: a single return statement.
def fn1(a, b):
  return a + b


# Fixture: an `if` without else that contains an augmented assignment.
def fn2(a, b):
  c = a
  if a > b:
    c -= b
  return c


# Fixture: if/else with several statements in the if branch.
def fn3(a, b):
  c = a
  if a > b:
    c -= b
    c += 1
    c += 2
    c += 3
  else:
    c += b
  return c


# Fixture: a `for` loop whose target reuses the name of the parameter.
def fn4(i):
  count = 0
  for i in range(i):
    count += 1
  return count


# Fixture: a `for` loop with a conditional break.
def fn5(i):
  count = 0
  for _ in range(i):
    if count > 5:
      break
    count += 1
  return count


# Fixture: a counting `while` loop.
def fn6():
  count = 0
  while count < 10:
    count += 1
  return count


# Fixture: an exception raised and caught within the same try statement.
def fn7():
  try:
    raise ValueError('This will be caught.')
  except ValueError as e:
    del e
  return


# Fixture: try / except / else where the else branch returns.
def try_with_else():
  try:
    raise ValueError('This will be caught.')
  except ValueError as e:
    del e
  else:
    return 1
  return 2


# Fixture: for / else with a conditional break in the loop body.
def for_with_else():
  for target in iterator:
    for_stmt0
    if condition:
      break
    for_stmt1
  else:
    else_stmt0
    else_stmt1
  after_stmt0


# Fixture: a single augmented assignment to a parameter.
def fn8(a):
  a += 1


# Fixture: loops nested three deep, with statements placed after break and
# continue.
def nested_loops(a):
  """A test function illustrating nested loops."""
  for i in range(a):
    while True:
      break
      unreachable = 10
    for j in range(i):
      for k in range(j):
        if j * k > 10:
          continue
          unreachable = 5
      if i + j == 10:
        return True
  return False


# Fixture: a `for` loop inside a try with a bare except handler.
def try_with_loop():
  before_stmt0
  try:
    for target in iterator:
      for_body0
      for_body1
  except:
    except_body0
    except_body1
  after_stmt0


# Fixture: a break statement located in the finally block of a try that sits
# inside a `for` loop.
def break_in_finally():
  header0
  for target0 in iter0:
    try:
      try0
      try1
    except Exception0 as value0:
      exception0_stmt0
    except Exception1 as value1:
      exception1_stmt0
      exception1_stmt1
    finally:
      finally_stmt0
      finally_stmt1
      # This breaks out of the for-loop.
      break
  after0


# Fixture: a break statement located in the try body of a try/except/finally
# that sits inside a `for` loop.
def break_in_try():
  count = 0
  for _ in range(10):
    try:
      count += 1
      # This breaks out of the for-loop through the finally block.
      break
    except ValueError:
      pass
    finally:
      count += 2
  return count


# Fixture: try statements nested three deep, including a handler whose
# exception expression is a call.
def nested_try_excepts():
  try:
    try:
      x = 0
      x += 1
      try:
        x = 2 + 2
      except ValueError(1+1) as e:
        x = 3 - 3
      finally:
        x = 4
    except RuntimeError:
      x = 5 * 5
    finally:
      x = 6 ** 6
  except:
    x = 7 / 7
  return x


# Fixture: a return of an expression with two binary operators.
def multi_op_expression():
  return 1 + 2 * 3


# Fixture: a lambda assigned between two placeholder statements.
def create_lambda():
  before_stmt0
  fn = lambda args: output
  after_stmt0


# Fixture: a generator that yields inside a `for` loop.
def generator():
  for target in iterator:
    yield yield_statement
    after_stmt0


# Fixture: an inner function that contains a `while True` loop.
def fn_with_inner_fn():
  def inner_fn():
    x = 10
    while True:
      pass


# Fixture: a class with a single two-statement method.
class ExampleClass(object):

  def method0(self, arg):
    method_stmt0
    method_stmt1
# Directory that receives the rendered images unless a caller overrides it.
_DEFAULT_OUTPUT_DIR = '/tmp/control_flow_graphs'


def render_functions(functions, output_dir=_DEFAULT_OUTPUT_DIR):
  """Renders the control flow graph of each function to a PNG file.

  Each image is written to `<output_dir>/<name>.png`.

  Args:
    functions: An iterable of (name, function) pairs.
    output_dir: Directory to write the images to. It is created (including
      missing parents) if it does not exist yet.
  """
  # Create the directory up front so rendering does not fail on a machine
  # where it has never been created.
  os.makedirs(output_dir, exist_ok=True)
  for name, function in functions:
    logging.info(name)
    graph = control_flow.get_control_flow_graph(function)
    path = os.path.join(output_dir, '{}.png'.format(name))
    source = program_utils.getsource(function)
    control_flow_graphviz.render(graph, include_src=source, path=path)


def render_filepaths(filepaths, output_dir=_DEFAULT_OUTPUT_DIR):
  """Renders the control flow graph of each source file to a PNG file.

  The image for a file is named after the part of its basename that precedes
  the first '.', and is written to `<output_dir>/<that name>.png`.

  Args:
    filepaths: An iterable of paths to Python source files.
    output_dir: Directory to write the images to. It is created (including
      missing parents) if it does not exist yet.
  """
  os.makedirs(output_dir, exist_ok=True)
  for filepath in filepaths:
    filename = os.path.basename(filepath).split('.')[0]
    logging.info(filename)
    # Python source defaults to UTF-8; do not depend on the platform locale.
    with open(filepath, 'r', encoding='utf-8') as f:
      source = f.read()
    graph = control_flow.get_control_flow_graph(source)
    path = os.path.join(output_dir, '{}.png'.format(filename))
    control_flow_graphviz.render(graph, include_src=source, path=path)


def main(argv):
  """Renders all test component functions, then the listed source files."""
  del argv  # Unused.

  # getmembers already returns a list of (name, function) pairs.
  functions = inspect.getmembers(tc, predicate=inspect.isfunction)
  render_functions(functions)

  # Add filepaths here to visualize their functions.
  filepaths = [
      __file__,
  ]
  render_filepaths(filepaths)
def _count_reachable(root_blocks):
  """Counts the blocks and edges reachable from the given root blocks.

  Edges are followed through each block's `exits_from_end`. Blocks are tracked
  by identity (`id`), so they do not need to be hashable.

  Args:
    root_blocks: An iterable of blocks to start the traversal from. A block
      that appears more than once is only visited once.
  Returns:
    A (num_nodes, num_edges) tuple: the number of distinct blocks reached and
    the total number of `exits_from_end` edges leaving those blocks.
  """
  worklist = []
  seen_block_ids = set()
  for block in root_blocks:
    if id(block) not in seen_block_ids:
      seen_block_ids.add(id(block))
      worklist.append(block)

  num_edges = 0
  while worklist:
    block = worklist.pop()
    for next_block in block.exits_from_end:
      num_edges += 1
      if id(next_block) not in seen_block_ids:
        seen_block_ids.add(id(next_block))
        worklist.append(next_block)
  return len(seen_block_ids), num_edges


def cyclomatic_complexity(control_flow_graph):
  """Computes the cyclomatic complexity of a function from its cfg.

  Only the blocks reachable from the first block returned by
  `control_flow_graph.get_enter_blocks()` are considered.

  Args:
    control_flow_graph: The graph to analyze.
  Returns:
    e - n + 2 * p, where e and n are the number of edges and blocks reachable
    from the first enter block and p = 1.
  Raises:
    ValueError: If the graph has no enter blocks.
  """
  # iter() makes this work whether get_enter_blocks returns an iterator or a
  # plain iterable; the default avoids leaking StopIteration to the caller.
  enter_block = next(iter(control_flow_graph.get_enter_blocks()), None)
  if enter_block is None:
    raise ValueError('The control flow graph has no enter blocks.')

  num_nodes, num_edges = _count_reachable([enter_block])

  p = 1  # num_connected_components
  e = num_edges
  n = num_nodes
  return e - n + 2 * p


def cyclomatic_complexity2(control_flow_graph):
  """Computes the cyclomatic complexity of a program from its cfg.

  Unlike `cyclomatic_complexity`, this counts every block in
  `control_flow_graph.blocks`, reachable or not.
  """
  # Assumes a single connected component.
  p = 1  # num_connected_components
  e = sum(len(block.exits_from_end) for block in control_flow_graph.blocks)
  n = len(control_flow_graph.blocks)
  return e - n + 2 * p


def cyclomatic_complexity3(control_flow_graph):
  """Computes the cyclomatic complexity of a program from its cfg.

  The start block and every enter block are each treated as the root of one
  connected component; blocks and edges are counted over everything reachable
  from those roots.

  Args:
    control_flow_graph: The graph to analyze.
  Returns:
    e - n + 2 * p, where p is the number of distinct root blocks.
  """
  start_block = control_flow_graph.start_block
  roots = [start_block]
  root_ids = {id(start_block)}
  for enter_block in control_flow_graph.get_enter_blocks():
    # A block that is already a root must not be counted as another component
    # nor traversed a second time (which would double-count its edges).
    if id(enter_block) in root_ids:
      continue
    root_ids.add(id(enter_block))
    roots.append(enter_block)

  num_nodes, num_edges = _count_reachable(roots)

  p = len(roots)  # num_connected_components
  e = num_edges
  n = num_nodes
  return e - n + 2 * p
class CyclomaticComplexityTest(parameterized.TestCase):
  """Checks cyclomatic complexity values for a few test components."""

  @parameterized.parameters(
      (tc.straight_line_code, 1),
      (tc.simple_if_statement, 2),
      (tc.simple_for_loop, 2),
  )
  def test_cyclomatic_complexity(self, component, target_value):
    # Build the graph for the component and compare its complexity with the
    # expected value from the parameter list.
    cfg = control_flow.get_control_flow_graph(component)
    self.assertEqual(
        cyclomatic_complexity.cyclomatic_complexity(cfg), target_value)
# Marks "this node has no after value yet" in Analysis.visit; a dedicated
# object cannot be confused with any real analysis value.
_UNSET = object()


class Analysis(object):
  """Base class for a data flow analysis.

  Subclasses implement `aggregate_previous_after_values` and
  `compute_after_value`; `visit` then runs the analysis with a worklist.

  Attributes:
    label: The name of the analysis.
    forward: (bool) True for forward analyses, False for backward analyses.
    in_label: The name of the analysis, suffixed with _in.
    out_label: The name of the analysis, suffixed with _out.
    before_label: Either the in_label or out_label depending on the direction of
      the analysis. Marks the before_value on a node during an analysis.
    after_label: Either the in_label or out_label depending on the direction of
      the analysis. Marks the after_value on a node during an analysis.
  """

  def __init__(self, label, forward):
    self.label = label
    self.forward = forward

    self.in_label = label + '_in'
    self.out_label = label + '_out'

    self.before_label = self.in_label if forward else self.out_label
    self.after_label = self.out_label if forward else self.in_label

  def aggregate_previous_after_values(self, previous_after_values):
    """Computes the before value for a node from the previous after values.

    This is the 'meet' or 'join' function of the analysis.
    TODO(dbieber): Update terminology to match standard textbook notation.

    Args:
      previous_after_values: The after values of all before nodes.
    Returns:
      The before value for the current node.
    """
    raise NotImplementedError

  def compute_after_value(self, node, before_value):
    """Computes the after value for a node from the node and the before value.

    This is the 'transfer' function of the analysis.
    TODO(dbieber): Update terminology to match standard textbook notation.

    Args:
      node: The node or block for which to compute the after value.
      before_value: The before value of the node.
    Returns:
      The computed after value for the node.
    """
    raise NotImplementedError

  def visit(self, node):
    """Visit the nodes of the control flow graph, performing the analysis.

    Starting from `node`, each visited node gets its before and after labels
    (re)computed. Whenever a node's after value changes, the nodes that depend
    on it are queued, until no value changes anymore.

    Terminology:
      in_value: The value of the analysis at the start of a node.
      out_value: The value of the analysis at the end of a node.
      before_value: in_value in a forward analysis; out_value in a backward
        analysis.
      after_value: out_value in a forward analysis; in_value in a backward
        analysis.

    Args:
      node: A graph element that supports the .next / .prev API, such as a
        ControlFlowNode from a ControlFlowGraph or a BasicBlock from a
        ControlFlowGraph.
    """
    to_visit = collections.deque([node])
    while to_visit:
      node = to_visit.popleft()

      before_nodes = node.prev if self.forward else node.next
      after_nodes = node.next if self.forward else node.prev
      # Nodes that were not visited yet carry no label and contribute nothing.
      previous_after_values = [
          before_node.get_label(self.after_label)
          for before_node in before_nodes
          if before_node.has_label(self.after_label)]

      if node.has_label(self.after_label):
        initial_after_value = node.get_label(self.after_label)
      else:
        initial_after_value = _UNSET
      before_value = self.aggregate_previous_after_values(previous_after_values)
      node.set_label(self.before_label, before_value)
      after_value = self.compute_after_value(node, before_value)
      node.set_label(self.after_label, after_value)
      # Compare the values themselves rather than their hashes: two different
      # values may share a hash, which would wrongly stop the propagation.
      if initial_after_value is _UNSET or after_value != initial_after_value:
        to_visit.extend(after_nodes)


def get_while_loop_variables(node, graph=None):
  """Gets the set of loop variables used for while loop rewriting.

  This is the set of variables used for rewriting a while loop into its
  functional form.

  Args:
    node: An ast.While AST node.
    graph: (Optional) The ControlFlowGraph of the function or program containing
      the while loop. If not present, the control flow graph for the while loop
      will be computed.
  Returns:
    The set of variable identifiers that are live at the start of the loop's
    test and are written to within the while loop.
  """
  if graph is None:
    graph = control_flow.get_control_flow_graph(node)
  test_block = graph.get_block_by_ast_node(node.test)

  analysis = LivenessAnalysis()
  for block in graph.get_exit_blocks():
    analysis.visit(block)
  # Read the label name from the analysis instead of spelling it out here.
  live_variables = test_block.get_label(analysis.in_label)
  written_variables = {
      write.id
      for write in instruction_module.get_writes_from_ast_node(node)
      if isinstance(write, ast.Name)
  }
  return live_variables & written_variables


class LivenessAnalysis(Analysis):
  """Liveness analysis by basic block.

  In the liveness analysis, the in_value of a block is the set of variables
  that are live at the start of a block. "Live" means that the current value of
  the variable may be used later in the execution. The out_value of a block is
  the set of variable identifiers that are live at the end of the block.

  Since this is a backward analysis, the "before_value" is the out_value and the
  "after_value" is the in_value.
  """

  def __init__(self):
    super(LivenessAnalysis, self).__init__(label='liveness', forward=False)

  def aggregate_previous_after_values(self, previous_after_values):
    """Computes the out_value (before_value) of a block.

    Args:
      previous_after_values: A list of the sets of live variables at the start
        of each of the blocks following the current block.
    Returns:
      The set of live variables at the end of the current block. This is the
      union of live variable sets at the start of each subsequent block.
    """
    return frozenset().union(*previous_after_values)

  def compute_after_value(self, block, before_value):
    """Computes the in_value of a block from its gen and kill sets.

    The gen set is the set of variables read by the block before they are
    written to.
    The kill set is the set of variables written to by the basic block.

    Args:
      block: The BasicBlock to analyze.
      before_value: The out_value for block (the set of variables live at the
        end of the block.)
    Returns:
      The in_value for block (the set of variables live at the start of the
      block).
    """
    gen = set()
    kill = set()
    for control_flow_node in block.control_flow_nodes:
      instruction = control_flow_node.instruction
      # A read only counts if no earlier instruction of the block wrote the
      # variable; the instruction's own writes are applied after its reads.
      gen.update(
          read for read in instruction.get_read_names() if read not in kill)
      kill.update(instruction.get_write_names())
    return frozenset((before_value - kill) | gen)


class FrozenDict(dict):
  """A dict that is hashable by its sorted (key, value) items.

  Despite the name, nothing here prevents mutation; instances must not be
  modified once their hash is relied upon.
  """

  def __hash__(self):
    return hash(tuple(sorted(self.items())))


class LastAccessAnalysis(Analysis):
  """Computes for each variable its possible last reads and last writes."""

  def __init__(self):
    super(LastAccessAnalysis, self).__init__(label='last_access', forward=True)

  def aggregate_previous_after_values(self, previous_after_values):
    """Merges the incoming mappings by taking the union of the sets per key.

    Args:
      previous_after_values: The mappings (key -> frozenset of accesses)
        computed for the preceding nodes.
    Returns:
      A FrozenDict mapping each key to the union of its incoming sets.
    """
    result = collections.defaultdict(frozenset)
    for previous_after_value in previous_after_values:
      for key, value in previous_after_value.items():
        result[key] |= value
    return FrozenDict(result)

  def compute_after_value(self, node, before_value):
    """Replaces the entry of each access made by the node with that access.

    Args:
      node: The control flow node to analyze.
      before_value: The mapping that holds at the start of the node.
    Returns:
      A FrozenDict equal to before_value, except that the key of every access
      in the node's instruction now maps to a set holding only that access.
    """
    result = before_value.copy()
    for access in node.instruction.accesses:
      kind_and_name = instruction_module.access_kind_and_name(access)
      result[kind_and_name] = frozenset([access])
    return FrozenDict(result)
class DataFlowTest(absltest.TestCase):
  """Tests for the liveness and last-access analyses in data_flow.py."""

  def test_get_while_loop_variables(self):
    """Smoke test: runs liveness analysis on tc.nested_while_loops.

    The analysis is started from every exit block of the control flow graph.
    The 'liveness_out' label of each While test block is only logged; no
    assertions are made.
    """
    root = program_utils.program_to_ast(tc.nested_while_loops)
    graph = control_flow.get_control_flow_graph(root)

    # node = graph.get_ast_node_by_type(ast.While)
    # TODO(dbieber): data_flow.get_while_loop_variables(node, graph)

    analysis = data_flow.LivenessAnalysis()
    for block in graph.get_exit_blocks():
      analysis.visit(block)

    for block in graph.get_blocks_by_ast_node_type_and_label(
        ast.While, 'test_block'):
      logging.info(block.get_label('liveness_out'))

  def test_liveness_simple_while_loop(self):
    """Checks that get_while_loop_variables returns {'x'} for a simple loop."""
    # NOTE: this function is the program under test (it is converted to an AST
    # below), so its body must not be edited.
    def simple_while_loop():
      a = 2
      b = 10
      x = 1
      while x < b:
        tmp = x + a
        x = tmp + 1

    program_node = program_utils.program_to_ast(simple_while_loop)
    graph = control_flow.get_control_flow_graph(program_node)

    # TODO(dbieber): Use unified query system.
    # Select the first While node found while walking the AST.
    while_node = [
        node for node in ast.walk(program_node)
        if isinstance(node, ast.While)][0]
    loop_variables = data_flow.get_while_loop_variables(while_node, graph)
    self.assertEqual(loop_variables, {'x'})

  def test_data_flow_nested_loops(self):
    """Checks the last-access labels of `count += 1` inside nested for loops."""
    # NOTE: this function is the program under test (it is converted to an AST
    # below), so its body must not be edited.
    def fn():
      count = 0
      for x in range(10):
        for y in range(10):
          if x == y:
            count += 1
      return count

    program_node = program_utils.program_to_ast(fn)
    graph = control_flow.get_control_flow_graph(program_node)

    # Perform the analysis.
    analysis = data_flow.LastAccessAnalysis()
    analysis.visit(graph.start_block.control_flow_nodes[0])
    for node in graph.get_enter_control_flow_nodes():
      analysis.visit(node)

    # Verify correctness.
    node = graph.get_control_flow_node_by_source('count += 1')
    last_accesses_in = node.get_label('last_access_in')
    last_accesses_out = node.get_label('last_access_out')
    self.assertLen(last_accesses_in['write-count'], 2)  # += 1, = 0
    self.assertLen(last_accesses_in['read-count'], 1)  # += 1
    self.assertLen(last_accesses_out['write-count'], 1)  # += 1
    self.assertLen(last_accesses_out['read-count'], 1)  # += 1

  def test_last_accesses_analysis(self):
    """Checks last-access labels on tc.nested_while_loops.

    Every control flow node must carry both the 'last_access_in' and the
    'last_access_out' label, and the number of last reads/writes of `x` is
    checked after `y += 5` and after `return x`.
    """
    root = program_utils.program_to_ast(tc.nested_while_loops)
    graph = control_flow.get_control_flow_graph(root)

    analysis = data_flow.LastAccessAnalysis()
    analysis.visit(graph.start_block.control_flow_nodes[0])

    for node in graph.get_enter_control_flow_nodes():
      analysis.visit(node)

    for block in graph.blocks:
      for cfn in block.control_flow_nodes:
        self.assertTrue(cfn.has_label('last_access_in'))
        self.assertTrue(cfn.has_label('last_access_out'))

    node = graph.get_control_flow_node_by_source('y += 5')
    last_accesses = node.get_label('last_access_out')
    # TODO(dbieber): Add asserts that these are the correct accesses.
    self.assertLen(last_accesses['write-x'], 2)  # x = 1, x += 6
    self.assertLen(last_accesses['read-x'], 1)  # x < 2

    node = graph.get_control_flow_node_by_source('return x')
    last_accesses = node.get_label('last_access_out')
    self.assertLen(last_accesses['write-x'], 2)  # x = 1, x += 6
    self.assertLen(last_accesses['read-x'], 1)  # x < 2

  def test_liveness_analysis_all_test_components(self):
    """Smoke test: LivenessAnalysis runs on every function in the tc module.

    No assertions are made; the test passes when no analysis raises.
    """
    for unused_name, fn in inspect.getmembers(tc, predicate=inspect.isfunction):
      root = program_utils.program_to_ast(fn)
      graph = control_flow.get_control_flow_graph(root)

      analysis = data_flow.LivenessAnalysis()
      for block in graph.get_exit_blocks():
        analysis.visit(block)

  def test_last_access_analysis_all_test_components(self):
    """Smoke test: LastAccessAnalysis runs on every function in the tc module.

    No assertions are made; the test passes when no analysis raises.
    """
    for unused_name, fn in inspect.getmembers(tc, predicate=inspect.isfunction):
      root = program_utils.program_to_ast(fn)
      graph = control_flow.get_control_flow_graph(root)

      analysis = data_flow.LastAccessAnalysis()
      for node in graph.get_enter_control_flow_nodes():
        analysis.visit(node)
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """An Instruction represents an executable unit of a Python program. 16 | 17 | Almost all simple statements correspond to Instructions, except for statements 18 | like pass, continue, and break, whose effects are already represented in the 19 | structure of the control-flow graph. 20 | 21 | In addition to simple statements, assignments that take place outside of simple 22 | statements such as implicitly in a function or class definition also correspond 23 | to Instructions. 24 | 25 | The complete set of places where Instructions occur in source is listed here: 26 | 27 | 1. <Instruction node> (Any node in INSTRUCTION_AST_NODES used as a statement.) 28 | 2. if <Instruction node>: ... (elif is the same.) 29 | 3+4. for <Instruction node> in <Instruction node>: ... 30 | 5. while <Instruction node>: ... 31 | 6. try: ... except <Instruction node>: ... 32 | 7. TODO(dbieber): Test for "with <Instruction node>:"... 33 | 34 | In the code: 35 | 36 | @decorator 37 | def fn(args=defaults): 38 | body 39 | 40 | Outside of the function definition, we get the following instructions: 41 | 8. Each decorator is an Instruction. 42 | 9. Each default is an Instruction. 43 | 10. The assignment of the function def to the function name is an Instruction. 44 | Inside the function definition, we get the following instructions: 45 | 11. An Instruction for the assignment of values to the arguments. 46 | (1, again) And then the body can consist of multiple Instructions too. 47 | 48 | Likewise in the code: 49 | 50 | @decorator 51 | class C(object): 52 | body 53 | 54 | The following are Instructions: 55 | (8, again) Each decorator is an Instruction. 56 | 12. The assignment of the class to the variable C is an Instruction. 57 | (1, again) And then the body can consist of multiple Instructions too. 58 | 13. TODO(dbieber): The base class (object) is an Instruction too.
def _canonicalize(node):
  """Strips wrappers that do not change which program a node represents.

  Single-element lists, Module nodes (replaced by their body) and Expr
  statement nodes (replaced by their value) are unwrapped repeatedly until
  none of these cases applies.

  Args:
    node: An ast.AST object, a primitive, or a list of those.

  Returns:
    The unwrapped node.
  """
  if isinstance(node, list) and len(node) == 1:
    return _canonicalize(node[0])
  if isinstance(node, ast.Module):
    return _canonicalize(node.body)
  if isinstance(node, ast.Expr):
    return _canonicalize(node.value)
  return node


def represent_same_program(node1, node2):
  """Whether AST nodes node1 and node2 represent the same program syntactically.

  Two programs are the same syntactically if they have equivalent ASTs, up to
  some small changes. The context field of Name nodes can change without the
  syntax represented by the AST changing. This allows for example for the short
  program 'x' (a read) to match with a subprogram 'x' of 'x = 3' (in which x is
  a write), since these two programs are the same syntactically ('x' and 'x').

  Except for `ctx` fields, the two nodes are recursively checked for exact
  equality. Lists (both list-valued fields and lists passed in directly) are
  equal only if they have the same length and their items match pairwise.

  Args:
    node1: An AST node. This can be an ast.AST object, a primitive, or a list of
      AST nodes (primitives or ast.AST objects).
    node2: An AST node. This can be an ast.AST object, a primitive, or a list of
      AST nodes (primitives or ast.AST objects).

  Returns:
    Whether the two nodes represent equivalent programs.
  """
  node1 = _canonicalize(node1)
  node2 = _canonicalize(node2)

  if type(node1) != type(node2):  # pylint: disable=unidiomatic-typecheck
    return False
  if isinstance(node1, list):
    # AST nodes only compare equal by identity, so `node1 == node2` would be
    # False for two structurally identical lists. Compare item by item instead.
    if len(node1) != len(node2):
      return False
    return all(
        represent_same_program(item1, item2)
        for item1, item2 in zip(node1, node2))
  if not isinstance(node1, ast.AST):
    return node1 == node2

  fields1 = list(ast.iter_fields(node1))
  fields2 = list(ast.iter_fields(node2))
  if len(fields1) != len(fields2):
    return False

  for (field1, value1), (field2, value2) in zip(fields1, fields2):
    if field1 == 'ctx':
      # Load/Store context does not affect the syntax being represented.
      continue
    if field1 != field2 or type(value1) is not type(value2):
      return False
    if isinstance(value1, list):
      # zip() stops at the shorter list, so lengths must be checked first;
      # otherwise `fn(a)` would match `fn(a, b)`.
      if len(value1) != len(value2):
        return False
      for item1, item2 in zip(value1, value2):
        if not represent_same_program(item1, item2):
          return False
    elif not represent_same_program(value1, value2):
      return False

  return True
class AccessVisitor(ast.NodeVisitor):
  """Visitor that records variable accesses in the order it encounters them.

  The traversal is depth-first and follows the field order of the AST node
  classes, with one exception: for Assign nodes the right-hand side is visited
  before the targets.

  The recorded order can differ from Python's runtime behavior:

  - Every operand of a short-circuit `and`/`or` and every branch of a
    conditional expression `X if Y else Z` is recorded, even though one of them
    may be skipped at runtime.
  - Where an AST node's field order differs from the interpreter's evaluation
    order (e.g. dictionary literals: the interpreter alternates keys and
    values, the fields list all keys and then all values), field order wins.

  Attributes:
    accesses: List of accesses encountered by the visitor. Each entry is either
      a Name AST node or, for the implicit read of an augmented assignment, a
      ('read', target, aug_assign_node) tuple.
  """

  # TODO(dbieber): Include accesses of ast.Subscript and ast.Attribute targets.

  def __init__(self):
    self.accesses = []

  def visit_Name(self, node):
    """Records the Name node itself as an access."""
    self.accesses.append(node)

  def visit_Assign(self, node):
    """Visits the assigned value first, then each target in order."""
    for child in [node.value] + list(node.targets):
      self.visit(child)

  def visit_AugAssign(self, node):
    """Visits the value, records the implicit read, then visits the target."""
    target = node.target
    self.visit(node.value)
    # The target of an augmented assignment carries a write context, but it is
    # read as well; that read is recorded explicitly when the target is a Name.
    if isinstance(target, ast.Name):
      # TODO(dbieber): Use a proper type instead of a tuple for accesses.
      self.accesses.append(('read', target, node))
    # The write access is recorded through the normal visit of the target.
    self.visit(target)
def get_accesses_from_ast_node(node):
  """Returns every access made by an AST node, in AccessVisitor order."""
  access_visitor = AccessVisitor()
  access_visitor.visit(node)
  return access_visitor.accesses


def get_reads_from_ast_node(ast_node):
  """Returns the read accesses of an AST node, in AccessVisitor order.

  Args:
    ast_node: The AST node of interest.

  Returns:
    A list of reads performed by that AST node.
  """
  return list(filter(access_is_read, get_accesses_from_ast_node(ast_node)))


def get_writes_from_ast_node(ast_node):
  """Returns the write accesses of an AST node, in AccessVisitor order.

  Args:
    ast_node: The AST node of interest.

  Returns:
    A list of writes performed by that AST node.
  """
  return list(filter(access_is_write, get_accesses_from_ast_node(ast_node)))
def create_writes(node, parent=None):
  """Creates write accesses for a node.

  Args:
    node: Either an AST node, in which case one write is created for every Name
      node found by walking it, or any other object (e.g. a variable name
      string), in which case a single write of that object is created.
    parent: Stored as the third element of each created access tuple.

  Returns:
    A list of ('write', node, parent) tuples.
  """
  # TODO(dbieber): Use a proper type instead of a tuple for accesses.
  if isinstance(node, ast.AST):
    return [
        ('write', n, parent) for n in ast.walk(node) if isinstance(n, ast.Name)
    ]
  else:
    return [('write', node, parent)]


def access_is_read(access):
  """Whether the access (a Name AST node or an access tuple) is a read."""
  if isinstance(access, ast.AST):
    assert isinstance(access, ast.Name), access
    return isinstance(access.ctx, READ_CONTEXTS)
  else:
    return access[0] == 'read'


def access_is_write(access):
  """Whether the access (a Name AST node or an access tuple) is a write."""
  if isinstance(access, ast.AST):
    assert isinstance(access, ast.Name), access
    return isinstance(access.ctx, WRITE_CONTEXTS)
  else:
    return access[0] == 'write'


def access_name(access):
  """Returns the variable name of an access.

  Args:
    access: A Name AST node, or an access tuple whose second element is a
      string or a Name AST node.

  Returns:
    The name being accessed.

  Raises:
    ValueError: The access has none of the supported forms.
  """
  if isinstance(access, ast.AST):
    return access.id
  elif isinstance(access, tuple):
    if isinstance(access[1], six.string_types):
      return access[1]
    elif isinstance(access[1], ast.Name):
      return access[1].id
  raise ValueError('Unexpected access type.', access)


def access_kind(access):
  """Returns 'read' or 'write' for the given access.

  Args:
    access: A Name AST node or an access tuple.

  Returns:
    'read' if the access is a read, 'write' if it is a write.

  Raises:
    ValueError: The access is neither a read nor a write. Raising here avoids
      silently producing identifiers such as 'None-x' further downstream.
  """
  if access_is_read(access):
    return 'read'
  elif access_is_write(access):
    return 'write'
  raise ValueError('Access is neither a read nor a write.', access)


def access_kind_and_name(access):
  """Returns the '<kind>-<name>' identifier of an access, e.g. 'read-x'."""
  return '{}-{}'.format(access_kind(access), access_name(access))


def access_identifier(name, kind):
  """Returns the '<kind>-<name>' identifier for a name and an access kind."""
  return '{}-{}'.format(kind, name)
class Instruction(object):
  # pyformat:disable
  """An executable unit of a Python program.

  An Instruction is the part of an AST that corresponds to a simple statement
  or an assignment, as opposed to control flow. That part is not necessarily an
  AST node: it may be an AST node, or it may be a string such as a variable
  name.

  Instructions are the smallest unit of a control flow graph, where each one is
  wrapped in a ControlFlowNode. The basic blocks of a control flow graph are
  sequences of Instructions that execute in a straight line, or not at all.

  An Instruction is conceptually immutable: Python does not prevent mutation,
  but an Instruction should not be modified after it has been created.

  An exception can interrupt an Instruction mid-execution. Control flow graphs
  capture this with interrupting exits from basic blocks to exception handlers
  or to special 'raises' blocks.

  Besides pure simple statements, an Instruction can stand for several other
  parts of code; the module docstring lists all of them.

  Usually the accesses made by an Instruction are the Name AST nodes contained
  in its AST node. When `source` is not None this is not always so. For a
  function definition, for example, the only access is the assignment of the
  function def to the function's name; the Name nodes inside the definition
  belong to the other Instructions that make up the function. The accesses are
  computed when the Instruction is created and are available through the
  `accesses` attribute.

  Attributes:
    node: The AST node corresponding to the instruction.
    accesses: (optional) An ordered list of all reads and writes made by this
      instruction. Each item in `accesses` is one of either:
        - A 3-tuple with fields (kind, node, parent). kind is either 'read' or
          'write'. node is either a string or Name AST node. parent is an AST
          node where node occurs.
        - A Name AST node
      # TODO(dbieber): Use a single type for all accesses.
    source: (optional) The source of the writes. For example in the for loop
      `for x in items: pass` there is an instruction for the Name node "x". Its
      source is ITERATOR, indicating that this instruction corresponds to x
      being assigned a value from an iterator. When source is not None, the
      Python code corresponding to the instruction does not coincide with the
      Python code corresponding to the instruction's node.
  """
  # pyformat:enable

  def __init__(self, node, accesses=None, source=None):
    if not isinstance(node, ast.AST):
      raise TypeError('node must be an instance of ast.AST.', node)
    self.node = node
    # Accesses default to those found by visiting the node itself.
    self.accesses = (
        get_accesses_from_ast_node(node) if accesses is None else accesses)
    self.source = source

  def contains_subprogram(self, node):
    """Whether this Instruction contains the given AST as a subprogram.

    Checks whether `node` matches some subtree of this Instruction's AST. When
    the Instruction has a source (an implied write), only a match against the
    Instruction's whole node counts.

    Args:
      node: The node to check the instruction against for a match.

    Returns:
      (bool) Whether or not this Instruction contains the node, syntactically.
    """
    if self.source is not None:
      # Only exact matches are permissible if source is not None.
      return represent_same_program(node, self.node)
    return any(
        represent_same_program(node, subtree)
        for subtree in ast.walk(self.node))

  def get_reads(self):
    """Returns the set of read accesses of this instruction."""
    return set(filter(access_is_read, self.accesses))

  def get_read_names(self):
    """Returns the set of names read by this instruction."""
    return set(map(access_name, self.get_reads()))

  def get_writes(self):
    """Returns the set of write accesses of this instruction."""
    return set(filter(access_is_write, self.accesses))

  def get_write_names(self):
    """Returns the set of names written by this instruction."""
    return set(map(access_name, self.get_writes()))
def create_instruction(source):
  """Parses `source` and wraps its canonicalized AST in an Instruction."""
  node = ast.parse(source)
  node = instruction_module._canonicalize(node)
  return instruction_module.Instruction(node)


class InstructionTest(absltest.TestCase):
  """Tests for represent_same_program and for Instruction access tracking."""

  def test_instruction(self):
    """The Instruction class is exported by the instruction module."""
    self.assertIsNotNone(instruction_module.Instruction)

  def test_represent_same_program_basic_positive_case(self):
    """Two separately parsed copies of the same source match."""
    program1 = ast.parse('x + 1')
    program2 = ast.parse('x + 1')
    self.assertTrue(
        instruction_module.represent_same_program(program1, program2))

  def test_represent_same_program_basic_negative_case(self):
    """Sources that differ in a constant do not match."""
    program1 = ast.parse('x + 1')
    program2 = ast.parse('x + 2')
    self.assertFalse(
        instruction_module.represent_same_program(program1, program2))

  def test_represent_same_program_different_contexts(self):
    """A Name written in one program matches the same Name read in another."""
    full_program1 = ast.parse('y = x + 1')  # y is a write
    program1 = full_program1.body[0].targets[0]  # 'y'
    program2 = ast.parse('y')  # y is a read
    self.assertTrue(
        instruction_module.represent_same_program(program1, program2))

  def test_get_accesses(self):
    """Read and write names of expressions, returns, calls and assignments."""
    instruction = create_instruction('x + 1')
    self.assertEqual(instruction.get_read_names(), {'x'})
    self.assertEqual(instruction.get_write_names(), set())

    instruction = create_instruction('return x + y + z')
    self.assertEqual(instruction.get_read_names(), {'x', 'y', 'z'})
    self.assertEqual(instruction.get_write_names(), set())

    instruction = create_instruction('fn(a, b, c)')
    self.assertEqual(instruction.get_read_names(), {'a', 'b', 'c', 'fn'})
    self.assertEqual(instruction.get_write_names(), set())

    instruction = create_instruction('c = fn(a, b, c)')
    self.assertEqual(instruction.get_read_names(), {'a', 'b', 'c', 'fn'})
    self.assertEqual(instruction.get_write_names(), {'c'})

  def test_get_accesses_augassign(self):
    """An augmented assignment both reads and writes its Name target."""
    instruction = create_instruction('x += 1')
    self.assertEqual(instruction.get_read_names(), {'x'})
    self.assertEqual(instruction.get_write_names(), {'x'})

    instruction = create_instruction('x *= y')
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), {'x'})

  def test_get_accesses_augassign_subscript(self):
    """Augmented assignment to a subscript reads, but does not write, x."""
    instruction = create_instruction('x[0] *= y')
    # This is not currently considered a write of x. It is a read of x.
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), set())

  def test_get_accesses_augassign_attribute(self):
    """Augmented assignment to an attribute reads, but does not write, x."""
    instruction = create_instruction('x.attribute *= y')
    # This is not currently considered a write of x. It is a read of x.
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), set())

  def test_get_accesses_subscript(self):
    """Assignment to a subscript reads, but does not write, x."""
    instruction = create_instruction('x[0] = y')
    # This is not currently considered a write of x. It is a read of x.
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), set())

  def test_get_accesses_attribute(self):
    """Assignment to an attribute reads, but does not write, x."""
    instruction = create_instruction('x.attribute = y')
    # This is not currently considered a write of x. It is a read of x.
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), set())

  def test_access_ordering(self):
    """Accesses are listed right-hand side first, with the write last.

    For the augmented assignment, the implicit read of the target comes
    directly before its write.
    """
    instruction = create_instruction('c = fn(a, b + c, d / a)')
    access_names_and_kinds = [(instruction_module.access_name(access),
                               instruction_module.access_kind(access))
                              for access in instruction.accesses]
    self.assertEqual(access_names_and_kinds, [('fn', 'read'), ('a', 'read'),
                                              ('b', 'read'), ('c', 'read'),
                                              ('d', 'read'), ('a', 'read'),
                                              ('c', 'write')])

    instruction = create_instruction('c += fn(a, b + c, d / a)')
    access_names_and_kinds = [(instruction_module.access_name(access),
                               instruction_module.access_kind(access))
                              for access in instruction.accesses]
    self.assertEqual(access_names_and_kinds, [('fn', 'read'), ('a', 'read'),
                                              ('b', 'read'), ('c', 'read'),
                                              ('d', 'read'), ('a', 'read'),
                                              ('c', 'read'), ('c', 'write')])
16 | 17 | A ProgramGraph represents a Python program or function. The nodes in a 18 | ProgramGraph represent an Instruction (see instruction.py), an AST node, or a 19 | piece of syntax from the program. The edges in a ProgramGraph represent the 20 | relationships between these nodes. 21 | """ 22 | 23 | import codecs 24 | import collections 25 | import os 26 | 27 | from absl import logging 28 | import astunparse 29 | from astunparse import unparser 30 | import gast as ast 31 | from python_graphs import control_flow 32 | from python_graphs import data_flow 33 | from python_graphs import instruction as instruction_module 34 | from python_graphs import program_graph_dataclasses as pb 35 | from python_graphs import program_utils 36 | from python_graphs import unparser_patch # pylint: disable=unused-import 37 | 38 | import six 39 | from six.moves import builtins 40 | from six.moves import filter 41 | 42 | NEWLINE_TOKEN = '#NEWLINE#' 43 | UNINDENT_TOKEN = '#UNINDENT#' 44 | INDENT_TOKEN = '#INDENT#' 45 | 46 | 47 | class ProgramGraph(object): 48 | """A ProgramGraph represents a Python program or function. 49 | 50 | Attributes: 51 | root_id: The id of the root ProgramGraphNode. 52 | nodes: Maps from node id to the ProgramGraphNode with that id. 53 | edges: A list of the edges (from_node.id, to_node.id, edge type) in the 54 | graph. 55 | child_map: Maps from node id to a list of that node's AST children node ids. 56 | parent_map: Maps from node id to that node's AST parent node id. 57 | neighbors_map: Maps from node id to a list of that node's neighboring edges. 58 | ast_id_to_program_graph_node: Maps from an AST node's object id to the 59 | corresponding AST program graph node, if it exists. 60 | root: The root ProgramGraphNode. 
61 | """ 62 | 63 | def __init__(self): 64 | """Constructs an empty ProgramGraph with no root.""" 65 | self.root_id = None 66 | 67 | self.nodes = {} 68 | # TODO(charlessutton): Seems odd to have Edge proto objects as part of the 69 | # program graph object if node protos aren't. Consider a more consistent 70 | # treatment. 71 | self.edges = [] 72 | 73 | self.ast_id_to_program_graph_node = {} 74 | self.child_map = collections.defaultdict(list) 75 | self.parent_map = collections.defaultdict(lambda: None) 76 | self.neighbors_map = collections.defaultdict(list) 77 | 78 | # Accessors 79 | @property 80 | def root(self): 81 | if self.root_id not in self.nodes: 82 | raise ValueError('Graph has no root node.') 83 | return self.nodes[self.root_id] 84 | 85 | def all_nodes(self): 86 | return self.nodes.values() 87 | 88 | def get_node(self, obj): 89 | """Returns the node in the program graph corresponding to an object. 90 | 91 | Arguments: 92 | obj: Can be an integer, AST node, ProgramGraphNode, or program graph node 93 | protobuf. 94 | 95 | Raises: 96 | ValueError: no node exists in the program graph matching obj. 
97 | """ 98 | if isinstance(obj, six.integer_types) and obj in self.nodes: 99 | return self.get_node_by_id(obj) 100 | elif isinstance(obj, ProgramGraphNode): 101 | # assert obj in self.nodes.values() 102 | return obj 103 | elif isinstance(obj, pb.Node): 104 | return self.get_node_by_id(obj.id) 105 | elif isinstance(obj, (ast.AST, list)): 106 | return self.get_node_by_ast_node(obj) 107 | else: 108 | raise ValueError('Unexpected value for obj.', obj) 109 | 110 | def get_node_by_id(self, obj): 111 | """Gets a ProgramGraph node for the given integer id.""" 112 | return self.nodes[obj] 113 | 114 | def get_node_by_access(self, access): 115 | """Gets a ProgramGraph node for the given read or write.""" 116 | if isinstance(access, ast.Name): 117 | return self.get_node(access) 118 | else: 119 | assert isinstance(access, tuple) 120 | if isinstance(access[1], ast.Name): 121 | return self.get_node(access[1]) 122 | else: 123 | return self.get_node(access[2]) 124 | raise ValueError('Could not find node for access.', access) 125 | 126 | def get_nodes_by_source(self, source): 127 | """Generates the nodes in the program graph containing the query source. 128 | 129 | Args: 130 | source: The query source. 131 | 132 | Returns: 133 | A generator of all nodes in the program graph with an Instruction with 134 | source that includes the query source. 135 | """ 136 | module = ast.parse(source, mode='exec') # TODO(dbieber): Factor out 4 lines 137 | # TODO(dbieber): Use statements beyond the first statement from source. 138 | node = module.body[0] 139 | # If the query source is an Expression, and the matching instruction matches 140 | # the value field of that Expression, then the matching instruction is 141 | # considered a match. This allows us to match subexpressions which appear in 142 | # ast.Expr nodes in the query but not in the parent. 
143 | if isinstance(node, ast.Expr): 144 | node = node.value 145 | 146 | def matches_source(pg_node): 147 | if pg_node.has_instruction(): 148 | return pg_node.instruction.contains_subprogram(node) 149 | else: 150 | return instruction_module.represent_same_program(pg_node.ast_node, node) 151 | 152 | return filter(matches_source, self.nodes.values()) 153 | 154 | def get_node_by_source(self, node): 155 | # We use min since nodes can contain each other and we want the most 156 | # specific one. 157 | return min( 158 | self.get_nodes_by_source(node), key=lambda x: len(ast.dump(x.node))) 159 | 160 | def get_nodes_by_function_name(self, name): 161 | return filter( 162 | lambda n: n.has_instance_of(ast.FunctionDef) and n.node.name == name, 163 | self.nodes.values()) 164 | 165 | def get_node_by_function_name(self, name): 166 | return next(self.get_nodes_by_function_name(name)) 167 | 168 | def get_node_by_ast_node(self, ast_node): 169 | return self.ast_id_to_program_graph_node[id(ast_node)] 170 | 171 | def contains_ast_node(self, ast_node): 172 | return id(ast_node) in self.ast_id_to_program_graph_node 173 | 174 | def get_ast_nodes_of_type(self, ast_type): 175 | for node in six.itervalues(self.nodes): 176 | if node.node_type == pb.NodeType.AST_NODE and node.ast_type == ast_type: 177 | yield node 178 | 179 | # TODO(dbieber): Unify selectors across program_graph and control_flow. 180 | def get_nodes_by_source_and_identifier(self, source, name): 181 | for pg_node in self.get_nodes_by_source(source): 182 | for node in ast.walk(pg_node.node): 183 | if isinstance(node, ast.Name) and node.id == name: 184 | if self.contains_ast_node(node): 185 | yield self.get_node_by_ast_node(node) 186 | 187 | def get_node_by_source_and_identifier(self, source, name): 188 | return next(self.get_nodes_by_source_and_identifier(source, name)) 189 | 190 | # Graph Construction Methods 191 | def add_node(self, node): 192 | """Adds a ProgramGraphNode to this graph. 
193 | 194 | Args: 195 | node: The ProgramGraphNode that should be added. 196 | 197 | Returns: 198 | The node that was added. 199 | 200 | Raises: 201 | ValueError: the node has already been added to this graph. 202 | """ 203 | assert isinstance(node, ProgramGraphNode), 'Not a ProgramGraphNode' 204 | if node.id in self.nodes: 205 | raise ValueError('Already contains node', self.nodes[node.id], node.id) 206 | if node.ast_node is not None: 207 | if self.contains_ast_node(node.ast_node): 208 | raise ValueError('Already contains ast node', node.ast_node) 209 | self.ast_id_to_program_graph_node[id(node.ast_node)] = node 210 | self.nodes[node.id] = node 211 | return node 212 | 213 | def add_node_from_instruction(self, instruction): 214 | """Adds a node to the program graph.""" 215 | node = make_node_from_instruction(instruction) 216 | return self.add_node(node) 217 | 218 | def add_edge(self, edge): 219 | """Adds an edge between two nodes in the graph. 220 | 221 | Args: 222 | edge: The edge, a pb.Edge proto. 223 | """ 224 | assert isinstance(edge, pb.Edge), 'Not a pb.Edge' 225 | self.edges.append(edge) 226 | 227 | n1 = self.get_node_by_id(edge.id1) 228 | n2 = self.get_node_by_id(edge.id2) 229 | if edge.type == pb.EdgeType.FIELD: # An AST node. 230 | self.child_map[edge.id1].append(edge.id2) 231 | # TODO(charlessutton): Add the below sanity check back once Instruction 232 | # updates are complete. 
def remove_edge(self, edge):
  """Removes an edge from the graph and from the adjacency bookkeeping.

  If there are multiple copies of the same edge, only one copy is removed.

  Args:
    edge: The edge, a pb.Edge proto.
  """
  self.edges.remove(edge)

  source = self.get_node_by_id(edge.id1)
  target = self.get_node_by_id(edge.id2)

  is_field_edge = edge.type == pb.EdgeType.FIELD
  if is_field_edge:
    # FIELD edges also define the AST parent/child relation; undo that too.
    self.child_map[edge.id1].remove(edge.id2)
    del self.parent_map[target.id]

  # Each edge is recorded once per endpoint, paired with the opposite node id.
  for endpoint, other_id in ((source, edge.id2), (target, edge.id1)):
    self.neighbors_map[endpoint.id].remove((edge, other_id))
def to_ast(self, node=None):
  """Converts the program graph, or a subtree of it, to a Python AST.

  Args:
    node: The program graph node to convert. If None, the graph's root is
      used.

  Returns:
    The result of _build_ast for that node. The ast_node references stored on
    the graph's nodes are left untouched (update_references=False).
  """
  start = self.root if node is None else node
  return self._build_ast(node=start, update_references=False)
def walk_ast_descendants(self, node=None):
  """Yields `node` and then the nodes for its AST descendants.

  The walk is depth-first and pre-order: a node is yielded before its
  children, and children are visited in the order recorded in child_map.

  Args:
    node: the node in the program graph corresponding to the root of the AST
      subtree that should be walked. If None, defaults to the root of the
      program graph.

  Yields:
    The starting node followed by all nodes corresponding to its descendants
    in the AST.
  """
  start = self.root if node is None else node
  pending = [start]
  while pending:
    current = pending.pop()
    child_ids = self.child_map[current.id]
    # Push children reversed so the first child is popped (visited) first.
    pending.extend(self.get_node_by_id(cid) for cid in reversed(child_ids))
    yield current
def outgoing_neighbors(self, node, edge_type=None):
  """Returns the nodes reached by edges that start at `node`.

  Args:
    node: A ProgramGraphNode.
    edge_type: If provided, only edges of this type are considered.

  Returns:
    A list with one entry per matching adjacency record of `node`, in the
    order the records are stored. The entries are ProgramGraphNode objects
    but not necessarily AST nodes.
  """

  def wanted(edge):
    # Outgoing means this node is the edge's source (id1).
    if edge.id1 != node.id:
      return False
    return edge_type is None or edge.type == edge_type

  return [
      self.get_node_by_id(other_id)
      for edge, other_id in self.neighbors_map[node.id]
      if wanted(edge)
  ]
# TODO(charlessutton): Consider whether this belongs in ProgramGraph
# or in make_synthesis_problems.
def copy_with_placeholder(self, node):
  """Returns a new program graph in which the subtree of NODE is removed.

  In the new graph, the subtree headed by NODE is replaced by a single
  node of type PLACEHOLDER, which is connected to the AST parent of NODE
  by the same edge type and field name as in the original graph. The
  placeholder reuses NODE's id.

  Every other edge that touches the subtree is dropped. Edges that do not
  touch it are recreated with their type and field name.

  The new program graph will share structure (i.e. the ProgramGraphNode
  objects) with the original graph.

  Args:
    node: A node in this program graph

  Returns:
    A new ProgramGraph object with NODE replaced
  """
  descendant_ids = {n.id for n in self.walk_ast_descendants(node)}
  new_graph = ProgramGraph()
  new_graph.add_node(self.root)
  new_graph.root_id = self.root_id
  for edge in self.edges:
    v1 = self.nodes[edge.id1]
    v2 = self.nodes[edge.id2]
    # Omit edges that are adjacent to the subtree rooted at `node` UNLESS this
    # is the AST edge to the root of the subtree.
    # In that case, create an edge to a new placeholder node.
    touches_subtree = (edge.id1 in descendant_ids or
                       edge.id2 in descendant_ids)
    if touches_subtree:
      if edge.id2 == node.id and is_ast_edge(edge):
        # The parent may not have been copied yet (this depends on edge
        # order), so add it first, as the non-subtree branch below does.
        # Nodes inside the removed subtree are never copied.
        if (edge.id1 not in descendant_ids and
            edge.id1 not in new_graph.nodes):
          new_graph.add_node(v1)
        placeholder = ProgramGraphNode()
        placeholder.node_type = pb.NodeType.PLACEHOLDER
        placeholder.id = node.id
        new_graph.add_node(placeholder)
        # Keep the field name so the copy records which field of the parent
        # the placeholder occupies.
        new_graph.add_new_edge(
            v1, placeholder, edge_type=edge.type, field_name=edge.field_name)
      continue

    # Nodes on the edge may not have been added yet.
    if edge.id1 not in new_graph.nodes:
      new_graph.add_node(v1)
    if edge.id2 not in new_graph.nodes:
      new_graph.add_node(v2)
    new_graph.add_new_edge(
        v1, v2, edge_type=edge.type, field_name=edge.field_name)
  return new_graph
def dump_node(node):
  """Returns a one-line, space-separated description of a node for debugging.

  The line holds the bracketed node type name, the node id and the AST type,
  followed by the AST value and the syntax text when those are truthy.

  Args:
    node: The program graph node to describe.

  Returns:
    The description string.
  """
  parts = [
      ''.join(('[', node.node_type.name, ']')),
      str(node.id),
      node.ast_type,
  ]
  # Optional trailing fields are only shown when they carry something.
  parts.extend(str(extra) for extra in (node.ast_value, node.syntax) if extra)
  return ' '.join(parts)
606 | for ast_node in ast.walk(program_node): 607 | if not program_graph.contains_ast_node(ast_node): 608 | pg_node = make_node_from_ast_node(ast_node) 609 | program_graph.add_node(pg_node) 610 | 611 | root = program_graph.get_node_by_ast_node(program_node) 612 | program_graph.root_id = root.id 613 | 614 | # Add AST edges (FIELD). Also add AST_LIST and AST_VALUE program graph nodes. 615 | for ast_node in ast.walk(program_node): 616 | for field_name, value in ast.iter_fields(ast_node): 617 | if isinstance(value, list): 618 | pg_node = make_node_for_ast_list() 619 | program_graph.add_node(pg_node) 620 | program_graph.add_new_edge( 621 | ast_node, pg_node, pb.EdgeType.FIELD, field_name) 622 | for index, item in enumerate(value): 623 | list_field_name = make_list_field_name(field_name, index) 624 | if isinstance(item, ast.AST): 625 | program_graph.add_new_edge(pg_node, item, pb.EdgeType.FIELD, 626 | list_field_name) 627 | else: 628 | item_node = make_node_from_ast_value(item) 629 | program_graph.add_node(item_node) 630 | program_graph.add_new_edge(pg_node, item_node, pb.EdgeType.FIELD, 631 | list_field_name) 632 | elif isinstance(value, ast.AST): 633 | program_graph.add_new_edge( 634 | ast_node, value, pb.EdgeType.FIELD, field_name) 635 | else: 636 | pg_node = make_node_from_ast_value(value) 637 | program_graph.add_node(pg_node) 638 | program_graph.add_new_edge( 639 | ast_node, pg_node, pb.EdgeType.FIELD, field_name) 640 | 641 | # Add SYNTAX_NODE nodes. Also add NEXT_SYNTAX and LAST_LEXICAL_USE edges. 642 | # Add these edges using a custom AST unparser to visit leaf nodes in preorder. 643 | SyntaxNodeUnparser(program_node, program_graph) 644 | 645 | # Perform data flow analysis. 646 | analysis = data_flow.LastAccessAnalysis() 647 | for node in control_flow_graph.get_enter_control_flow_nodes(): 648 | analysis.visit(node) 649 | 650 | # Add control flow edges (CFG_NEXT). 
651 | for control_flow_node in control_flow_graph.get_control_flow_nodes(): 652 | instruction = control_flow_node.instruction 653 | for next_control_flow_node in control_flow_node.next: 654 | next_instruction = next_control_flow_node.instruction 655 | program_graph.add_new_edge( 656 | instruction.node, next_instruction.node, 657 | edge_type=pb.EdgeType.CFG_NEXT) 658 | 659 | # Add data flow edges (LAST_READ and LAST_WRITE). 660 | for control_flow_node in control_flow_graph.get_control_flow_nodes(): 661 | # Start with the most recent accesses before this instruction. 662 | last_accesses = control_flow_node.get_label('last_access_in').copy() 663 | for access in control_flow_node.instruction.accesses: 664 | # Extract the node and identifiers for the current access. 665 | pg_node = program_graph.get_node_by_access(access) 666 | access_name = instruction_module.access_name(access) 667 | read_identifier = instruction_module.access_identifier( 668 | access_name, 'read') 669 | write_identifier = instruction_module.access_identifier( 670 | access_name, 'write') 671 | # Find previous reads. 672 | for read in last_accesses.get(read_identifier, []): 673 | read_pg_node = program_graph.get_node_by_access(read) 674 | program_graph.add_new_edge( 675 | pg_node, read_pg_node, edge_type=pb.EdgeType.LAST_READ) 676 | # Find previous writes. 677 | for write in last_accesses.get(write_identifier, []): 678 | write_pg_node = program_graph.get_node_by_access(write) 679 | program_graph.add_new_edge( 680 | pg_node, write_pg_node, edge_type=pb.EdgeType.LAST_WRITE) 681 | # Update the state to refer to this access as the most recent one. 682 | if instruction_module.access_is_read(access): 683 | last_accesses[read_identifier] = [access] 684 | elif instruction_module.access_is_write(access): 685 | last_accesses[write_identifier] = [access] 686 | 687 | # Add COMPUTED_FROM edges. 
688 | for node in ast.walk(program_node): 689 | if isinstance(node, ast.Assign): 690 | for value_node in ast.walk(node.value): 691 | if isinstance(value_node, ast.Name): 692 | # TODO(dbieber): If possible, improve precision of these edges. 693 | for target in node.targets: 694 | program_graph.add_new_edge( 695 | target, value_node, edge_type=pb.EdgeType.COMPUTED_FROM) 696 | 697 | # Add CALLS, FORMAL_ARG_NAME and RETURNS_TO edges. 698 | for node in ast.walk(program_node): 699 | if isinstance(node, ast.Call): 700 | if isinstance(node.func, ast.Name): 701 | # TODO(dbieber): Use data flow analysis instead of all function defs. 702 | func_defs = list(program_graph.get_nodes_by_function_name(node.func.id)) 703 | # For any possible last writes that are a function definition, add the 704 | # formal_arg_name and returns_to edges. 705 | if not func_defs: 706 | # TODO(dbieber): Add support for additional classes of functions, 707 | # such as attributes of known objects and builtins. 708 | if node.func.id in dir(builtins): 709 | message = 'Function is builtin.' 710 | else: 711 | message = 'Cannot statically determine the function being called.' 712 | logging.debug('%s (%s)', message, node.func.id) 713 | for func_def in func_defs: 714 | fn_node = func_def.node 715 | # Add calls edge from the call node to the function definition. 716 | program_graph.add_new_edge(node, fn_node, edge_type=pb.EdgeType.CALLS) 717 | # Add returns_to edges from the function's return statements to the 718 | # call node. 719 | for inner_node in ast.walk(func_def.node): 720 | # TODO(dbieber): Determine if the returns_to should instead go to 721 | # the next instruction after the Call node instead. 722 | if isinstance(inner_node, ast.Return): 723 | program_graph.add_new_edge( 724 | inner_node, node, edge_type=pb.EdgeType.RETURNS_TO) 725 | 726 | # Add formal_arg_name edges from the args of the Call node to the 727 | # args in the FunctionDef. 
728 | for index, arg in enumerate(node.args): 729 | formal_arg = None 730 | if index < len(fn_node.args.args): 731 | formal_arg = fn_node.args.args[index] 732 | elif fn_node.args.vararg: 733 | # Since args.vararg is a string, we use the arguments node. 734 | # TODO(dbieber): Use a node specifically for the vararg. 735 | formal_arg = fn_node.args 736 | if formal_arg is not None: 737 | # Note: formal_arg can be an AST node or a string. 738 | program_graph.add_new_edge( 739 | arg, formal_arg, edge_type=pb.EdgeType.FORMAL_ARG_NAME) 740 | else: 741 | # TODO(dbieber): If formal_arg is None, then remove all 742 | # formal_arg_name edges for this FunctionDef. 743 | logging.debug('formal_arg is None') 744 | for keyword in node.keywords: 745 | name = keyword.arg 746 | formal_arg = None 747 | for arg in fn_node.args.args: 748 | if isinstance(arg, ast.Name) and arg.id == name: 749 | formal_arg = arg 750 | break 751 | else: 752 | if fn_node.args.kwarg: 753 | # Since args.kwarg is a string, we use the arguments node. 754 | # TODO(dbieber): Use a node specifically for the kwarg. 755 | formal_arg = fn_node.args 756 | if formal_arg is not None: 757 | program_graph.add_new_edge( 758 | keyword.value, formal_arg, 759 | edge_type=pb.EdgeType.FORMAL_ARG_NAME) 760 | else: 761 | # TODO(dbieber): If formal_arg is None, then remove all 762 | # formal_arg_name edges for this FunctionDef. 763 | logging.debug('formal_arg is None') 764 | else: 765 | # TODO(dbieber): Add a special case for Attributes. 766 | logging.debug( 767 | 'Cannot statically determine the function being called. (%s)', 768 | astunparse.unparse(node.func).strip()) 769 | 770 | return program_graph 771 | 772 | 773 | class SyntaxNodeUnparser(unparser.Unparser): 774 | """An Unparser class helpful for creating Syntax Token nodes for fn graphs.""" 775 | 776 | def __init__(self, ast_node, graph): 777 | self.graph = graph 778 | 779 | self.current_ast_node = None # The AST node currently being unparsed. 
780 | self.last_syntax_node = None 781 | self.last_lexical_uses = {} 782 | self.last_indent = 0 783 | 784 | with codecs.open(os.devnull, 'w', encoding='utf-8') as devnull: 785 | super(SyntaxNodeUnparser, self).__init__(ast_node, file=devnull) 786 | 787 | def dispatch(self, ast_node): 788 | """Dispatcher function, dispatching tree type T to method _T.""" 789 | tmp_ast_node = self.current_ast_node 790 | self.current_ast_node = ast_node 791 | super(SyntaxNodeUnparser, self).dispatch(ast_node) 792 | self.current_ast_node = tmp_ast_node 793 | 794 | def fill(self, text=''): 795 | """Indent a piece of text, according to the current indentation level.""" 796 | text_with_whitespace = NEWLINE_TOKEN 797 | if self.last_indent > self._indent: 798 | text_with_whitespace += UNINDENT_TOKEN * (self.last_indent - self._indent) 799 | elif self.last_indent < self._indent: 800 | text_with_whitespace += INDENT_TOKEN * (self._indent - self.last_indent) 801 | self.last_indent = self._indent 802 | text_with_whitespace += text 803 | self._add_syntax_node(text_with_whitespace) 804 | super(SyntaxNodeUnparser, self).fill(text) 805 | 806 | def write(self, text): 807 | """Append a piece of text to the current line.""" 808 | if isinstance(text, ast.AST): # text may be a Name, Tuple, or List node. 
class ProgramGraphNode(object):
  """A single node in a Program Graph.

  Corresponds to either a SyntaxNode or an Instruction (as in a
  ControlFlowGraph).

  Attributes:
    node_type: One of the node types from pb.NodeType.
    id: A unique id for the node.
    instruction: If applicable, the corresponding Instruction.
    ast_node: If available, the AST node corresponding to the ProgramGraphNode.
    ast_type: If available, the type of the AST node, as a string.
    ast_value: If available, the primitive Python value corresponding to the
      node.
    syntax: For SYNTAX_NODEs, the syntax information stored in the node.
    node: If available, the AST node for this program graph node or its
      instruction.
  """

  def __init__(self):
    # A fresh node is blank; the make_node_* factories fill in the fields.
    self.node_type = None
    self.id = None

    self.instruction = None
    self.ast_node = None
    self.ast_type = ''
    self.ast_value = ''
    self.syntax = ''

  def has_instruction(self):
    """Whether an Instruction is attached to this node."""
    return not (self.instruction is None)

  def has_instance_of(self, t):
    """Whether the node's instruction is an instance of type `t`."""
    instruction = self.instruction
    return instruction is not None and isinstance(instruction.node, t)

  @property
  def node(self):
    """The node's own AST node, else its instruction's AST node, else None."""
    if self.ast_node is not None:
      return self.ast_node
    instruction = self.instruction
    return None if instruction is None else instruction.node

  def __repr__(self):
    return ' '.join((str(self.id), str(self.ast_type)))
def parse_list_field_name(list_field_name):
  """Splits a '<field_name>:<index>' list field name into its two parts.

  This is the inverse of the '{}:{}' format built by make_list_field_name.
  The split happens at the last ':' so that a field name which itself
  contains ':' still round-trips.

  Args:
    list_field_name: A string of the form '<field_name>:<index>'.

  Returns:
    A (field_name, index) tuple, where index is an int.

  Raises:
    ValueError: If there is no ':' separator or the index is not an integer.
  """
  field_name, index = list_field_name.rsplit(':', 1)
  return field_name, int(index)
@enum.unique
class NodeType(enum.Enum):
  """The different kinds of nodes that can appear in a program graph."""
  # @enum.unique makes an accidental duplicate value fail at import time
  # instead of silently aliasing two node kinds.
  UNSPECIFIED = 0
  AST_NODE = 1  # A node for an AST node.
  AST_LIST = 2  # A node for a list-valued AST field.
  AST_VALUE = 3  # A node for a primitive value appearing in the AST.
  SYNTAX_NODE = 4  # A node carrying syntax text.
  PLACEHOLDER = 5  # A stand-in for a removed subtree.
def to_graphviz(graph):
  """Creates a graphviz representation of a ProgramGraph.

  Nodes with an AST type are labelled with it; all other nodes are drawn as
  points. Edges are labelled with their edge type name, and LAST_READ and
  LAST_WRITE edges are coloured red.

  Args:
    graph: A ProgramGraph object to visualize.

  Returns:
    A pygraphviz object representing the ProgramGraph.
  """
  # The colour tables do not depend on the element being drawn, so build them
  # once instead of on every loop iteration.
  # No node types are currently assigned a colour.
  node_type_colors = {}
  edge_colors = {
      pb.EdgeType.LAST_READ: 'red',
      pb.EdgeType.LAST_WRITE: 'red',
  }

  g = pygraphviz.AGraph(strict=False, directed=True)
  for node in graph.nodes.values():
    if node.ast_type:
      node_attrs = {'label': six.ensure_str(node.ast_type, 'utf-8')}
    else:
      node_attrs = {'shape': 'point'}
    if node.node_type in node_type_colors:
      node_attrs['color'] = node_type_colors[node.node_type]
      node_attrs['colorscheme'] = 'svg'
    g.add_node(node.id, **node_attrs)

  for edge in graph.edges:
    edge_attrs = {'label': edge.type.name}
    if edge.type in edge_colors:
      edge_attrs['color'] = edge_colors[edge.type]
      edge_attrs['colorscheme'] = 'svg'
    g.add_edge(edge.id1, edge.id2, **edge_attrs)
  return g
class ControlFlowGraphvizTest(absltest.TestCase):
  """Smoke tests for converting program graphs to graphviz."""

  def test_to_graphviz_for_all_test_components(self):
    """Builds and converts the program graph of every test component."""
    components = inspect.getmembers(tc, predicate=inspect.isfunction)
    for _, component in components:
      program_graph_graphviz.to_graphviz(
          program_graph.get_program_graph(component))
def get_test_components():
  """Generates functions from two sets of test components.

  Yields:
    Functions from the program graph and control flow test components files.
  """
  for unused_name, fn in inspect.getmembers(pgtc, predicate=inspect.isfunction):
    yield fn

  for unused_name, fn in inspect.getmembers(cftc, predicate=inspect.isfunction):
    yield fn


class ProgramGraphTest(absltest.TestCase):
  """Tests for program graph construction and its edge types."""

  def assertEdge(self, graph, n1, n2, edge_type):
    """Asserts that an edge of type edge_type exists from n1 to n2 in graph."""
    edge = pb.Edge(id1=n1.id, id2=n2.id, type=edge_type)
    self.assertIn(edge, graph.edges)

  def assertNoEdge(self, graph, n1, n2, edge_type):
    """Asserts that no edge of type edge_type exists from n1 to n2 in graph."""
    edge = pb.Edge(id1=n1.id, id2=n2.id, type=edge_type)
    self.assertNotIn(edge, graph.edges)

  def test_get_program_graph_test_components(self):
    self.analyze_get_program_graph(get_test_components(), start=0)

  def analyze_get_program_graph(self, program_generator, start=0):
    """Builds a program graph for each program and logs aggregate statistics.

    Args:
      program_generator: An iterable of programs accepted by
        program_graph.get_program_graph.
      start: Programs whose index is below this value are skipped.
    """
    # TODO(dbieber): Remove the counting and logging logic from this method,
    # and instead just get_program_graph for each program in the generator.
    # The counting and logging logic is for development purposes only.
    num_edges = 0
    num_edges_by_type = collections.defaultdict(int)
    num_nodes = 0
    # Start at zero so the per-graph averages below divide by the number of
    # graphs actually built. The count is incremented before any division, so
    # the divisor is never zero.
    num_graphs = 0
    times = {}
    for index, program in enumerate(program_generator):
      if index < start:
        continue
      start_time = time.time()
      graph = program_graph.get_program_graph(program)
      end_time = time.time()
      times[index] = end_time - start_time
      num_edges += len(graph.edges)
      for edge in graph.edges:
        num_edges_by_type[edge.type] += 1
      num_nodes += len(graph.nodes)
      num_graphs += 1
      if index % 100 == 0:
        logging.debug(sorted(times.items(), key=lambda kv: -kv[1])[:10])
        logging.info('%d %d %d', num_edges, num_nodes, num_graphs)
        logging.info('%f %f', num_edges / num_graphs, num_nodes / num_graphs)
        for edge_type in num_edges_by_type:
          logging.info('%s %f', edge_type,
                       num_edges_by_type[edge_type] / num_graphs)

    logging.info(times)
    logging.info(sorted(times.items(), key=lambda kv: -kv[1])[:10])

  def test_last_lexical_use_edges_function_call(self):
    graph = program_graph.get_program_graph(pgtc.function_call)
    read = graph.get_node_by_source_and_identifier('return z', 'z')
    write = graph.get_node_by_source_and_identifier(
        'z = function_call_helper(x, y)', 'z')
    self.assertEdge(graph, read, write, pb.EdgeType.LAST_LEXICAL_USE)

  def test_last_write_edges_function_call(self):
    graph = program_graph.get_program_graph(pgtc.function_call)
    write_z = graph.get_node_by_source_and_identifier(
        'z = function_call_helper(x, y)', 'z')
    read_z = graph.get_node_by_source_and_identifier('return z', 'z')
    self.assertEdge(graph, read_z, write_z, pb.EdgeType.LAST_WRITE)

    write_y = graph.get_node_by_source_and_identifier('y = 2', 'y')
    read_y = graph.get_node_by_source_and_identifier(
        'z = function_call_helper(x, y)', 'y')
    self.assertEdge(graph, read_y, write_y, pb.EdgeType.LAST_WRITE)

  def test_last_read_edges_assignments(self):
    graph = program_graph.get_program_graph(pgtc.assignments)
    write_a0 = graph.get_node_by_source_and_identifier('a, b = 0, 0', 'a')
    read_a0 = graph.get_node_by_source_and_identifier('c = 2 * a + 1', 'a')
    write_a1 = graph.get_node_by_source_and_identifier('a = c + 3', 'a')
    self.assertEdge(graph, write_a1, read_a0, pb.EdgeType.LAST_READ)
    self.assertNoEdge(graph, write_a0, read_a0, pb.EdgeType.LAST_READ)

    read_a1 = graph.get_node_by_source_and_identifier('return a, b, c, d', 'a')
    self.assertEdge(graph, read_a1, read_a0, pb.EdgeType.LAST_READ)

  def test_last_read_last_write_edges_repeated_identifier(self):
    graph = program_graph.get_program_graph(pgtc.repeated_identifier)
    write_x0 = graph.get_node_by_source_and_identifier('x = 0', 'x')

    stmt1 = graph.get_node_by_source('x = x + 1').ast_node
    read_x0 = graph.get_node_by_ast_node(stmt1.value.left)
    write_x1 = graph.get_node_by_ast_node(stmt1.targets[0])

    stmt2 = graph.get_node_by_source('x = (x + (x + x)) + x').ast_node
    read_x1 = graph.get_node_by_ast_node(stmt2.value.left.left)
    read_x2 = graph.get_node_by_ast_node(stmt2.value.left.right.left)
    read_x3 = graph.get_node_by_ast_node(stmt2.value.left.right.right)
    read_x4 = graph.get_node_by_ast_node(stmt2.value.right)
    write_x2 = graph.get_node_by_ast_node(stmt2.targets[0])

    read_x5 = graph.get_node_by_source_and_identifier('return x', 'x')

    self.assertEdge(graph, write_x1, read_x0, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x1, read_x0, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x2, read_x1, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x3, read_x2, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x4, read_x3, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, write_x2, read_x4, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x5, read_x4, pb.EdgeType.LAST_READ)

    self.assertEdge(graph, read_x0, write_x0, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, write_x1, write_x0, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, read_x2, write_x1, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, read_x3, write_x1, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, read_x4, write_x1, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, write_x2, write_x1, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, read_x5, write_x2, pb.EdgeType.LAST_WRITE)

  def test_computed_from_edges(self):
    graph = program_graph.get_program_graph(pgtc.assignments)
    target_c = graph.get_node_by_source_and_identifier('c = 2 * a + 1', 'c')
    from_a = graph.get_node_by_source_and_identifier('c = 2 * a + 1', 'a')
    self.assertEdge(graph, target_c, from_a, pb.EdgeType.COMPUTED_FROM)

    target_d = graph.get_node_by_source_and_identifier('d = b - c + 2', 'd')
    from_b = graph.get_node_by_source_and_identifier('d = b - c + 2', 'b')
    from_c = graph.get_node_by_source_and_identifier('d = b - c + 2', 'c')
    self.assertEdge(graph, target_d, from_b, pb.EdgeType.COMPUTED_FROM)
    self.assertEdge(graph, target_d, from_c, pb.EdgeType.COMPUTED_FROM)

  def test_calls_edges(self):
    graph = program_graph.get_program_graph(pgtc)
    call = graph.get_node_by_source('function_call_helper(x, y)')
    self.assertIsInstance(call.node, ast.Call)
    function_call_helper_def = graph.get_node_by_function_name(
        'function_call_helper')
    assignments_def = graph.get_node_by_function_name('assignments')
    self.assertEdge(graph, call, function_call_helper_def, pb.EdgeType.CALLS)
    self.assertNoEdge(graph, call, assignments_def, pb.EdgeType.CALLS)

  def test_formal_arg_name_edges(self):
    graph = program_graph.get_program_graph(pgtc)
    x = graph.get_node_by_source_and_identifier('function_call_helper(x, y)',
                                                'x')
    y = graph.get_node_by_source_and_identifier('function_call_helper(x, y)',
                                                'y')
    function_call_helper_def = graph.get_node_by_function_name(
        'function_call_helper')
    arg0_ast_node = function_call_helper_def.node.args.args[0]
    arg0 = graph.get_node_by_ast_node(arg0_ast_node)
    arg1_ast_node = function_call_helper_def.node.args.args[1]
    arg1 = graph.get_node_by_ast_node(arg1_ast_node)
    self.assertEdge(graph, x, arg0, pb.EdgeType.FORMAL_ARG_NAME)
    self.assertEdge(graph, y, arg1, pb.EdgeType.FORMAL_ARG_NAME)
    self.assertNoEdge(graph, x, arg1, pb.EdgeType.FORMAL_ARG_NAME)
    self.assertNoEdge(graph, y, arg0, pb.EdgeType.FORMAL_ARG_NAME)

  def test_returns_to_edges(self):
    graph = program_graph.get_program_graph(pgtc)
    call = graph.get_node_by_source('function_call_helper(x, y)')
    return_stmt = graph.get_node_by_source('return arg0 + arg1')
    self.assertEdge(graph, return_stmt, call, pb.EdgeType.RETURNS_TO)

  def test_syntax_information(self):
    # TODO(dbieber): Test that program graphs correctly capture syntax
    # information. Do this once representation of syntax in program graphs
    # stabilizes.
    pass

  def test_ast_acyclic(self):
    for name, fn in inspect.getmembers(cftc, predicate=inspect.isfunction):
      graph = program_graph.get_program_graph(fn)
      ast_nodes = set()
      worklist = [graph.root]
      # Walk the AST from the root; reaching any node twice means the child
      # relation is not a tree.
      while worklist:
        current = worklist.pop()
        self.assertNotIn(
            current, ast_nodes,
            'ProgramGraph AST cyclic. Function {}\nAST {}'.format(
                name, graph.dump_tree()))
        ast_nodes.add(current)
        worklist.extend(graph.children(current))

  def test_neighbors_children_consistent(self):
    for unused_name, fn in inspect.getmembers(
        cftc, predicate=inspect.isfunction):
      graph = program_graph.get_program_graph(fn)
      for node in graph.all_nodes():
        if node.node_type == pb.NodeType.AST_NODE:
          children0 = set(graph.outgoing_neighbors(node, pb.EdgeType.FIELD))
          children1 = set(graph.children(node))
          self.assertEqual(children0, children1)

  def test_walk_ast_descendants(self):
    for unused_name, fn in inspect.getmembers(
        cftc, predicate=inspect.isfunction):
      graph = program_graph.get_program_graph(fn)
      for node in graph.walk_ast_descendants():
        self.assertIn(node, graph.all_nodes())

  def test_roundtrip_ast(self):
    for unused_name, fn in inspect.getmembers(
        cftc, predicate=inspect.isfunction):
      ast_representation = program_utils.program_to_ast(fn)
      graph = program_graph.get_program_graph(fn)
      ast_reproduction = graph.to_ast()
      self.assertEqual(ast.dump(ast_representation), ast.dump(ast_reproduction))

  def test_reconstruct_missing_ast(self):
    for unused_name, fn in inspect.getmembers(
        cftc, predicate=inspect.isfunction):
      graph = program_graph.get_program_graph(fn)
      ast_original = graph.root.ast_node
      # Remove the AST.
      for node in graph.all_nodes():
        node.ast_node = None
      # Reconstruct it.
      graph.reconstruct_ast()
      ast_reproduction = graph.root.ast_node
      # Check reconstruction.
      self.assertEqual(ast.dump(ast_original), ast.dump(ast_reproduction))
      # Check that all AST_NODE nodes are set.
      for node in graph.all_nodes():
        if node.node_type == pb.NodeType.AST_NODE:
          self.assertIsInstance(node.ast_node, ast.AST)
          self.assertIs(graph.get_node_by_ast_node(node.ast_node), node)
      # Check that old AST nodes are no longer referenced.
      self.assertFalse(graph.contains_ast_node(ast_original))

  def test_remove(self):
    graph = program_graph.get_program_graph(pgtc.assignments)

    # Iterate over a snapshot: the loop body removes and re-adds edges.
    for edge in list(graph.edges):
      # Remove the edge.
      graph.remove_edge(edge)
      self.assertNotIn(edge, graph.edges)
      self.assertNotIn((edge, edge.id2), graph.neighbors_map[edge.id1])
      self.assertNotIn((edge, edge.id1), graph.neighbors_map[edge.id2])

      if edge.type == pb.EdgeType.FIELD:
        self.assertNotIn(edge.id2, graph.child_map[edge.id1])
        self.assertNotIn(edge.id2, graph.parent_map)

      # Add the edge again.
      graph.add_edge(edge)
      self.assertIn(edge, graph.edges)
      self.assertIn((edge, edge.id2), graph.neighbors_map[edge.id1])
      self.assertIn((edge, edge.id1), graph.neighbors_map[edge.id2])

      if edge.type == pb.EdgeType.FIELD:
        self.assertIn(edge.id2, graph.child_map[edge.id1])
        self.assertIn(edge.id2, graph.parent_map)
# NOTE: The functions below are fixtures that get analyzed, not utilities that
# get called for their results. The program graph tests look up nodes by the
# exact text of individual statements (e.g. 'z = function_call_helper(x, y)',
# 'c = 2 * a + 1', 'x = (x + (x + x)) + x'), so the statements must not be
# reworded or restructured.

# pylint: disable=missing-docstring
# pylint: disable=pointless-statement,undefined-variable
# pylint: disable=unused-variable,unused-argument
# pylint: disable=bare-except,lost-exception,unreachable
# pylint: disable=global-variable-undefined

# Fixture: two local writes, a call to another fixture, and a returned read.
def function_call():
  x = 1
  y = 2
  z = function_call_helper(x, y)
  return z


# Fixture: the callee of function_call, with two formal arguments and a return.
def function_call_helper(arg0, arg1):
  return arg0 + arg1


# Fixture: tuple assignment, reads feeding later writes, and a rewrite of `a`.
def assignments():
  a, b = 0, 0
  c = 2 * a + 1
  d = b - c + 2
  a = c + 3
  return a, b, c, d


# Fixture: writes and reads of names declared with `global`.
def fn_with_globals():
  global global_a, global_b, global_c
  global_a = 10
  global_b = 20
  global_c = 30
  return global_a + global_b + global_c


# Fixture: a nested function definition whose body is an infinite loop; the
# inner function is defined but never called.
def fn_with_inner_fn():

  def inner_fn():
    while True:
      pass


# Fixture: the same identifier read and written repeatedly, including several
# reads within a single expression.
def repeated_identifier():
  x = 0
  x = x + 1
  x = (x + (x + x)) + x
  return x
def render_functions(functions):
  """Renders the program graph of each function to a PNG file.

  Args:
    functions: An iterable of (name, function) pairs. The program graph of
      each function is written to /tmp/program_graphs/<name>.png.
  """
  import os  # pylint: disable=g-import-not-at-top

  output_dir = '/tmp/program_graphs'
  # The images are written directly to paths inside output_dir, so make sure
  # the directory exists before rendering the first graph.
  os.makedirs(output_dir, exist_ok=True)

  for name, function in functions:
    logging.info(name)
    graph = program_graph.get_program_graph(function)
    path = os.path.join(output_dir, '{}.png'.format(name))
    program_graph_graphviz.render(graph, path=path)


def main(argv):
  """Renders program graphs for all control flow test component functions."""
  del argv  # Unused.

  # getmembers already returns a list of (name, function) pairs.
  functions = inspect.getmembers(tc, predicate=inspect.isfunction)
  render_functions(functions)
def getsource(obj):
  """Returns the source code of obj with common leading whitespace removed.

  Args:
    obj: A module, class, method, function, traceback, frame, or code object.
  Returns:
    The dedented source text of the object.
  """
  # A bound method is unwrapped to its underlying function before lookup.
  target = obj.__func__ if inspect.ismethod(obj) else obj
  return textwrap.dedent(inspect.getsource(target))
def _write_variadic(unparser, prefix, node, legacy_annotation):
    """Writes the name (and annotation, if any) of a *args/**kwargs parameter.

    Args:
        unparser: The object providing `write` and `dispatch`.
        prefix: Text written immediately before the parameter name.
        node: The parameter: an object with an `arg` attribute, an object with
            an `id` attribute (as the kwarg branch already expected for gast),
            or the bare name itself.
        legacy_annotation: Annotation used when `node` is a bare name.
    """
    if hasattr(node, 'arg'):
        name, annotation = node.arg, node.annotation
    elif hasattr(node, 'id'):  # if this is a gast._arguments
        name, annotation = node.id, node.annotation
    else:
        name, annotation = node, legacy_annotation
    unparser.write(prefix + name)
    if annotation:
        unparser.write(": ")
        unparser.dispatch(annotation)


def _arguments(self, t):
    """Unparses the arguments node `t` of a function definition.

    Output is produced through `self.write` (text) and `self.dispatch` (child
    nodes). Parameters are emitted in order: positional (with a `/` marker after
    the positional-only ones), `*vararg` or a bare `*`, keyword-only, `**kwarg`.

    Args:
        self: The unparser instance this function is installed on.
        t: The arguments node to unparse.
    """
    first = True

    def write_separator():
        # Every parameter except the first is preceded by ", ".
        nonlocal first
        if first:
            first = False
        else:
            self.write(", ")

    # normal arguments
    posonlyargs = getattr(t, 'posonlyargs', [])
    all_args = posonlyargs + t.args
    # Defaults align with the *last* parameters; pad the front with None.
    defaults = [None] * (len(all_args) - len(t.defaults)) + t.defaults
    for index, (a, d) in enumerate(zip(all_args, defaults), 1):
        write_separator()
        self.dispatch(a)
        if d:
            self.write("=")
            self.dispatch(d)
        if index == len(posonlyargs):
            self.write(", /")

    # varargs, or bare '*' if no varargs but keyword-only arguments present
    if t.vararg or getattr(t, "kwonlyargs", False):
        write_separator()
        self.write("*")
        if t.vararg:
            # Handles `id`-style nodes too, matching the kwarg handling below.
            _write_variadic(self, "", t.vararg,
                            getattr(t, 'varargannotation', None))

    # keyword-only arguments
    if getattr(t, "kwonlyargs", False):
        for a, d in zip(t.kwonlyargs, t.kw_defaults):
            write_separator()
            self.dispatch(a)
            if d:
                self.write("=")
                self.dispatch(d)

    # kwargs
    if t.kwarg:
        write_separator()
        _write_variadic(self, "**", t.kwarg,
                        getattr(t, 'kwargannotation', None))
22 | Python Program Graphs are graph representations of Python programs suitable 23 | for use with graph neural networks. 24 | """.strip() 25 | 26 | SHORT_DESCRIPTION = """ 27 | A library for generating graph representations of Python programs.""".strip() 28 | 29 | DEPENDENCIES = [ 30 | 'absl-py', 31 | 'astunparse', 32 | 'gast', 33 | 'networkx', 34 | 'pygraphviz', 35 | 'six', 36 | ] 37 | 38 | TEST_DEPENDENCIES = [ 39 | ] 40 | 41 | VERSION = '1.3.0' 42 | URL = 'https://github.com/google-research/python-graphs' 43 | 44 | setup( 45 | name='python_graphs', 46 | version=VERSION, 47 | description=SHORT_DESCRIPTION, 48 | long_description=LONG_DESCRIPTION, 49 | url=URL, 50 | 51 | author='David Bieber', 52 | author_email='dbieber@google.com', 53 | license='Apache Software License', 54 | 55 | classifiers=[ 56 | 'Development Status :: 4 - Beta', 57 | 58 | 'Intended Audience :: Developers', 59 | 'Topic :: Software Development :: Libraries :: Python Modules', 60 | 61 | 'License :: OSI Approved :: Apache Software License', 62 | 63 | 'Programming Language :: Python', 64 | 'Programming Language :: Python :: 3', 65 | 'Programming Language :: Python :: 3.6', 66 | 'Programming Language :: Python :: 3.7', 67 | 'Programming Language :: Python :: 3.8', 68 | 'Programming Language :: Python :: 3.9', 69 | 70 | 'Operating System :: OS Independent', 71 | 'Operating System :: POSIX', 72 | 'Operating System :: MacOS', 73 | 'Operating System :: Unix', 74 | ], 75 | 76 | keywords='python program control flow data flow graph neural network', 77 | 78 | packages=['python_graphs'], 79 | 80 | install_requires=DEPENDENCIES, 81 | tests_require=TEST_DEPENDENCIES, 82 | ) 83 | --------------------------------------------------------------------------------