├── .github
├── scripts
│ └── build.sh
└── workflows
│ └── build.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── examples
├── __init__.py
├── control_flow_example.py
├── cyclomatic_complexity_example.py
└── program_graph_example.py
├── python_graphs
├── __init__.py
├── analysis
│ ├── __init__.py
│ ├── program_graph_analysis.py
│ ├── program_graph_analysis_test.py
│ └── run_program_graph_analysis.py
├── control_flow.py
├── control_flow_graphviz.py
├── control_flow_graphviz_test.py
├── control_flow_test.py
├── control_flow_test_components.py
├── control_flow_visualizer.py
├── cyclomatic_complexity.py
├── cyclomatic_complexity_test.py
├── data_flow.py
├── data_flow_test.py
├── instruction.py
├── instruction_test.py
├── program_graph.py
├── program_graph_dataclasses.py
├── program_graph_graphviz.py
├── program_graph_graphviz_test.py
├── program_graph_test.py
├── program_graph_test_components.py
├── program_graph_visualizer.py
├── program_utils.py
└── unparser_patch.py
├── requirements.txt
└── setup.py
/.github/scripts/build.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Copyright (C) 2022 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# CI build script: installs the build and test tooling, installs the package
# in development mode, and runs the test suite.
#
# The shebang must be the very first line of the file to take effect; placed
# after the license header it is only an ordinary comment.

# Exit when any command fails.
set -e

# Python version requested by the caller (defaults to 3.7). No command below
# reads it; it is kept so callers that export it keep working.
PYTHON_VERSION=${PYTHON_VERSION:-3.7}

pip install --upgrade setuptools pip
pip install --upgrade pylint pytest pytest-pylint pytest-runner
# -y answers the confirmation prompt so the install cannot block or abort when
# run non-interactively.
sudo apt install -y libgraphviz-dev
python setup.py develop
python -m pytest  # Run the tests.
27 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: python_graphs
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 | strategy:
9 | matrix:
10 | python-version: [3.8, 3.9]
11 |
12 | steps:
13 | - name: Checkout the repository
14 | uses: actions/checkout@v2
15 |
16 | - name: Set up Python ${{ matrix.python-version }}
17 | uses: actions/setup-python@v2
18 | with:
19 | python-version: ${{ matrix.python-version }}
20 |
21 | # Build using the build.sh script.
22 | - name: Run build script
23 | shell: bash
24 | run: ./.github/scripts/build.sh
25 | env:
26 | PYTHON_VERSION: ${{ matrix.python-version }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | scratch
3 |
4 | .DS_Store
5 | __MACOSX
6 |
7 | *~
8 | __pycache__
9 | .pytest_cache
10 | python_graphs.egg-info
11 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement (CLA). You (or your employer) retain the copyright to your
10 | contribution; this simply gives us permission to use and redistribute your
11 | contributions as part of the project. Head over to
12 | <https://cla.developers.google.com/> to see your current agreements on file or
13 | to sign a new one.
14 |
15 | You generally only need to submit a CLA once, so if you've already submitted one
16 | (even if it was for a different project), you probably don't need to do it
17 | again.
18 |
19 | ## Code Reviews
20 |
21 | All submissions, including submissions by project members, require review. For
22 | external contributions, we use GitHub pull requests for this purpose. Consult
23 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
24 | information on using pull requests.
25 |
26 | ## Community Guidelines
27 |
28 | This project follows
29 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
30 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # python_graphs
2 |
3 | This package is for computing graph representations of Python programs for
4 | machine learning applications. It includes the following modules:
5 |
6 | * `control_flow` For computing control flow graphs statically from Python
7 | programs.
8 | * `data_flow` For computing data flow analyses of Python programs.
9 | * `program_graph` For computing graphs statically to represent arbitrary
10 | Python programs or functions.
11 | * `cyclomatic_complexity` For computing the cyclomatic complexity of a Python function.
12 |
13 |
14 | ## Installation
15 |
16 | To install python_graphs with pip, run: `pip install python_graphs`.
17 |
18 | To install python_graphs from source, run: `python setup.py develop`.
19 |
20 | ## Common Tasks
21 |
22 | **Generate a control flow graph from a function `fn`:**
23 |
24 | ```python
25 | from python_graphs import control_flow
26 | graph = control_flow.get_control_flow_graph(fn)
27 | ```
28 |
29 | **Generate a program graph from a function `fn`:**
30 |
31 | ```python
32 | from python_graphs import program_graph
33 | graph = program_graph.get_program_graph(fn)
34 | ```
35 |
36 | **Compute the cyclomatic complexity of a function `fn`:**
37 |
38 | ```python
39 | from python_graphs import control_flow
40 | from python_graphs import cyclomatic_complexity
41 | graph = control_flow.get_control_flow_graph(fn)
42 | value = cyclomatic_complexity.cyclomatic_complexity(graph)
43 | ```
44 |
45 | ---
46 |
47 | This is not an officially supported Google product.
48 |
--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/python-graphs/0201fb090b9224dfb1a9a05ce836e9ef6da8ccc9/examples/__init__.py
--------------------------------------------------------------------------------
/examples/control_flow_example.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Example generating a control flow graph from a Python function.
16 |
17 | Generates an image visualizing the control flow graph for each of the functions
18 | in control_flow_test_components.py. Saves the resulting images to the directory
19 | `out`.
20 |
21 | Usage:
22 | python -m examples.control_flow_example
23 | """
24 |
25 | import inspect
26 | import os
27 |
28 | from absl import app
29 |
30 | from python_graphs import control_flow
31 | from python_graphs import control_flow_graphviz
32 | from python_graphs import control_flow_test_components as tc
33 | from python_graphs import program_utils
34 |
35 |
def plot_control_flow_graph(fn, path):
  """Renders the control flow graph of `fn`, along with its source, to `path`.

  Args:
    fn: The function whose control flow graph is rendered.
    path: The path handed to the renderer as the output location.
  """
  control_flow_graphviz.render(
      control_flow.get_control_flow_graph(fn),
      include_src=program_utils.getsource(fn),
      path=path,
  )
40 |
41 |
def main(argv) -> None:
  """Renders a control flow graph image for each test-component function.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # Unused

  # The images are written below `out`, so make sure it exists first.
  os.makedirs('out', exist_ok=True)

  # Walk every function defined in control_flow_test_components.py and save a
  # rendering of its control flow graph into the output directory.
  functions = inspect.getmembers(tc, predicate=inspect.isfunction)
  for fn_name, fn in functions:
    plot_control_flow_graph(fn, f'out/{fn_name}_cfg.png')
  print('Done. See the `out` directory for the results.')
54 |
55 |
56 | if __name__ == '__main__':
57 | app.run(main)
58 |
--------------------------------------------------------------------------------
/examples/cyclomatic_complexity_example.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Example computing the cyclomatic complexity of various Python functions.
16 |
17 | For each of the functions in control_flow_test_components.py, this computes and
18 | prints the function's cyclomatic complexity.
19 |
20 | Usage:
21 | python -m examples.cyclomatic_complexity_example
22 | """
23 |
24 | import inspect
25 |
26 | from absl import app
27 |
28 | from python_graphs import control_flow
29 | from python_graphs import control_flow_test_components as tc
30 | from python_graphs import cyclomatic_complexity
31 |
32 |
def main(argv) -> None:
  """Prints the cyclomatic complexity of each test-component function.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # Unused

  # Report on every function defined in control_flow_test_components.py.
  functions = inspect.getmembers(tc, predicate=inspect.isfunction)
  for fn_name, fn in functions:
    # The label goes out first without a newline; the value then completes
    # the same output line.
    print(f'{fn_name}: ', end='')
    cfg = control_flow.get_control_flow_graph(fn)
    print(cyclomatic_complexity.cyclomatic_complexity(cfg))
43 |
44 |
45 | if __name__ == '__main__':
46 | app.run(main)
47 |
--------------------------------------------------------------------------------
/examples/program_graph_example.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Example generating a complete program graph from a Python function.
16 |
17 | Generates an image visualizing the complete program graph for each function
18 | in program_graph_test_components.py. Saves the resulting images to the directory
19 | `out`.
20 |
21 | Usage:
22 | python -m examples.program_graph_example
23 | """
24 |
25 | import inspect
26 | import os
27 |
28 | from absl import app
29 | from python_graphs import program_graph
30 | from python_graphs import program_graph_graphviz
31 | from python_graphs import program_graph_test_components as tc
32 |
33 |
def main(argv) -> None:
  """Renders a program graph image for each test-component function.

  Args:
    argv: Command-line arguments; ignored.
  """
  del argv  # Unused

  # The images are written below `out`, so make sure it exists first.
  os.makedirs('out', exist_ok=True)

  # Walk every function defined in program_graph_test_components.py and save
  # a rendering of its program graph into the output directory.
  functions = inspect.getmembers(tc, predicate=inspect.isfunction)
  for fn_name, fn in functions:
    image_path = f'out/{fn_name}-program-graph.png'
    program_graph_graphviz.render(
        program_graph.get_program_graph(fn), path=image_path)
  print('Done. See the `out` directory for the results.')
47 |
48 |
49 | if __name__ == '__main__':
50 | app.run(main)
51 |
--------------------------------------------------------------------------------
/python_graphs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/python-graphs/0201fb090b9224dfb1a9a05ce836e9ef6da8ccc9/python_graphs/__init__.py
--------------------------------------------------------------------------------
/python_graphs/analysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google-research/python-graphs/0201fb090b9224dfb1a9a05ce836e9ef6da8ccc9/python_graphs/analysis/__init__.py
--------------------------------------------------------------------------------
/python_graphs/analysis/program_graph_analysis.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Functions to analyze program graphs.
16 |
17 | Computes properties such as the height of a program graph's AST.
18 | """
19 |
20 | import gast as ast
21 | import networkx as nx
22 |
23 |
def num_nodes(graph):
  """Counts the nodes of a ProgramGraph.

  Args:
    graph: A ProgramGraph.

  Returns:
    The length of the collection returned by `graph.all_nodes()`.
  """
  nodes = graph.all_nodes()
  return len(nodes)
27 |
28 |
def num_edges(graph):
  """Counts the edges of a ProgramGraph.

  Args:
    graph: A ProgramGraph.

  Returns:
    The length of `graph.edges`.
  """
  edges = graph.edges
  return len(edges)
32 |
33 |
def ast_height(ast_node):
  """Computes the height of an AST from the given node.

  The tree is walked one level at a time rather than recursively, so very deep
  ASTs do not exhaust the interpreter's recursion limit.

  Args:
    ast_node: An AST node.

  Returns:
    The height of the AST starting at ast_node. A leaf node or single-node AST
    has a height of 1.
  """
  height = 0
  current_level = [ast_node]
  # Each pass consumes one level of the tree; the number of non-empty levels
  # is the height.
  while current_level:
    height += 1
    next_level = []
    for node in current_level:
      next_level.extend(ast.iter_child_nodes(node))
    current_level = next_level
  return height
48 |
49 |
def graph_ast_height(graph):
  """Computes the height of the AST that a ProgramGraph converts to.

  Args:
    graph: A ProgramGraph.

  Returns:
    The height of the AST produced by `graph.to_ast()`. A single-node AST has
    a height of 1.
  """
  root = graph.to_ast()
  return ast_height(root)
60 |
61 |
def degrees(graph):
  """Collects the degree of every node in a ProgramGraph.

  Args:
    graph: A ProgramGraph.

  Returns:
    An (unsorted) list with one entry per node, in `graph.all_nodes()` order:
    the number of neighbors of that node (in-degree plus out-degree).
  """
  result = []
  for node in graph.all_nodes():
    result.append(len(graph.neighbors(node)))
  return result
72 |
73 |
def in_degrees(graph):
  """Collects the in-degree of every node in a ProgramGraph.

  Args:
    graph: A ProgramGraph.

  Returns:
    An (unsorted) list with one entry per node, in `graph.all_nodes()` order:
    the number of incoming neighbors of that node.
  """
  result = []
  for node in graph.all_nodes():
    result.append(len(graph.incoming_neighbors(node)))
  return result
84 |
85 |
def out_degrees(graph):
  """Collects the out-degree of every node in a ProgramGraph.

  Args:
    graph: A ProgramGraph.

  Returns:
    An (unsorted) list with one entry per node, in `graph.all_nodes()` order:
    the number of outgoing neighbors of that node.
  """
  result = []
  for node in graph.all_nodes():
    result.append(len(graph.outgoing_neighbors(node)))
  return result
96 |
97 |
def _program_graph_to_nx(program_graph, directed=False):
  """Builds a NetworkX graph mirroring the edges of a ProgramGraph.

  Args:
    program_graph: A ProgramGraph.
    directed: If True an `nx.DiGraph` is built, otherwise an `nx.Graph`.

  Returns:
    A NetworkX graph keyed by node id that the networkx module can analyze.
  """
  # Adjacency mapping from each node id to the ids of its outgoing neighbors;
  # for example {0: [1]} stands for a directed edge from node 0 to node 1.
  adjacency = {
      node.id: [successor.id
                for successor in program_graph.outgoing_neighbors(node)]
      for node in program_graph.all_nodes()
  }
  if directed:
    return nx.DiGraph(adjacency)
  return nx.Graph(adjacency)
116 |
117 |
def diameter(graph):
  """Computes the diameter of a ProgramGraph viewed as an undirected graph.

  Note: this is very slow for large graphs.

  Args:
    graph: A ProgramGraph.

  Returns:
    The diameter of the graph, ignoring edge direction. A single-node graph
    has diameter 0.

  Raises:
    networkx.exception.NetworkXError: Raised if the graph is not connected.
  """
  undirected = _program_graph_to_nx(graph, directed=False)
  measure = nx.algorithms.distance_measures.diameter
  return measure(undirected)
135 |
136 |
def max_betweenness(graph):
  """Finds the largest node betweenness centrality in a ProgramGraph.

  Note: this is very slow for large graphs.

  Args:
    graph: A ProgramGraph.

  Returns:
    The greatest betweenness centrality value over all nodes, computed with
    the graph treated as undirected.
  """
  undirected = _program_graph_to_nx(graph, directed=False)
  centrality_by_node = nx.algorithms.centrality.betweenness_centrality(
      undirected)
  return max(centrality_by_node.values())
151 |
--------------------------------------------------------------------------------
/python_graphs/analysis/program_graph_analysis_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for program_graph_analysis.py."""
16 |
17 | from absl.testing import absltest
18 | import gast as ast
19 | import networkx as nx
20 |
21 | from python_graphs import program_graph
22 | from python_graphs.analysis import program_graph_analysis as pga
23 |
24 |
25 | class ProgramGraphAnalysisTest(absltest.TestCase):
26 |
def setUp(self):
  """Builds the small example graphs that the tests in this class share."""
  super().setUp()
  self.singleton = self.create_singleton_graph()
  self.disconnected = self.create_disconnected_graph()
  self.cycle_3 = self.create_cycle_3()
  self.chain_4 = self.create_chain_4()
  self.wide_tree = self.create_wide_tree()
34 |
def create_singleton_graph(self):
  """Returns a graph made of a single root node and no edges."""
  singleton = program_graph.ProgramGraph()
  only_node = program_graph.make_node_from_syntax('singleton_node')
  singleton.add_node(only_node)
  singleton.root_id = only_node.id
  return singleton
42 |
def create_disconnected_graph(self):
  """Returns a graph of two unconnected nodes; the first one is the root."""
  disconnected = program_graph.ProgramGraph()
  first = program_graph.make_node_from_syntax('a')
  second = program_graph.make_node_from_syntax('b')
  for node in (first, second):
    disconnected.add_node(node)
  disconnected.root_id = first.id
  return disconnected
52 |
def create_cycle_3(self):
  """Returns a directed 3-cycle A -> B -> C -> A rooted at A."""
  cycle = program_graph.ProgramGraph()
  node_a = program_graph.make_node_from_syntax('A')
  node_b = program_graph.make_node_from_ast_value('B')
  node_c = program_graph.make_node_from_syntax('C')
  for node in (node_a, node_b, node_c):
    cycle.add_node(node)
  # Edges follow the cycle order and close it back onto A.
  for source, target in ((node_a, node_b), (node_b, node_c), (node_c, node_a)):
    cycle.add_new_edge(source, target)
  cycle.root_id = node_a.id
  return cycle
67 |
68 | def create_chain_4(self):
69 | """Returns a chain of 4 nodes, A -> B -> C -> D."""
70 | graph = program_graph.ProgramGraph()
71 | a = program_graph.make_node_from_syntax('A')
72 | b = program_graph.make_node_from_ast_value('B')
73 | c = program_graph.make_node_from_syntax('C')
74 | d = program_graph.make_node_from_ast_value('D')
75 | graph.add_node(a)
76 | graph.add_node(b)
77 | graph.add_node(c)
78 | graph.add_node(d)
79 | graph.add_new_edge(a, b)
80 | graph.add_new_edge(b, c)
81 | graph.add_new_edge(c, d)
82 | graph.root_id = a.id
83 | return graph
84 |
85 | def create_wide_tree(self):
86 | """Returns a tree where the root has 4 children that are all leaves."""
87 | graph = program_graph.ProgramGraph()
88 | root = program_graph.make_node_from_syntax('root')
89 | graph.add_node(root)
90 | graph.root_id = root.id
91 | for i in range(4):
92 | leaf = program_graph.make_node_from_ast_value(i)
93 | graph.add_node(leaf)
94 | graph.add_new_edge(root, leaf)
95 | return graph
96 |
97 | def ids_from_cycle_3(self):
98 | """Returns a triplet of IDs from the 3-cycle graph in cycle order."""
99 | root = self.cycle_3.root
100 | id_a = root.id
101 | id_b = self.cycle_3.outgoing_neighbors(root)[0].id
102 | id_c = self.cycle_3.incoming_neighbors(root)[0].id
103 | return id_a, id_b, id_c
104 |
105 | def test_num_nodes_returns_expected(self):
106 | self.assertEqual(pga.num_nodes(self.singleton), 1)
107 | self.assertEqual(pga.num_nodes(self.disconnected), 2)
108 | self.assertEqual(pga.num_nodes(self.cycle_3), 3)
109 | self.assertEqual(pga.num_nodes(self.chain_4), 4)
110 | self.assertEqual(pga.num_nodes(self.wide_tree), 5)
111 |
112 | def test_num_edges_returns_expected(self):
113 | self.assertEqual(pga.num_edges(self.singleton), 0)
114 | self.assertEqual(pga.num_edges(self.disconnected), 0)
115 | self.assertEqual(pga.num_edges(self.cycle_3), 3)
116 | self.assertEqual(pga.num_edges(self.chain_4), 3)
117 | self.assertEqual(pga.num_edges(self.wide_tree), 4)
118 |
119 | def test_ast_height_returns_expected_for_constructed_expression_ast(self):
120 | # Testing the expression "1".
121 | # Height 3: Module -> Expr -> Num.
122 | ast_node = ast.Module(
123 | body=[ast.Expr(value=ast.Constant(value=1, kind=None))],
124 | type_ignores=[])
125 | self.assertEqual(pga.ast_height(ast_node), 3)
126 |
127 | # Testing the expression "1 + 1".
128 | # Height 4: Module -> Expr -> BinOp -> Num.
129 | ast_node = ast.Module(
130 | body=[
131 | ast.Expr(
132 | value=ast.BinOp(
133 | left=ast.Constant(value=1, kind=None),
134 | op=ast.Add(),
135 | right=ast.Constant(value=1, kind=None)))
136 | ],
137 | type_ignores=[])
138 | self.assertEqual(pga.ast_height(ast_node), 4)
139 |
140 | # Testing the expression "a + 1".
141 | # Height 5: Module -> Expr -> BinOp -> Name -> Load.
142 | ast_node = ast.Module(
143 | body=[
144 | ast.Expr(
145 | value=ast.BinOp(
146 | left=ast.Name(
147 | id='a',
148 | ctx=ast.Load(),
149 | annotation=None,
150 | type_comment=None),
151 | op=ast.Add(),
152 | right=ast.Constant(value=1, kind=None)))
153 | ],
154 | type_ignores=[])
155 | self.assertEqual(pga.ast_height(ast_node), 5)
156 |
157 | # Testing the expression "a.b + 1".
158 | # Height 6: Module -> Expr -> BinOp -> Attribute -> Name -> Load.
159 | ast_node = ast.Module(
160 | body=[
161 | ast.Expr(
162 | value=ast.BinOp(
163 | left=ast.Attribute(
164 | value=ast.Name(
165 | id='a',
166 | ctx=ast.Load(),
167 | annotation=None,
168 | type_comment=None),
169 | attr='b',
170 | ctx=ast.Load()),
171 | op=ast.Add(),
172 | right=ast.Constant(value=1, kind=None)))
173 | ],
174 | type_ignores=[])
175 | self.assertEqual(pga.ast_height(ast_node), 6)
176 |
177 | def test_ast_height_returns_expected_for_constructed_function_ast(self):
178 | # Testing the function declaration "def foo(n): return".
179 | # Height 5: Module -> FunctionDef -> arguments -> Name -> Param.
180 | ast_node = ast.Module(
181 | body=[
182 | ast.FunctionDef(
183 | name='foo',
184 | args=ast.arguments(
185 | args=[
186 | ast.Name(
187 | id='n',
188 | ctx=ast.Param(),
189 | annotation=None,
190 | type_comment=None)
191 | ],
192 | posonlyargs=[],
193 | vararg=None,
194 | kwonlyargs=[],
195 | kw_defaults=[],
196 | kwarg=None,
197 | defaults=[]),
198 | body=[ast.Return(value=None)],
199 | decorator_list=[],
200 | returns=None,
201 | type_comment=None)
202 | ],
203 | type_ignores=[])
204 | self.assertEqual(pga.ast_height(ast_node), 5)
205 |
206 | # Testing the function declaration "def foo(n): return n + 1".
207 | # Height 6: Module -> FunctionDef -> Return -> BinOp -> Name -> Load.
208 | ast_node = ast.Module(
209 | body=[
210 | ast.FunctionDef(
211 | name='foo',
212 | args=ast.arguments(
213 | args=[
214 | ast.Name(
215 | id='n',
216 | ctx=ast.Param(),
217 | annotation=None,
218 | type_comment=None)
219 | ],
220 | posonlyargs=[],
221 | vararg=None,
222 | kwonlyargs=[],
223 | kw_defaults=[],
224 | kwarg=None,
225 | defaults=[]),
226 | body=[
227 | ast.Return(
228 | value=ast.BinOp(
229 | left=ast.Name(
230 | id='n',
231 | ctx=ast.Load(),
232 | annotation=None,
233 | type_comment=None),
234 | op=ast.Add(),
235 | right=ast.Constant(value=1, kind=None)))
236 | ],
237 | decorator_list=[],
238 | returns=None,
239 | type_comment=None)
240 | ],
241 | type_ignores=[],
242 | )
243 | self.assertEqual(pga.ast_height(ast_node), 6)
244 |
245 | def test_ast_height_returns_expected_for_parsed_ast(self):
246 | # Height 3: Module -> Expr -> Num.
247 | self.assertEqual(pga.ast_height(ast.parse('1')), 3)
248 |
249 | # Height 6: Module -> Expr -> BinOp -> Attribute -> Name -> Load.
250 | self.assertEqual(pga.ast_height(ast.parse('a.b + 1')), 6)
251 |
252 | # Height 6: Module -> FunctionDef -> Return -> BinOp -> Name -> Load.
253 | self.assertEqual(pga.ast_height(ast.parse('def foo(n): return n + 1')), 6)
254 |
255 | # Height 9: Module -> FunctionDef -> If -> Return -> BinOp -> Call
256 | # -> BinOp -> Name -> Load.
257 | # Adding whitespace before "def foo" causes an IndentationError in parse().
258 | ast_node = ast.parse("""def foo(n):
259 | if n <= 0:
260 | return 0
261 | else:
262 | return 1 + foo(n - 1)
263 | """)
264 | self.assertEqual(pga.ast_height(ast_node), 9)
265 |
266 | def test_graph_ast_height_returns_expected(self):
267 | # Height 6: Module -> FunctionDef -> Return -> BinOp -> Name -> Load.
268 | def foo1(n):
269 | return n + 1
270 |
271 | graph = program_graph.get_program_graph(foo1)
272 | self.assertEqual(pga.graph_ast_height(graph), 6)
273 |
274 | # Height 9: Module -> FunctionDef -> If -> Return -> BinOp -> Call
275 | # -> BinOp -> Name -> Load.
276 | def foo2(n):
277 | if n <= 0:
278 | return 0
279 | else:
280 | return 1 + foo2(n - 1)
281 |
282 | graph = program_graph.get_program_graph(foo2)
283 | self.assertEqual(pga.graph_ast_height(graph), 9)
284 |
285 | def test_degrees_returns_expected(self):
286 | self.assertCountEqual(pga.degrees(self.singleton), [0])
287 | self.assertCountEqual(pga.degrees(self.disconnected), [0, 0])
288 | self.assertCountEqual(pga.degrees(self.cycle_3), [2, 2, 2])
289 | self.assertCountEqual(pga.degrees(self.chain_4), [1, 2, 2, 1])
290 | self.assertCountEqual(pga.degrees(self.wide_tree), [4, 1, 1, 1, 1])
291 |
292 | def test_in_degrees_returns_expected(self):
293 | self.assertCountEqual(pga.in_degrees(self.singleton), [0])
294 | self.assertCountEqual(pga.in_degrees(self.disconnected), [0, 0])
295 | self.assertCountEqual(pga.in_degrees(self.cycle_3), [1, 1, 1])
296 | self.assertCountEqual(pga.in_degrees(self.chain_4), [0, 1, 1, 1])
297 | self.assertCountEqual(pga.in_degrees(self.wide_tree), [0, 1, 1, 1, 1])
298 |
299 | def test_out_degrees_returns_expected(self):
300 | self.assertCountEqual(pga.out_degrees(self.singleton), [0])
301 | self.assertCountEqual(pga.out_degrees(self.disconnected), [0, 0])
302 | self.assertCountEqual(pga.out_degrees(self.cycle_3), [1, 1, 1])
303 | self.assertCountEqual(pga.out_degrees(self.chain_4), [1, 1, 1, 0])
304 | self.assertCountEqual(pga.out_degrees(self.wide_tree), [4, 0, 0, 0, 0])
305 |
306 | def test_diameter_returns_expected_if_connected(self):
307 | self.assertEqual(pga.diameter(self.singleton), 0)
308 | self.assertEqual(pga.diameter(self.cycle_3), 1)
309 | self.assertEqual(pga.diameter(self.chain_4), 3)
310 | self.assertEqual(pga.diameter(self.wide_tree), 2)
311 |
312 | def test_diameter_throws_exception_if_disconnected(self):
313 | with self.assertRaises(nx.exception.NetworkXError):
314 | pga.diameter(self.disconnected)
315 |
316 | def test_program_graph_to_nx_undirected_has_correct_edges(self):
317 | id_a, id_b, id_c = self.ids_from_cycle_3()
318 | nx_graph = pga._program_graph_to_nx(self.cycle_3, directed=False)
319 | self.assertCountEqual(nx_graph.nodes(), [id_a, id_b, id_c])
320 | expected_adj = {
321 | id_a: {
322 | id_b: {},
323 | id_c: {}
324 | },
325 | id_b: {
326 | id_a: {},
327 | id_c: {}
328 | },
329 | id_c: {
330 | id_a: {},
331 | id_b: {}
332 | },
333 | }
334 | self.assertEqual(nx_graph.adj, expected_adj)
335 |
336 | def test_program_graph_to_nx_directed_has_correct_edges(self):
337 | id_a, id_b, id_c = self.ids_from_cycle_3()
338 | nx_digraph = pga._program_graph_to_nx(self.cycle_3, directed=True)
339 | self.assertCountEqual(nx_digraph.nodes(), [id_a, id_b, id_c])
340 | expected_adj = {
341 | id_a: {
342 | id_b: {}
343 | },
344 | id_b: {
345 | id_c: {}
346 | },
347 | id_c: {
348 | id_a: {}
349 | },
350 | }
351 | self.assertEqual(nx_digraph.adj, expected_adj)
352 |
353 | def test_max_betweenness_returns_expected(self):
354 | self.assertAlmostEqual(pga.max_betweenness(self.singleton), 0)
355 | self.assertAlmostEqual(pga.max_betweenness(self.disconnected), 0)
356 | self.assertAlmostEqual(pga.max_betweenness(self.cycle_3), 0)
357 |
358 | # Middle nodes are in 2 shortest paths, normalizer = (4-1)*(4-2)/2 = 3
359 | self.assertAlmostEqual(pga.max_betweenness(self.chain_4), 2 / 3)
360 |
361 | # Root is in 6 shortest paths, normalizer = (5-1)*(5-2)/2 = 6
362 | self.assertAlmostEqual(pga.max_betweenness(self.wide_tree), 6 / 6)
363 |
364 |
# Run the tests in this file when it is executed as a script.
if __name__ == '__main__':
  absltest.main()
367 |
--------------------------------------------------------------------------------
/python_graphs/analysis/run_program_graph_analysis.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Runs the program graph analysis for datasets of programs.
16 |
17 | Analyzes each dataset of programs, producing plots for properties such as the
18 | AST height.
19 | """
20 |
21 | import inspect
22 | import math
23 |
24 | from absl import app
25 | from absl import logging
26 | import matplotlib.pyplot as plt
27 | import numpy as np
28 | from python_graphs import control_flow_test_components as cftc
29 | from python_graphs import program_graph
30 | from python_graphs import program_graph_test_components as pgtc
31 | from python_graphs.analysis import program_graph_analysis
32 | import six
33 | from six.moves import range
34 |
35 |
36 |
# Histogram bin-count settings read by create_bins().
TARGET_NUM_BINS = 15  # A reasonable constant number of histogram bins.
MAX_NUM_BINS = 20  # The maximum number of bins reasonable on a histogram.
39 |
40 |
def test_components():
  """Generates the functions defined in the two test component modules.

  Yields:
    Every function found in the program graph test components module,
    followed by every function in the control flow test components module.
  """
  for module in (pgtc, cftc):
    members = inspect.getmembers(module, predicate=inspect.isfunction)
    for unused_name, fn in members:
      yield fn
52 |
53 |
54 |
55 |
def _source_preview(function, max_chars=100):
  """Returns at most max_chars characters of source text for log messages.

  Args:
    function: A program given either as source text or as a function object.
    max_chars: The maximum number of characters to return.

  Returns:
    The start of the program's source, or of its repr() when the source
    cannot be retrieved.
  """
  if isinstance(function, str):
    return function[:max_chars]
  try:
    return inspect.getsource(function)[:max_chars]
  except (IOError, OSError, TypeError):
    # The source is unavailable (e.g. builtins or dynamically created code).
    return repr(function)[:max_chars]


def get_graph_generator(function_generator):
  """Generates ProgramGraph objects from functions.

  Functions for which get_program_graph raises SyntaxError or RuntimeError are
  logged and skipped rather than aborting the whole run.

  Args:
    function_generator: A function generator.

  Yields:
    ProgramGraph objects for the functions.
  """
  for index, function in enumerate(function_generator):
    try:
      graph = program_graph.get_program_graph(function)
    except SyntaxError:
      # get_program_graph can fail for programs with different string encodings.
      # Function objects cannot be sliced, so the preview is built by a helper
      # instead of slicing `function` directly.
      logging.info('SyntaxError in get_program_graph for function index %d. '
                   'First 100 chars of function source:\n%s',
                   index, _source_preview(function))
    except RuntimeError:
      # get_program_graph can fail for programs that are only return statements.
      logging.info('RuntimeError in get_program_graph for function index %d. '
                   'First 100 chars of function source:\n%s',
                   index, _source_preview(function))
    else:
      yield graph
79 |
80 |
def get_percentiles(data, percentiles, integer_valued=True):
  """Returns a dict of percentiles of the data.

  Args:
    data: An unsorted list of datapoints.
    percentiles: A list of ints or floats in the range [0, 100] representing the
      percentiles to compute.
    integer_valued: Whether or not the values are all integers. If so,
      interpolate to the nearest datapoint (instead of computing a fractional
      value between the two nearest datapoints).

  Returns:
    A dict mapping each element of percentiles to the computed result.
  """
  # Ensure integer datapoints for cleaner binning if necessary.
  method = 'nearest' if integer_valued else 'linear'
  try:
    # `method` is the current name of the keyword; `interpolation` is its
    # deprecated spelling.
    results = np.percentile(data, percentiles, method=method)
  except TypeError:
    # Older NumPy releases only accept the keyword under its previous name.
    results = np.percentile(data, percentiles, interpolation=method)
  return dict(zip(percentiles, results))
99 |
100 |
def analyze_graph(graph, identifier):
  """Runs every analysis on one graph and collects the results.

  Args:
    graph: A ProgramGraph to analyze.
    identifier: A unique identifier for this graph (for later aggregation).

  Returns:
    A pair (identifier, result_dict), where result_dict maps the name of each
    analysis to the value it produced for the graph.
  """
  degree_percentiles = [10, 25, 50, 75, 90]

  def degree_summary(degree_values):
    return get_percentiles(degree_values, degree_percentiles)

  # TODO(kshi): Turn this into a protobuf and fix everywhere else in this file.
  # Eventually this should be parallelized (currently takes ~6 hours to run).
  # Dict entries are evaluated top to bottom, so the analyses run in the order
  # in which they are listed here.
  result_dict = {
      'num_nodes': program_graph_analysis.num_nodes(graph),
      'num_edges': program_graph_analysis.num_edges(graph),
      'ast_height': program_graph_analysis.graph_ast_height(graph),
      'degrees': degree_summary(program_graph_analysis.degrees(graph)),
      'in_degrees': degree_summary(program_graph_analysis.in_degrees(graph)),
      'out_degrees': degree_summary(program_graph_analysis.out_degrees(graph)),
      'diameter': program_graph_analysis.diameter(graph),
      'max_betweenness': program_graph_analysis.max_betweenness(graph),
  }
  return (identifier, result_dict)
141 |
142 |
def create_bins(values, integer_valued=True, log_x=False):
  """Chooses histogram bins suited to the given values.

  Args:
    values: The values to be plotted in a histogram.
    integer_valued: Whether the values are all integers.
    log_x: Whether to plot the x-axis using a log scale.

  Returns:
    An object (sequence, integer, or 'auto') that can be used as the 'bins'
    keyword argument to plt.hist(). If there are no values to plot, or all of
    the values are identical, then 'auto' is returned.
  """
  # With no data at all, binning is left to pyplot.
  if not values:
    return 'auto'

  lowest, highest = min(values), max(values)
  # With a single distinct value, binning is left to pyplot as well.
  if lowest == highest:
    return 'auto'

  if log_x:
    # Bin edges evenly spaced in log space.
    return np.logspace(np.log10(lowest), np.log10(highest + 1),
                       num=(TARGET_NUM_BINS + 1))

  if integer_valued:
    # The minimum integer width resulting in at most MAX_NUM_BINS bins.
    width = math.ceil((highest - lowest + 1) / MAX_NUM_BINS)
    # Bin boundaries fall halfway between integers.
    return np.arange(lowest - 0.5, highest + width + 0.5, width)

  return TARGET_NUM_BINS
173 |
174 |
def _use_log_scale(set_scale, legacy_keyword):
  """Switches one axis to a log scale that clips non-positive values.

  Args:
    set_scale: Either plt.xscale or plt.yscale.
    legacy_keyword: The axis-specific keyword ('nonposx' or 'nonposy') that
      older Matplotlib releases expect instead of 'nonpositive'.
  """
  try:
    set_scale('log', nonpositive='clip')
  except (TypeError, ValueError):
    # Older Matplotlib releases reject 'nonpositive'; fall back to the
    # axis-specific keyword they understand.
    set_scale('log', **{legacy_keyword: 'clip'})


def create_histogram(values, title, percentiles=False, integer_valued=True,
                     log_x=False, log_y=False):
  """Returns a histogram of integer values computed from a dataset.

  Args:
    values: A list of integer values to plot, or if percentiles is True, then
      each value is a dict mapping some chosen percentiles in [0, 100] to the
      corresponding data value.
    title: The figure title.
    percentiles: Whether to plot multiple histograms for percentiles.
    integer_valued: Whether the values are all integers, which affects how the
      data is partitioned into bins.
    log_x: Whether to plot the x-axis using a log scale.
    log_y: Whether to plot the y-axis using a log scale.

  Returns:
    A histogram figure.
  """
  figure = plt.figure()

  if percentiles:
    # The percentile keys are read from the first entry; with no entries
    # there is nothing to plot, so the figure is left empty.
    percentile_keys = sorted(values[0].keys()) if values else []
    for percentile in percentile_keys:
      new_values = [percentile_dict[percentile]
                    for percentile_dict in values]
      bins = create_bins(new_values, integer_valued=integer_valued, log_x=log_x)
      # Semi-transparent bars keep overlapping percentile histograms visible.
      plt.hist(new_values, bins=bins, alpha=0.5, label='{}%'.format(percentile))
    if percentile_keys:
      plt.legend(loc='upper right')
  else:
    bins = create_bins(values, integer_valued=integer_valued, log_x=log_x)
    plt.hist(values, bins=bins)

  if log_x:
    _use_log_scale(plt.xscale, 'nonposx')
  if log_y:
    _use_log_scale(plt.yscale, 'nonposy')
  plt.title(title)
  return figure
212 |
213 |
def save_histogram(all_results, result_key, dataset_name, path_root,
                   percentiles=False, integer_valued=True,
                   log_x=False, log_y=False):
  """Saves a histogram image to disk.

  The image is written to '<path_root>/<result_key>-<dataset_name>.png'. The
  directory path_root is created if it does not exist yet.

  Args:
    all_results: A list of dicts containing all analysis results for each graph.
    result_key: The key in the result dicts specifying what data to plot.
    dataset_name: The name of the dataset, which appears in the figure title and
      the image filename.
    path_root: The directory to save the histogram image in.
    percentiles: Whether the data has multiple percentiles to plot.
    integer_valued: Whether the values are all integers, which affects how the
      data is partitioned into bins.
    log_x: Whether to plot the x-axis using a log scale.
    log_y: Whether to plot the y-axis using a log scale.
  """
  import os  # Local import: only needed to create the output directory.

  values = [result[result_key] for result in all_results]
  title = '{} distribution for {}'.format(result_key, dataset_name)
  figure = create_histogram(values, title, percentiles=percentiles,
                            integer_valued=integer_valued,
                            log_x=log_x, log_y=log_y)
  path = '{}/{}-{}.png'.format(path_root, result_key, dataset_name)
  try:
    # savefig does not create missing directories itself.
    os.makedirs(path_root, exist_ok=True)
    figure.savefig(path)
  finally:
    # pyplot keeps every figure alive until it is closed explicitly, so close
    # it here to avoid accumulating figures across many histograms.
    plt.close(figure)
  logging.info('Saved image %s', path)
239 |
240 |
def main(argv):
  """Analyzes every dataset and saves histograms of the results.

  For each dataset, builds a program graph per function, analyzes it, and
  writes one histogram image per analysis result under path_root.

  Args:
    argv: Command-line arguments; unused.
  """
  del argv  # Unused.

  dataset_pairs = [
      (test_components(), 'test_components'),
  ]
  path_root = '/tmp/program_graph_analysis'

  for function_generator, dataset_name in dataset_pairs:
    logging.info('Analyzing graphs in dataset %s...', dataset_name)
    graph_generator = get_graph_generator(function_generator)
    all_results = []
    for index, graph in enumerate(graph_generator):
      identifier = '{}-{}'.format(dataset_name, index)
      # Discard the identifiers (not needed until this is parallelized).
      all_results.append(analyze_graph(graph, identifier)[1])

    if not all_results:
      # `warning` is used because `warn` is a deprecated alias.
      logging.warning('Dataset %s is empty.', dataset_name)
      continue

    logging.info('Creating plots for dataset %s...', dataset_name)
    # Size metrics are plotted on a logarithmic x-axis.
    for result_key in ['num_nodes', 'num_edges']:
      save_histogram(all_results, result_key, dataset_name, path_root,
                     percentiles=False, integer_valued=True, log_x=True)
    for result_key in ['ast_height', 'diameter']:
      save_histogram(all_results, result_key, dataset_name, path_root,
                     percentiles=False, integer_valued=True)
    # Betweenness is plotted as non-integer data.
    for result_key in ['max_betweenness']:
      save_histogram(all_results, result_key, dataset_name, path_root,
                     percentiles=False, integer_valued=False)
    # Degree results are dicts of percentiles, plotted as overlaid histograms.
    for result_key in ['degrees', 'in_degrees', 'out_degrees']:
      save_histogram(all_results, result_key, dataset_name, path_root,
                     percentiles=True, integer_valued=True)
274 |
275 |
# Hand control to absl, which invokes main() with the command-line arguments.
if __name__ == '__main__':
  app.run(main)
278 |
--------------------------------------------------------------------------------
/python_graphs/control_flow_graphviz.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Graphviz render for control flow graphs."""
16 |
17 | from absl import logging # pylint: disable=unused-import
18 | import astunparse
19 | import gast as ast
20 | import pygraphviz
21 |
# Graphviz line break that left-justifies the preceding line of a label. A raw
# string keeps the backslash literal without relying on Python passing the
# invalid escape sequence '\l' through unchanged.
LEFT_ALIGN = r'\l'
23 |
24 |
def render(graph, include_src=None, path='/tmp/graph.png'):
  """Draws the control flow graph to an image file using the dot layout.

  Args:
    graph: The control flow graph to draw.
    include_src: Optional source text to show as an extra node.
    path: Where to write the rendered image.
  """
  to_graphviz(graph, include_src=include_src).draw(path, prog='dot')
28 |
29 |
def trim(line, max_length=30):
  """Shortens a line to max_length characters, marking the cut with '...'.

  Args:
    line: The text to shorten.
    max_length: The longest result allowed, including the '...' marker.

  Returns:
    The line itself when it already fits, otherwise its first
    max_length - 3 characters followed by '...'.
  """
  too_long = len(line) > max_length
  return line[:max_length - 3] + '...' if too_long else line
34 |
35 |
def unparse(node):
  """Returns the node's source as a trimmed, left-aligned Graphviz label.

  Each line of the unparsed source is shortened with trim(), surrounding
  whitespace is removed, and the remaining newlines become LEFT_ALIGN.

  Args:
    node: The AST node to convert to source text.

  Returns:
    The label text for the node.
  """
  shortened_lines = [
      trim(line) for line in astunparse.unparse(node).split('\n')
  ]
  # strip() already removes surrounding spaces and newlines, so no further
  # stripping is needed before converting the line breaks.
  return '\n'.join(shortened_lines).strip().replace('\n', LEFT_ALIGN)
45 |
46 |
def write_as_str(write):
  """Returns a write as text, unparsing it first when it is an AST node.

  Args:
    write: Either an AST node or a value that is returned unchanged.

  Returns:
    The unparsed label for an AST node, otherwise the argument itself.
  """
  return unparse(write) if isinstance(write, ast.AST) else write
52 |
53 |
def get_label_for_instruction(instruction):
  """Returns the label text for a single instruction.

  Args:
    instruction: The instruction to describe.

  Returns:
    '<write names> <- <source>' when the instruction has a source, otherwise
    the unparsed form of the instruction's node.
  """
  if instruction.source is None:
    return unparse(instruction.node)
  written_names = ', '.join(instruction.get_write_names())
  return written_names + ' <- ' + instruction.source
61 |
62 |
def get_label(block):
  """Returns the Graphviz label text for a control flow basic block.

  Args:
    block: The basic block whose instructions are rendered.

  Returns:
    The non-blank instruction labels of the block, each one terminated by
    LEFT_ALIGN.
  """
  instruction_labels = (
      get_label_for_instruction(control_flow_node.instruction)
      for control_flow_node in block.control_flow_nodes
  )
  # Instructions whose label is empty or only whitespace are left out.
  visible = [text for text in instruction_labels if text.strip()]
  return LEFT_ALIGN.join(visible) + LEFT_ALIGN
73 |
74 |
def to_graphviz(graph, include_src=None):
  """Converts a control flow graph into a pygraphviz graph.

  Every basic block becomes a node labelled with its instructions. Exits from
  the middle of a block are drawn as dashed edges and exits from the end of a
  block as solid edges.

  Args:
    graph: The control flow graph to convert.
    include_src: Optional source text. When given, it is added as one extra
      box-shaped node.

  Returns:
    The pygraphviz.AGraph describing the control flow graph.
  """
  g = pygraphviz.AGraph(strict=False, directed=True)
  for block in graph.blocks:
    node_attrs = {}
    label = get_label(block)
    # Only block labels that start with '<' are shown; those blocks are also
    # drawn in bold.
    if block.label is not None and block.label.startswith('<'):
      node_attrs['style'] = 'bold'
      # get_label returns a bare LEFT_ALIGN when the block has no visible
      # instructions. Compare against it directly: rstrip(LEFT_ALIGN) strips
      # any trailing '\' and 'l' characters and would misjudge labels such as
      # 'l\l' as empty.
      if label == LEFT_ALIGN:
        label = block.label + LEFT_ALIGN
      else:
        label = block.label + LEFT_ALIGN + label
    node_attrs['label'] = label
    node_attrs['fontname'] = 'Courier New'
    node_attrs['fontsize'] = 10.0

    node_id = id(block)
    g.add_node(node_id, **node_attrs)
    for next_node in block.next:
      next_node_id = id(next_node)
      if next_node in block.exits_from_middle:
        edge_attrs = {}
        edge_attrs['style'] = 'dashed'
        g.add_edge(node_id, next_node_id, **edge_attrs)
      if next_node in block.exits_from_end:
        edge_attrs = {}
        edge_attrs['style'] = 'solid'
        g.add_edge(node_id, next_node_id, **edge_attrs)

  if include_src is not None:
    # Use fresh attributes so the source node neither inherits the style of
    # the last block nor fails when the graph has no blocks at all.
    src_attrs = {
        'label': include_src.replace('\n', LEFT_ALIGN),
        'fontname': 'Courier New',
        'fontsize': 10.0,
        'shape': 'box',
    }
    g.add_node(id(include_src), **src_attrs)

  return g
114 |
--------------------------------------------------------------------------------
/python_graphs/control_flow_graphviz_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for control_flow_graphviz.py."""
16 |
17 | import inspect
18 |
19 | from absl.testing import absltest
20 | from python_graphs import control_flow
21 | from python_graphs import control_flow_graphviz
22 | from python_graphs import control_flow_test_components as tc
23 |
24 |
class ControlFlowGraphvizTest(absltest.TestCase):
  """Tests the Graphviz rendering of control flow graphs."""

  def test_to_graphviz_for_all_test_components(self):
    # Every test component function must convert without raising.
    components = inspect.getmembers(tc, predicate=inspect.isfunction)
    for unused_name, component in components:
      cfg = control_flow.get_control_flow_graph(component)
      control_flow_graphviz.to_graphviz(cfg)

  def test_get_label_multi_op_expression(self):
    cfg = control_flow.get_control_flow_graph(tc.multi_op_expression)
    label = control_flow_graphviz.get_label(
        cfg.get_block_by_source('1 + 2 * 3'))
    self.assertEqual(label.strip(), 'return (1 + (2 * 3))\\l')
39 |
# Run the tests in this file when it is executed as a script.
if __name__ == '__main__':
  absltest.main()
42 |
--------------------------------------------------------------------------------
/python_graphs/control_flow_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for control_flow.py."""
16 |
17 | import inspect
18 |
19 | from absl import logging # pylint: disable=unused-import
20 | from absl.testing import absltest
21 | import gast as ast
22 | from python_graphs import control_flow
23 | from python_graphs import control_flow_test_components as tc
24 | from python_graphs import instruction as instruction_module
25 | from python_graphs import program_utils
26 | import six
27 |
28 |
29 | class ControlFlowTest(absltest.TestCase):
30 |
def get_block(self, graph, selector):
  """Resolves a selector to a basic block of the graph.

  Args:
    graph: The control flow graph to search.
    selector: Either a BasicBlock, which is returned as is, or a source
      string used to look the block up in the graph.

  Returns:
    The selected block, or None for any other kind of selector.
  """
  if isinstance(selector, control_flow.BasicBlock):
    return selector
  if isinstance(selector, six.string_types):
    return graph.get_block_by_source(selector)
  return None
36 |
def assertSameBlock(self, graph, selector1, selector2):
  """Asserts that both selectors resolve to equal blocks of the graph."""
  first, second = [
      self.get_block(graph, selector) for selector in (selector1, selector2)
  ]
  self.assertEqual(first, second)
41 |
def assertExitsTo(self, graph, selector1, selector2):
  """Asserts that the first selected block exits to the second one."""
  source = self.get_block(graph, selector1)
  target = self.get_block(graph, selector2)
  exits = source.exits_to(target)
  self.assertTrue(exits)
46 |
def assertNotExitsTo(self, graph, selector1, selector2):
  """Asserts that the first selected block does not exit to the second."""
  source = self.get_block(graph, selector1)
  target = self.get_block(graph, selector2)
  exits = source.exits_to(target)
  self.assertFalse(exits)
51 |
def assertRaisesTo(self, graph, selector1, selector2):
  """Asserts that the first selected block raises to the second one."""
  source = self.get_block(graph, selector1)
  target = self.get_block(graph, selector2)
  raises = source.raises_to(target)
  self.assertTrue(raises)
56 |
def assertNotRaisesTo(self, graph, selector1, selector2):
  """Asserts that the first selected block does not raise to the second."""
  source = self.get_block(graph, selector1)
  target = self.get_block(graph, selector2)
  raises = source.raises_to(target)
  self.assertFalse(raises)
61 |
def test_control_flow_straight_line_code(self):
  graph = control_flow.get_control_flow_graph(tc.straight_line_code)
  # Every statement is expected in the same block as the first one.
  for statement in ('y = x + 2', 'z = y * 3', 'return z'):
    self.assertSameBlock(graph, 'x = 1', statement)
67 |
def test_control_flow_simple_if_statement(self):
  graph = control_flow.get_control_flow_graph(tc.simple_if_statement)
  first_assign = 'x = 1'
  condition = 'x > y'
  branch_body = 'y = 3'
  final_return = 'return y'

  # The statements before the branch share one block with the condition.
  for statement in ('y = 2', condition):
    self.assertSameBlock(graph, first_assign, statement)

  expected_exits = [
      (condition, branch_body),
      (condition, final_return),
      (branch_body, final_return),
  ]
  for source, target in expected_exits:
    self.assertExitsTo(graph, source, target)

  # Control never flows backwards in this function.
  forbidden_exits = [
      (branch_body, first_assign),
      (final_return, first_assign),
      (final_return, branch_body),
  ]
  for source, target in forbidden_exits:
    self.assertNotExitsTo(graph, source, target)
83 |
def test_control_flow_simple_for_loop(self):
  graph = control_flow.get_control_flow_graph(tc.simple_for_loop)
  loop_iter = 'range'
  loop_target = 'y'
  loop_body = 'y + 3'
  after_loop = 'return z'

  self.assertSameBlock(graph, 'x = 1', loop_iter)
  # Each check is (assertion, source block, target block), run in order.
  checks = [
      (self.assertExitsTo, loop_iter, loop_target),
      (self.assertExitsTo, loop_target, loop_body),
      (self.assertNotExitsTo, loop_body, after_loop),
      (self.assertExitsTo, loop_target, after_loop),
  ]
  for check, source, target in checks:
    check(graph, source, target)
96 |
def test_control_flow_simple_while_loop(self):
  graph = control_flow.get_control_flow_graph(tc.simple_while_loop)
  before_loop = 'x = 1'
  loop_test = 'x < 2'
  loop_body = 'x += 3'
  after_loop = 'return x'

  # Each check is (assertion, source block, target block), run in order.
  checks = [
      (self.assertExitsTo, before_loop, loop_test),
      (self.assertExitsTo, loop_test, loop_body),
      (self.assertExitsTo, loop_body, loop_test),
      (self.assertNotExitsTo, loop_body, after_loop),
      (self.assertExitsTo, loop_test, after_loop),
  ]
  for check, source, target in checks:
    check(graph, source, target)
109 |
def test_control_flow_break_in_while_loop(self):
  graph = control_flow.get_control_flow_graph(tc.break_in_while_loop)
  # Nothing jumps from the end of the loop back to its test, so the 'x = 1'
  # line is merged with the test into a single block.
  init_and_test = 'x < 2'
  loop_body = 'x += 3'
  after_loop = 'return x'

  # Each check is (assertion, source block, target block), run in order.
  checks = [
      (self.assertExitsTo, init_and_test, loop_body),
      (self.assertExitsTo, loop_body, after_loop),
      (self.assertNotExitsTo, loop_body, init_and_test),
      (self.assertExitsTo, init_and_test, after_loop),
  ]
  for check, source, target in checks:
    check(graph, source, target)
123 |
def test_control_flow_nested_while_loops(self):
  """Checks every expected exit edge in the CFG of tc.nested_while_loops."""
  cfg = control_flow.get_control_flow_graph(tc.nested_while_loops)
  outer_test = 'x < 2'
  inner_test = 'y < 4'

  # (source, target) pairs, each of which must be an exit edge.
  expected_edges = [
      ('x = 1', outer_test),
      (outer_test, 'y = 3'),
      (outer_test, 'return x'),
      ('y = 3', inner_test),
      (inner_test, 'y += 5'),
      (inner_test, 'x += 6'),
      ('y += 5', inner_test),
      ('x += 6', outer_test),
  ]
  for source, target in expected_edges:
    self.assertExitsTo(cfg, source, target)
142 |
def test_control_flow_exception_handling(self):
  """Checks normal and exceptional edges for tc.exception_handling."""
  cfg = control_flow.get_control_flow_graph(tc.exception_handling)
  outer_handler = 'except_block2_stmt0'

  self.assertSameBlock(cfg, 'before_stmt0', 'before_stmt1')
  self.assertExitsTo(cfg, 'before_stmt1', 'try_block')
  self.assertNotExitsTo(cfg, 'before_stmt0', 'except_block1')
  self.assertNotExitsTo(cfg, 'before_stmt1', 'final_block_stmt0')
  self.assertRaisesTo(cfg, 'try_block', 'error_type')
  self.assertRaisesTo(cfg, 'error_type', outer_handler)
  self.assertExitsTo(cfg, 'except_block1', 'after_stmt0')

  # Raise edges into the outer bare-except handler.
  self.assertRaisesTo(cfg, 'after_stmt0', outer_handler)
  self.assertNotRaisesTo(cfg, 'try_block', outer_handler)
155 |
def test_control_flow_try_with_loop(self):
  """Checks loop edges and raise edges for a for-loop inside a try."""
  cfg = control_flow.get_control_flow_graph(tc.try_with_loop)
  self.assertSameBlock(cfg, 'for_body0', 'for_body1')
  self.assertSameBlock(cfg, 'except_body0', 'except_body1')

  # Normal control flow through and around the loop.
  for source, target in (
      ('before_stmt0', 'iterator'),
      ('iterator', 'target'),
      ('target', 'for_body0'),
      ('for_body1', 'target'),
      ('target', 'after_stmt0'),
  ):
    self.assertExitsTo(cfg, source, target)

  # Each of these must have a raise edge to the except body.
  for source in ('iterator', 'target', 'for_body1'):
    self.assertRaisesTo(cfg, source, 'except_body0')
170 |
def test_control_flow_break_in_finally(self):
  """Checks handler chaining and the break edge for tc.break_in_finally."""
  cfg = control_flow.get_control_flow_graph(tc.break_in_finally)
  finally_entry = 'finally_stmt0'
  finally_exit = 'finally_stmt1'

  # The exception handlers are tried sequentially until one matches.
  self.assertRaisesTo(cfg, 'try0', 'Exception0')
  self.assertExitsTo(cfg, 'Exception0', 'Exception1')
  self.assertExitsTo(cfg, 'Exception1', finally_entry)
  # If the finally block were to finish and the exception hadn't matched, then
  # the exception would exit to the FunctionDef's raise_block. However, the
  # break statement prevents the finally from finishing and so the exception
  # is lost when the break statement is reached.
  # TODO(dbieber): Add the following assert.
  # raise_block = graph.get_raise_block('break_in_finally')
  # self.assertNotExitsFromEndTo(graph, 'finally_stmt1', raise_block)
  # The finally block can of course still raise an exception of its own, so
  # the following is still true:
  # TODO(dbieber): Add the following assert.
  # self.assertRaisesTo(graph, 'finally_stmt1', raise_block)

  # An exception in the except handlers could flow to the finally block.
  for handler_source in ('Exception0', 'exception0_stmt0', 'Exception1'):
    self.assertRaisesTo(cfg, handler_source, finally_entry)

  # The break statement flows to after0, rather than to the loop header.
  self.assertNotExitsTo(cfg, finally_exit, 'target0')
  self.assertExitsTo(cfg, finally_exit, 'after0')
198 |
def test_control_flow_for_loop_with_else(self):
  """Checks break and else edges for tc.for_with_else."""
  cfg = control_flow.get_control_flow_graph(tc.for_with_else)
  loop_target = 'target'
  after_loop = 'after_stmt0'

  self.assertExitsTo(cfg, loop_target, 'for_stmt0')
  self.assertSameBlock(cfg, 'for_stmt0', 'condition')

  # If break is encountered, then the else clause is skipped.
  self.assertExitsTo(cfg, 'condition', after_loop)

  # The else clause executes if the loop completes without reaching the break.
  self.assertExitsTo(cfg, loop_target, 'else_stmt0')
  self.assertNotExitsTo(cfg, loop_target, after_loop)
210 |
def test_control_flow_lambda(self):
  """Checks that the statement before a lambda does not exit into it."""
  cfg = control_flow.get_control_flow_graph(tc.create_lambda)
  # Neither the lambda's arguments nor its body may be an exit target.
  for lambda_part in ('args', 'output'):
    self.assertNotExitsTo(cfg, 'before_stmt0', lambda_part)
215 |
def test_control_flow_generator(self):
  """Checks that a yield does not split the block it appears in."""
  cfg = control_flow.get_control_flow_graph(tc.generator)
  yielded_src = 'yield_statement'
  self.assertExitsTo(cfg, 'target', yielded_src)
  self.assertSameBlock(cfg, yielded_src, 'after_stmt0')
220 |
def test_control_flow_inner_fn_while_loop(self):
  """Checks the while-True loop edges in tc.fn_with_inner_fn."""
  cfg = control_flow.get_control_flow_graph(tc.fn_with_inner_fn)
  loop_test = 'True'
  self.assertExitsTo(cfg, 'x = 10', loop_test)
  # The loop test is queried against itself for both the exit edge and the
  # same-block relation.
  self.assertExitsTo(cfg, loop_test, loop_test)
  self.assertSameBlock(cfg, loop_test, loop_test)
226 |
def test_control_flow_example_class(self):
  """Checks that consecutive method statements of a class share a block."""
  class_cfg = control_flow.get_control_flow_graph(tc.ExampleClass)
  first_stmt, second_stmt = 'method_stmt0', 'method_stmt1'
  self.assertSameBlock(class_cfg, first_stmt, second_stmt)
230 |
def test_control_flow_return_outside_function(self):
  """A top-level `return` must be rejected with a descriptive RuntimeError."""
  with self.assertRaises(RuntimeError) as error:
    control_flow.get_control_flow_graph('return x')
  # assertIn performs a real substring check. assertContainsSubsequence on two
  # strings only verifies that the characters occur in order, possibly with
  # other characters in between, so it would accept unrelated messages.
  self.assertIn('outside of a function frame', str(error.exception))
236 |
def test_control_flow_continue_outside_loop(self):
  """`continue` is accepted inside a loop and rejected outside of one."""
  # Inside a for-loop this must not raise.
  control_flow.get_control_flow_graph('for i in j: continue')
  with self.assertRaises(RuntimeError) as error:
    control_flow.get_control_flow_graph('if x: continue')
  # assertIn performs a real substring check. assertContainsSubsequence on two
  # strings only verifies that the characters occur in order, possibly with
  # other characters in between, so it would accept unrelated messages.
  self.assertIn('outside of a loop frame', str(error.exception))
243 |
def test_control_flow_break_outside_loop(self):
  """`break` is accepted inside a loop and rejected outside of one."""
  # Inside a for-loop this must not raise.
  control_flow.get_control_flow_graph('for i in j: break')
  with self.assertRaises(RuntimeError) as error:
    control_flow.get_control_flow_graph('if x: break')
  # assertIn performs a real substring check. assertContainsSubsequence on two
  # strings only verifies that the characters occur in order, possibly with
  # other characters in between, so it would accept unrelated messages.
  self.assertIn('outside of a loop frame', str(error.exception))
250 |
def test_control_flow_for_all_test_components(self):
  """Builds a control flow graph for every function in the components."""
  components = inspect.getmembers(tc, predicate=inspect.isfunction)
  for _, component in components:
    # Only successful construction is checked; the graph is discarded.
    control_flow.get_control_flow_graph(component)
254 |
def test_control_flow_for_all_test_components_ast_to_instruction(self):
  """All INSTRUCTION_AST_NODES in an AST correspond to one Instruction.

  This assumes that a simple statement can't contain another simple statement.
  However, Yield nodes are the exception to this as they are contained within
  Expr nodes.

  We omit Yield nodes from INSTRUCTION_AST_NODES despite them being listed
  as simple statements in the Python docs.
  """
  instruction_types = instruction_module.INSTRUCTION_AST_NODES
  for _, component in inspect.getmembers(tc, predicate=inspect.isfunction):
    root = program_utils.program_to_ast(component)
    cfg = control_flow.get_control_flow_graph(root)
    for ast_node in ast.walk(root):
      if isinstance(ast_node, instruction_types):
        matches = list(cfg.get_control_flow_nodes_by_ast_node(ast_node))
        # Exactly one control flow node per instruction-level AST node.
        self.assertLen(matches, 1, ast.dump(ast_node))
273 |
def test_control_flow_reads_and_writes_appear_once(self):
  """Asserts each read and write in an Instruction is unique in the graph.

  Note that in the case of AugAssign, the same Name AST node is used once as
  a read and once as a write.
  """
  for _, component in inspect.getmembers(tc, predicate=inspect.isfunction):
    # Uniqueness is tracked per component, so both sets start empty here.
    seen_reads = set()
    seen_writes = set()
    root = program_utils.program_to_ast(component)
    cfg = control_flow.get_control_flow_graph(root)
    for instr in cfg.get_instructions():
      # Every read must be an ast.Name that has not been seen before.
      for read_access in instr.get_reads():
        # Tuple accesses carry the AST node in position 1.
        read_node = (
            read_access[1] if isinstance(read_access, tuple) else read_access)
        self.assertIsInstance(read_node, ast.Name, 'Unexpected read type.')
        self.assertNotIn(read_node, seen_reads,
                         instruction_module.access_name(read_node))
        seen_reads.add(read_node)

      # Every non-string write must be an ast.Name not seen before.
      for write_access in instr.get_writes():
        write_node = (
            write_access[1] if isinstance(write_access, tuple)
            else write_access)
        if not isinstance(write_node, six.string_types):
          self.assertIsInstance(write_node, ast.Name)
          self.assertNotIn(write_node, seen_writes,
                           instruction_module.access_name(write_node))
          seen_writes.add(write_node)
305 |
306 |
# Hand control to absltest's main() when this module is executed as a script.
if __name__ == '__main__':
  absltest.main()
309 |
--------------------------------------------------------------------------------
/python_graphs/control_flow_test_components.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Test components for testing control flow.
16 |
17 | Many of these components would produce RuntimeErrors if run. Their purpose is
18 | for the testing of the control_flow module.
19 | """
20 |
21 |
22 | # pylint: disable=missing-docstring
23 | # pylint: disable=pointless-statement,undefined-variable
24 | # pylint: disable=unused-variable,unused-argument
25 | # pylint: disable=bare-except,lost-exception,unreachable
26 | # pylint: disable=keyword-arg-before-vararg
# Fixture: three sequential assignments followed by a return; no branching.
def straight_line_code():
  x = 1
  y = x + 2
  z = y * 3
  return z
32 |
33 |
# Fixture: an `if` without `else` that conditionally reassigns y before the
# return.
def simple_if_statement():
  x = 1
  y = 2
  if x > y:
    y = 3
  return y
40 |
41 |
# Fixture: a single for-loop over range() followed by a return.
def simple_for_loop():
  x = 1
  for y in range(x + 2):
    z = y + 3
  return z
47 |
48 |
# Fixture: a for-loop whose target is a tuple, with a conditional break.
def tuple_in_for_loop():
  a, b = 0, 1
  for a, b in [(1, 2), (2, 3)]:
    if a > b:
      break
  return b - a
55 |
56 |
# Fixture: a while-loop with a one-statement body followed by a return.
def simple_while_loop():
  x = 1
  while x < 2:
    x += 3
  return x
62 |
63 |
# Fixture: a while-loop whose body unconditionally breaks, so the body never
# flows back to the loop test.
def break_in_while_loop():
  x = 1
  while x < 2:
    x += 3
    break
  return x
70 |
71 |
# Fixture: a while-loop nested in another while-loop, with a statement after
# the inner loop that belongs to the outer loop body.
def nested_while_loops():
  x = 1
  while x < 2:
    y = 3
    while y < 4:
      y += 5
    x += 6
  return x
80 |
81 |
# Fixture: a try statement with two typed handlers and a trailing bare except.
def multiple_excepts():
  try:
    x = 1
  except ValueError:
    x = 2
    x = 3
  except RuntimeError:
    x = 4
  except:
    x = 5
  return x
93 |
94 |
# Fixture: try/except/finally built from bare-name placeholder statements.
# The names are intentionally undefined; the function is analyzed, not run.
def try_finally():
  header0
  try:
    try0
    try1
  except Exception0 as value0:
    exception0_stmt0
  finally:
    finally_stmt0
    finally_stmt1
  after0
106 |
107 |
# Fixture: a try/except nested inside the body of an outer
# try/except/finally. All statements are bare-name placeholders; the function
# is analyzed, not run.
def exception_handling():
  try:
    before_stmt0
    before_stmt1
    try:
      try_block
    except error_type as value:
      except_block1
    after_stmt0
    after_stmt1
  except:
    except_block2_stmt0
    except_block2_stmt1
  finally:
    final_block_stmt0
    final_block_stmt1
  end_block_stmt0
  end_block_stmt1
126 |
127 |
# Fixture: a signature with a positional, a defaulted, a *varargs and a
# **kwargs parameter, ending in a bare return.
def fn_with_args(a, b=10, *varargs, **kwargs):
  body_stmt0
  body_stmt1
  return
132 |
133 |
# Fixture: a single return of a binary expression over both arguments.
def fn1(a, b):
  return a + b
136 |
137 |
# Fixture: an `if` without `else` containing one augmented assignment.
def fn2(a, b):
  c = a
  if a > b:
    c -= b
  return c
143 |
144 |
# Fixture: an if/else whose `if` branch holds several augmented assignments.
def fn3(a, b):
  c = a
  if a > b:
    c -= b
    c += 1
    c += 2
    c += 3
  else:
    c += b
  return c
155 |
156 |
# Fixture: a counting for-loop whose target rebinds the parameter `i` while
# iterating over range(i).
def fn4(i):
  count = 0
  for i in range(i):
    count += 1
  return count
162 |
163 |
# Fixture: a counting for-loop with a conditional break before the increment.
def fn5(i):
  count = 0
  for _ in range(i):
    if count > 5:
      break
    count += 1
  return count
171 |
172 |
# Fixture: a counting while-loop followed by a return.
def fn6():
  count = 0
  while count < 10:
    count += 1
  return count
178 |
179 |
# Fixture: an explicit raise inside a try, with a handler that deletes the
# bound exception name, followed by a bare return.
def fn7():
  try:
    raise ValueError('This will be caught.')
  except ValueError as e:
    del e
  return
186 |
187 |
# Fixture: try/except/else where the else clause returns, followed by a
# second return after the try statement.
def try_with_else():
  try:
    raise ValueError('This will be caught.')
  except ValueError as e:
    del e
  else:
    return 1
  return 2
196 |
197 |
# Fixture: a for/else loop with a conditional break in the body. Statements
# are bare-name placeholders; the function is analyzed, not run.
def for_with_else():
  for target in iterator:
    for_stmt0
    if condition:
      break
    for_stmt1
  else:
    else_stmt0
    else_stmt1
  after_stmt0
208 |
209 |
# Fixture: a single augmented assignment to the parameter; no return.
def fn8(a):
  a += 1
212 |
213 |
# Fixture mixing for/while nesting with break, continue and an early return.
# NOTE(review): the two `unreachable = ...` assignments are placed directly
# after `break` / `continue` to match their name; confirm this nesting against
# the upstream file, as the flattened text does not fix it.
def nested_loops(a):
  """A test function illustrating nested loops."""
  for i in range(a):
    while True:
      break
      unreachable = 10
    for j in range(i):
      for k in range(j):
        if j * k > 10:
          continue
          unreachable = 5
      if i + j == 10:
        return True
  return False
228 |
229 |
# Fixture: a for-loop inside a try block guarded by a bare except. Statements
# are bare-name placeholders; the function is analyzed, not run.
def try_with_loop():
  before_stmt0
  try:
    for target in iterator:
      for_body0
      for_body1
  except:
    except_body0
    except_body1
  after_stmt0
240 |
241 |
# Fixture: a try/except/except/finally inside a for-loop, where the finally
# clause ends with a break. Statements are bare-name placeholders; the
# function is analyzed, not run.
def break_in_finally():
  header0
  for target0 in iter0:
    try:
      try0
      try1
    except Exception0 as value0:
      exception0_stmt0
    except Exception1 as value1:
      exception1_stmt0
      exception1_stmt1
    finally:
      finally_stmt0
      finally_stmt1
      # This breaks out of the for-loop.
      break
  after0
259 |
260 |
# Fixture: a break inside the try body of a try/except/finally that sits in a
# for-loop.
def break_in_try():
  count = 0
  for _ in range(10):
    try:
      count += 1
      # This breaks out of the for-loop through the finally block.
      break
    except ValueError:
      pass
    finally:
      count += 2
  return count
273 |
274 |
# Fixture: three levels of nested try statements. The innermost handler's
# exception expression is a call, `ValueError(1+1)`, rather than a plain name.
def nested_try_excepts():
  try:
    try:
      x = 0
      x += 1
      try:
        x = 2 + 2
      except ValueError(1+1) as e:
        x = 3 - 3
      finally:
        x = 4
    except RuntimeError:
      x = 5 * 5
    finally:
      x = 6 ** 6
  except:
    x = 7 / 7
  return x
293 |
294 |
# Fixture: a single return of an expression with two binary operators.
def multi_op_expression():
  return 1 + 2 * 3
297 |
298 |
# Fixture: a lambda assignment between two placeholder statements. The
# lambda's parameter is `args` and its body is the bare name `output`.
def create_lambda():
  before_stmt0
  fn = lambda args: output
  after_stmt0
303 |
304 |
# Fixture: a generator function that yields inside a for-loop and then runs
# one more placeholder statement in the same loop body.
def generator():
  for target in iterator:
    yield yield_statement
    after_stmt0
309 |
310 |
# Fixture: an outer function whose only statement is a nested function
# definition; the inner function holds an assignment and a while-True loop.
def fn_with_inner_fn():
  def inner_fn():
    x = 10
    while True:
      pass
316 |
317 |
# Fixture: a class with a single method made of two placeholder statements.
class ExampleClass(object):

  def method0(self, arg):
    method_stmt0
    method_stmt1
323 |
--------------------------------------------------------------------------------
/python_graphs/control_flow_visualizer.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | r"""Create control flow graph visualizations for the test components.
16 |
17 |
18 | Usage:
19 | python -m python_graphs.control_flow_visualizer
20 | """
21 |
22 | import inspect
23 | import os
24 |
25 | from absl import app
26 | from absl import flags
27 | from absl import logging # pylint: disable=unused-import
28 |
29 | from python_graphs import control_flow
30 | from python_graphs import control_flow_graphviz
31 | from python_graphs import control_flow_test_components as tc
32 | from python_graphs import program_utils
33 |
34 | FLAGS = flags.FLAGS
35 |
36 |
def render_functions(functions, output_dir='/tmp/control_flow_graphs'):
  """Renders the control flow graph of each function to a PNG file.

  Args:
    functions: An iterable of (name, function) pairs. The name is logged and
      used as the output file's base name.
    output_dir: Directory the `<name>.png` files are written to. Defaults to
      the location that was previously hard-coded.
  """
  for name, function in functions:
    logging.info(name)
    graph = control_flow.get_control_flow_graph(function)
    path = os.path.join(output_dir, '{}.png'.format(name))
    # The function's source is passed along so it can be included in the
    # rendering.
    source = program_utils.getsource(function)
    control_flow_graphviz.render(graph, include_src=source, path=path)
44 |
45 |
def render_filepaths(filepaths, output_dir='/tmp/control_flow_graphs'):
  """Renders the control flow graph of each source file to a PNG file.

  Args:
    filepaths: An iterable of paths to Python source files.
    output_dir: Directory the `<filename>.png` files are written to. Defaults
      to the location that was previously hard-coded.
  """
  for filepath in filepaths:
    # The output name is the part of the basename before the first dot.
    filename = os.path.basename(filepath).split('.')[0]
    logging.info(filename)
    with open(filepath, 'r') as f:
      source = f.read()
    graph = control_flow.get_control_flow_graph(source)
    path = os.path.join(output_dir, '{}.png'.format(filename))
    control_flow_graphviz.render(graph, include_src=source, path=path)
55 |
56 |
def main(argv):
  """Renders every test component function, then this file itself."""
  del argv  # Unused.

  # getmembers yields (name, function) pairs, the shape render_functions
  # consumes.
  component_functions = list(
      inspect.getmembers(tc, predicate=inspect.isfunction))
  render_functions(component_functions)

  # Add filepaths here to visualize their functions.
  source_files = [__file__]
  render_filepaths(source_files)
71 |
72 |
# Let absl's app.run invoke main() when this module is executed as a script.
if __name__ == '__main__':
  app.run(main)
75 |
--------------------------------------------------------------------------------
/python_graphs/cyclomatic_complexity.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Computes the cyclomatic complexity of a program or control flow graph."""
16 |
17 |
def cyclomatic_complexity(control_flow_graph):
  """Computes the cyclomatic complexity of a function from its cfg.

  Only blocks reachable from the first enter block through `exits_from_end`
  edges are counted, and a single connected component is assumed.

  Args:
    control_flow_graph: A graph exposing `get_enter_blocks()`, whose blocks
      expose `exits_from_end`.
  Returns:
    E - N + 2, for E traversed edges and N reachable blocks.
  """
  entry = next(control_flow_graph.get_enter_blocks())

  # Blocks are tracked by identity rather than by equality.
  visited_ids = {id(entry)}
  stack = [entry]
  edge_count = 0

  while stack:
    current = stack.pop()
    for successor in current.exits_from_end:
      edge_count += 1
      successor_id = id(successor)
      if successor_id in visited_ids:
        continue
      visited_ids.add(successor_id)
      stack.append(successor)

  node_count = len(visited_ids)
  component_count = 1
  return edge_count - node_count + 2 * component_count
41 |
42 |
def cyclomatic_complexity2(control_flow_graph):
  """Computes the cyclomatic complexity of a program from its cfg.

  Every block in `control_flow_graph.blocks` is counted, whether or not it is
  reachable, and a single connected component is assumed.
  """
  blocks = control_flow_graph.blocks
  edge_count = sum(len(block.exits_from_end) for block in blocks)
  node_count = len(blocks)
  component_count = 1  # Assumes a single connected component.
  return edge_count - node_count + 2 * component_count
50 |
51 |
def cyclomatic_complexity3(control_flow_graph):
  """Computes the cyclomatic complexity of a program from its cfg.

  The traversal is seeded with the graph's start block and with every enter
  block. The start block counts as one connected component and each enter
  block adds one more.

  Args:
    control_flow_graph: A graph exposing `start_block` and
      `get_enter_blocks()`, whose blocks expose `exits_from_end`.
  Returns:
    E - N + 2P, for E traversed edges, N visited blocks and P components.
  """
  start = control_flow_graph.start_block
  entries = control_flow_graph.get_enter_blocks()

  stack = [start]
  visited_ids = {id(start)}
  component_count = 1
  edge_count = 0

  # Every enter block is pushed unconditionally and counted as a component.
  for entry in entries:
    stack.append(entry)
    visited_ids.add(id(entry))
    component_count += 1

  while stack:
    current = stack.pop()
    for successor in current.exits_from_end:
      edge_count += 1
      successor_id = id(successor)
      if successor_id in visited_ids:
        continue
      visited_ids.add(successor_id)
      stack.append(successor)

  node_count = len(visited_ids)
  return edge_count - node_count + 2 * component_count
80 |
--------------------------------------------------------------------------------
/python_graphs/cyclomatic_complexity_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for cyclomatic_complexity.py."""
16 |
17 | from absl.testing import absltest
18 | from absl.testing import parameterized
19 |
20 | from python_graphs import control_flow
21 | from python_graphs import control_flow_test_components as tc
22 | from python_graphs import cyclomatic_complexity
23 |
24 |
class CyclomaticComplexityTest(parameterized.TestCase):
  """Checks cyclomatic_complexity against known values for test components."""

  @parameterized.parameters(
      (tc.straight_line_code, 1),
      (tc.simple_if_statement, 2),
      (tc.simple_for_loop, 2),
  )
  def test_cyclomatic_complexity(self, component, target_value):
    """The complexity computed from the component's CFG equals the target."""
    cfg = control_flow.get_control_flow_graph(component)
    self.assertEqual(
        cyclomatic_complexity.cyclomatic_complexity(cfg), target_value)
36 |
# Hand control to absltest's main() when this module is executed as a script.
if __name__ == '__main__':
  absltest.main()
39 |
--------------------------------------------------------------------------------
/python_graphs/data_flow.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Data flow analysis of Python programs."""
16 |
17 | import collections
18 |
19 | from absl import logging # pylint: disable=unused-import
20 | import gast as ast
21 |
22 | from python_graphs import control_flow
23 | from python_graphs import instruction as instruction_module
24 |
25 |
26 | READ = instruction_module.READ
27 | WRITE = instruction_module.WRITE
28 |
29 |
class Analysis(object):
  """Base class for a data flow analysis.

  Subclasses supply the aggregation and transfer functions; `visit` drives
  them over a graph with a worklist.

  Attributes:
    label: The name of the analysis.
    forward: (bool) True for forward analyses, False for backward analyses.
    in_label: The name of the analysis, suffixed with _in.
    out_label: The name of the analysis, suffixed with _out.
    before_label: Either the in_label or out_label depending on the direction of
      the analysis. Marks the before_value on a node during an analysis.
    after_label: Either the in_label or out_label depending on the direction of
      the analysis. Marks the after_value on a node during an analysis.
  """

  def __init__(self, label, forward):
    self.label = label
    self.forward = forward

    self.in_label = label + '_in'
    self.out_label = label + '_out'

    # "before" is the side the analysis arrives from, "after" the side it
    # leaves through.
    if forward:
      self.before_label, self.after_label = self.in_label, self.out_label
    else:
      self.before_label, self.after_label = self.out_label, self.in_label

  def aggregate_previous_after_values(self, previous_after_values):
    """Computes the before value for a node from the previous after values.

    This is the 'meet' or 'join' function of the analysis.
    TODO(dbieber): Update terminology to match standard textbook notation.

    Args:
      previous_after_values: The after values of all before nodes.
    Returns:
      The before value for the current node.
    """
    raise NotImplementedError

  def compute_after_value(self, node, before_value):
    """Computes the after value for a node from the node and the before value.

    This is the 'transfer' function of the analysis.
    TODO(dbieber): Update terminology to match standard textbook notation.

    Args:
      node: The node or block for which to compute the after value.
      before_value: The before value of the node.
    Returns:
      The computed after value for the node.
    """
    raise NotImplementedError

  def visit(self, node):
    """Visit the nodes of the control flow graph, performing the analysis.

    Terminology:
      in_value: The value of the analysis at the start of a node.
      out_value: The value of the analysis at the end of a node.
      before_value: in_value in a forward analysis; out_value in a backward
        analysis.
      after_value: out_value in a forward analysis; in_value in a backward
        analysis.

    Args:
      node: A graph element that supports the .next / .prev API, such as a
        ControlFlowNode from a ControlFlowGraph or a BasicBlock from a
        ControlFlowGraph.
    """
    worklist = collections.deque([node])
    while worklist:
      current = worklist.popleft()

      if self.forward:
        upstream, downstream = current.prev, current.next
      else:
        upstream, downstream = current.next, current.prev

      # Only upstream nodes that already carry an after value contribute.
      incoming_values = [
          neighbor.get_label(self.after_label)
          for neighbor in upstream
          if neighbor.has_label(self.after_label)]

      # Remember the hash of the old after value (None when there is none) so
      # a change can be detected below.
      previous_hash = None
      if current.has_label(self.after_label):
        previous_hash = hash(current.get_label(self.after_label))

      before_value = self.aggregate_previous_after_values(incoming_values)
      current.set_label(self.before_label, before_value)
      after_value = self.compute_after_value(current, before_value)
      current.set_label(self.after_label, after_value)

      # Downstream nodes are re-queued only when the after value's hash
      # differs from the previous one.
      if hash(after_value) != previous_hash:
        worklist.extend(downstream)
119 |
120 |
def get_while_loop_variables(node, graph=None):
  """Gets the set of loop variables used for while loop rewriting.

  This is the set of variables used for rewriting a while loop into its
  functional form.

  Args:
    node: An ast.While AST node.
    graph: (Optional) The ControlFlowGraph of the function or program containing
      the while loop. If not present, the control flow graph for the while loop
      will be computed.
  Returns:
    The set of variable identifiers that are live at the start of the loop's
    test and are written to within the while loop.
  """
  if not graph:
    graph = control_flow.get_control_flow_graph(node)
  loop_test_block = graph.get_block_by_ast_node(node.test)

  # Liveness is a backward analysis, so it is started from each exit block.
  for exit_block in graph.get_exit_blocks():
    LivenessAnalysis().visit(exit_block)

  # TODO(dbieber): Move this logic into the Analysis class to avoid the use of
  # magic strings.
  live_names = loop_test_block.get_label('liveness_in')

  # Only writes that are plain Name nodes contribute an identifier.
  written_names = set()
  for write in instruction_module.get_writes_from_ast_node(node):
    if isinstance(write, ast.Name):
      written_names.add(write.id)
  return live_names & written_names
151 |
152 |
class LivenessAnalysis(Analysis):
  """Liveness analysis by basic block.

  In the liveness analysis, the in_value of a block is the set of variables
  that are live at the start of a block. "Live" means that the current value of
  the variable may be used later in the execution. The out_value of a block is
  the set of variable identifiers that are live at the end of the block.

  Since this is a backward analysis, the "before_value" is the out_value and the
  "after_value" is the in_value.
  """

  def __init__(self):
    super(LivenessAnalysis, self).__init__(label='liveness', forward=False)

  def aggregate_previous_after_values(self, previous_after_values):
    """Computes the out_value (before_value) of a block.

    Args:
      previous_after_values: A list of the sets of live variables at the start
        of each of the blocks following the current block.
    Returns:
      The set of live variables at the end of the current block. This is the
      union of live variable sets at the start of each subsequent block.
    """
    live_at_end = frozenset()
    for successor_live_in in previous_after_values:
      live_at_end = live_at_end | successor_live_in
    return live_at_end

  def compute_after_value(self, block, before_value):
    """Computes the liveness analysis gen and kill sets for a basic block.

    The gen set is the set of variables read by the block before they are
    written to.
    The kill set is the set of variables written to by the basic block.

    Args:
      block: The BasicBlock to analyze.
      before_value: The out_value for block (the set of variables live at the
        end of the block.)
    Returns:
      The in_value for block (the set of variables live at the start of the
      block).
    """
    generated = set()
    killed = set()
    for cfg_node in block.control_flow_nodes:
      instr = cfg_node.instruction
      # A read counts as generated only if no earlier instruction in this
      # block wrote the name; this instruction's own writes are added to the
      # kill set afterwards.
      generated.update(
          name for name in instr.get_read_names() if name not in killed)
      killed.update(instr.get_write_names())
    survivors = before_value - killed
    return frozenset(survivors | generated)
207 |
208 |
class FrozenDict(dict):
  """A dict subclass that is hashable through its sorted items."""

  def __hash__(self):
    # Sorting makes the hash independent of insertion order.
    ordered_items = sorted(self.items())
    return hash(tuple(ordered_items))
213 |
214 |
class LastAccessAnalysis(Analysis):
  """Computes for each variable its possible last reads and last writes."""

  def __init__(self):
    super(LastAccessAnalysis, self).__init__(label='last_access', forward=True)

  def aggregate_previous_after_values(self, previous_after_values):
    """Merges the incoming mappings by unioning the sets stored per key."""
    merged = {}
    for incoming in previous_after_values:
      for key, accesses in incoming.items():
        merged[key] = merged.get(key, frozenset()) | accesses
    return FrozenDict(merged)

  def compute_after_value(self, node, before_value):
    """Overrides the entry of every access made by the node's instruction.

    Each access replaces the stored set for its (kind, name) key with a set
    holding only that access.
    """
    updated = before_value.copy()
    updated.update(
        (instruction_module.access_kind_and_name(access), frozenset([access]))
        for access in node.instruction.accesses)
    return FrozenDict(updated)
234 |
--------------------------------------------------------------------------------
/python_graphs/data_flow_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for data_flow.py."""
16 |
17 | import inspect
18 |
19 | from absl import logging # pylint: disable=unused-import
20 | from absl.testing import absltest
21 | import gast as ast
22 |
23 | from python_graphs import control_flow
24 | from python_graphs import control_flow_test_components as tc
25 | from python_graphs import data_flow
26 | from python_graphs import program_utils
27 |
28 |
class DataFlowTest(absltest.TestCase):
  """Tests for the liveness and last-access analyses in data_flow."""

  def test_get_while_loop_variables(self):
    """Runs liveness analysis on nested while loops and logs the results."""
    graph = control_flow.get_control_flow_graph(
        program_utils.program_to_ast(tc.nested_while_loops))

    # node = graph.get_ast_node_by_type(ast.While)
    # TODO(dbieber): data_flow.get_while_loop_variables(node, graph)

    liveness = data_flow.LivenessAnalysis()
    for exit_block in graph.get_exit_blocks():
      liveness.visit(exit_block)

    for test_block in graph.get_blocks_by_ast_node_type_and_label(
        ast.While, 'test_block'):
      logging.info(test_block.get_label('liveness_out'))

  def test_liveness_simple_while_loop(self):
    """Checks that only `x` is reported as a loop variable of the while loop."""
    def simple_while_loop():
      a = 2
      b = 10
      x = 1
      while x < b:
        tmp = x + a
        x = tmp + 1

    program_node = program_utils.program_to_ast(simple_while_loop)
    graph = control_flow.get_control_flow_graph(program_node)

    # TODO(dbieber): Use unified query system.
    while_node = next(
        candidate for candidate in ast.walk(program_node)
        if isinstance(candidate, ast.While))
    self.assertEqual(
        data_flow.get_while_loop_variables(while_node, graph), {'x'})

  def test_data_flow_nested_loops(self):
    """Checks last accesses of `count` around `count += 1` in nested loops."""
    def fn():
      count = 0
      for x in range(10):
        for y in range(10):
          if x == y:
            count += 1
      return count

    graph = control_flow.get_control_flow_graph(
        program_utils.program_to_ast(fn))

    # Perform the analysis.
    last_access = data_flow.LastAccessAnalysis()
    last_access.visit(graph.start_block.control_flow_nodes[0])
    for enter_node in graph.get_enter_control_flow_nodes():
      last_access.visit(enter_node)

    # Verify correctness.
    increment = graph.get_control_flow_node_by_source('count += 1')
    before = increment.get_label('last_access_in')
    after = increment.get_label('last_access_out')
    self.assertLen(before['write-count'], 2)  # += 1, = 0
    self.assertLen(before['read-count'], 1)  # += 1
    self.assertLen(after['write-count'], 1)  # += 1
    self.assertLen(after['read-count'], 1)  # += 1

  def test_last_accesses_analysis(self):
    """Checks labels and last accesses of `x` in the nested while loops."""
    graph = control_flow.get_control_flow_graph(
        program_utils.program_to_ast(tc.nested_while_loops))

    last_access = data_flow.LastAccessAnalysis()
    last_access.visit(graph.start_block.control_flow_nodes[0])

    for enter_node in graph.get_enter_control_flow_nodes():
      last_access.visit(enter_node)

    # Every control flow node must have been labeled by the analysis.
    for block in graph.blocks:
      for cfn in block.control_flow_nodes:
        for label in ('last_access_in', 'last_access_out'):
          self.assertTrue(cfn.has_label(label))

    # TODO(dbieber): Add asserts that these are the correct accesses.
    for source in ('y += 5', 'return x'):
      last_accesses = graph.get_control_flow_node_by_source(source).get_label(
          'last_access_out')
      self.assertLen(last_accesses['write-x'], 2)  # x = 1, x += 6
      self.assertLen(last_accesses['read-x'], 1)  # x < 2

  def test_liveness_analysis_all_test_components(self):
    """Smoke test: liveness analysis runs on every test component."""
    components = inspect.getmembers(tc, predicate=inspect.isfunction)
    for unused_name, fn in components:
      graph = control_flow.get_control_flow_graph(
          program_utils.program_to_ast(fn))

      liveness = data_flow.LivenessAnalysis()
      for exit_block in graph.get_exit_blocks():
        liveness.visit(exit_block)

  def test_last_access_analysis_all_test_components(self):
    """Smoke test: last-access analysis runs on every test component."""
    components = inspect.getmembers(tc, predicate=inspect.isfunction)
    for unused_name, fn in components:
      graph = control_flow.get_control_flow_graph(
          program_utils.program_to_ast(fn))

      last_access = data_flow.LastAccessAnalysis()
      for enter_node in graph.get_enter_control_flow_nodes():
        last_access.visit(enter_node)
136 |
# Run every test in this module through absltest when executed as a script.
if __name__ == '__main__':
  absltest.main()
139 |
--------------------------------------------------------------------------------
/python_graphs/instruction.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """An Instruction represents an executable unit of a Python program.
16 |
17 | Almost all simple statements correspond to Instructions, except for statements
18 | like pass, continue, and break, whose effects are already represented in the
19 | structure of the control-flow graph.
20 |
21 | In addition to simple statements, assignments that take place outside of simple
22 | statements such as implicitly in a function or class definition also correspond
23 | to Instructions.
24 |
25 | The complete set of places where Instructions occur in source are listed here:
26 |
27 | 1. (Any node in INSTRUCTION_AST_NODES used as a statement.)
28 | 2. if INSTRUCTION: ... (elif is the same.)
29 | 3+4. for INSTRUCTION in INSTRUCTION: ...
30 | 5. while INSTRUCTION: ...
31 | 6. try: ... except INSTRUCTION: ...
32 | 7. TODO(dbieber): Test for "with INSTRUCTION:"...
33 |
34 | In the code:
35 |
36 | @decorator
37 | def fn(args=defaults):
38 | body
39 |
40 | Outside of the function definition, we get the following instructions:
41 | 8. Each decorator is an Instruction.
42 | 9. Each default is an Instruction.
43 | 10. The assignment of the function def to the function name is an Instruction.
44 | Inside the function definition, we get the following instructions:
45 | 11. An Instruction for the assignment of values to the arguments.
46 | (1, again) And then the body can consist of multiple Instructions too.
47 |
48 | Likewise in the code:
49 |
50 | @decorator
51 | class C(object):
52 | body
53 |
54 | The following are Instructions:
55 | (8, again) Each decorator is an Instruction
56 | 12. The assignment of the class to the variable C is an Instruction.
57 | (1, again) And then the body can consist of multiple Instructions too.
58 | 13. TODO(dbieber): The base class (object) is an Instruction too.
59 | """
60 |
61 | import gast as ast
62 | import six
63 |
# Types of accesses:
# These values are the `kind` field of tuple accesses and the prefix of access
# identifiers such as 'read-x' / 'write-x'.
READ = 'read'
WRITE = 'write'

# Context lists
# A Name access counts as a write (resp. read) when its `ctx` is an instance of
# one of these context classes; see access_is_write / access_is_read.
WRITE_CONTEXTS = (ast.Store, ast.Del, ast.Param, ast.AugStore)
READ_CONTEXTS = (ast.Load, ast.AugLoad)

# Sources of implicit writes:
# Values for Instruction.source, describing where a write comes from when it
# does not correspond directly to the code of the instruction's AST node.
CLASS = 'class'
FUNCTION = 'function'
ARGS = 'args'
KWARG = 'kwarg'
KWONLYARGS = 'kwonlyargs'
VARARG = 'vararg'
ITERATOR = 'iter'
EXCEPTION = 'exception'

# AST statement types that correspond to an Instruction when used as a
# statement (item 1 in the module docstring).
INSTRUCTION_AST_NODES = (
    ast.Expr,  # expression_stmt
    ast.Assert,  # assert_stmt
    ast.Assign,  # assignment_stmt
    ast.AugAssign,  # augmented_assignment_stmt
    ast.Delete,  # del_stmt
    ast.Print,  # print_stmt
    ast.Return,  # return_stmt
    # ast.Yield,  # yield_stmt. ast.Yield nodes are contained in ast.Expr nodes.
    ast.Raise,  # raise_stmt
    ast.Import,  # import_stmt
    ast.ImportFrom,
    ast.Global,  # global_stmt
    ast.Exec,  # exec_stmt
)

# https://docs.python.org/2/reference/simple_stmts.html
# All simple statements: the instruction statements plus pass, break, and
# continue, which the module docstring notes are not Instructions because the
# control-flow graph structure already captures their effect.
SIMPLE_STATEMENT_AST_NODES = INSTRUCTION_AST_NODES + (
    ast.Pass,  # pass_stmt
    ast.Break,  # break_stmt
    ast.Continue,  # continue_stmt
)
104 |
105 |
def _canonicalize(node):
  """Strips wrappers that do not change which program a node represents.

  Single-element lists, Module nodes, and Expr statement nodes are unwrapped
  repeatedly until none of those cases applies.

  Args:
    node: An AST node, a primitive, or a list of AST nodes.

  Returns:
    The innermost wrapped value, or `node` itself if there is nothing to unwrap.
  """
  if isinstance(node, list) and len(node) == 1:
    return _canonicalize(node[0])
  if isinstance(node, ast.Module):
    return _canonicalize(node.body)
  if isinstance(node, ast.Expr):
    return _canonicalize(node.value)
  return node


def _lists_represent_same_program(items1, items2):
  """Whether two lists have equal length and pairwise-equivalent items."""
  if len(items1) != len(items2):
    # zip() alone would silently accept one list being a prefix of the other.
    return False
  return all(
      represent_same_program(item1, item2)
      for item1, item2 in zip(items1, items2))


def represent_same_program(node1, node2):
  """Whether AST nodes node1 and node2 represent the same program syntactically.

  Two programs are the same syntactically if they have equivalent ASTs, up to
  some small changes. The context field of Name nodes can change without the
  syntax represented by the AST changing. This allows for example for the short
  program 'x' (a read) to match with a subprogram 'x' of 'x = 3' (in which x is
  a write), since these two programs are the same syntactically ('x' and 'x').

  Except for the context field of Name nodes, the two nodes are recursively
  checked for exact equality. In particular, list-valued fields (and top-level
  lists of statements) must have the same number of items; one list being a
  prefix of the other is not a match.

  Args:
    node1: An AST node. This can be an ast.AST object, a primitive, or a list of
      AST nodes (primitives or ast.AST objects).
    node2: An AST node. This can be an ast.AST object, a primitive, or a list of
      AST nodes (primitives or ast.AST objects).

  Returns:
    Whether the two nodes represent equivalent programs.
  """
  node1 = _canonicalize(node1)
  node2 = _canonicalize(node2)

  if type(node1) != type(node2):  # pylint: disable=unidiomatic-typecheck
    return False
  if isinstance(node1, list):
    # Multi-statement programs canonicalize to lists; compare them structurally
    # rather than with ==, which would compare the AST objects by identity.
    return _lists_represent_same_program(node1, node2)
  if not isinstance(node1, ast.AST):
    return node1 == node2

  fields1 = list(ast.iter_fields(node1))
  fields2 = list(ast.iter_fields(node2))
  if len(fields1) != len(fields2):
    return False

  for (field1, value1), (field2, value2) in zip(fields1, fields2):
    if field1 == 'ctx':
      # Read vs. write context does not change the syntax being represented.
      continue
    if field1 != field2 or type(value1) is not type(value2):
      return False
    if isinstance(value1, list):
      if not _lists_represent_same_program(value1, value2):
        return False
    elif not represent_same_program(value1, value2):
      return False

  return True
163 |
164 |
class AccessVisitor(ast.NodeVisitor):
  """Visitor that collects the variable accesses of an AST, in order.

  The order follows a depth-first traversal of the AST using the field order
  defined by `gast`. Assign nodes are the exception: their RHS is visited
  before their LHS.

  This may differ from Python execution semantics in two ways:

  - Both sides of short-circuit `and`/`or` expressions, and both branches of
    conditional `X if Y else Z` expressions, are treated as evaluated, even if
    one of them is actually skipped at runtime.
  - Where an AST node's field order does not match the interpreter's
    evaluation order, the field order is used. Most AST nodes match execution
    order, but some differ (e.g. for dictionary literals the interpreter
    alternates between keys and values, whereas the fields list all keys and
    then all values). Assignments are special-cased as described above.

  Attributes:
    accesses: List of accesses encountered by the visitor. Each item is either
      a Name node or a ('read', Name node, AugAssign node) tuple.
  """

  # TODO(dbieber): Include accesses of ast.Subscript and ast.Attribute targets.

  def __init__(self):
    self.accesses = []

  def visit_Name(self, node):
    """Records a Name node as an access."""
    self.accesses.append(node)

  def visit_Assign(self, node):
    """Visits the RHS of an Assign and then each of its targets."""
    for child in [node.value] + list(node.targets):
      self.visit(child)

  def visit_AugAssign(self, node):
    """Visits an AugAssign, recording both the read and the write of a Name."""
    target = node.target
    self.visit(node.value)
    # The target Name carries a write ctx, yet an AugAssign also reads it, so
    # the read is recorded explicitly before the write is recorded as normal.
    # TODO(dbieber): Use a proper type instead of a tuple for accesses.
    if isinstance(target, ast.Name):
      self.accesses.append(('read', target, node))
    self.visit(target)
214 |
215 |
def get_accesses_from_ast_node(node):
  """Returns the ordered list of accesses an AccessVisitor collects from `node`.

  Args:
    node: The AST node to traverse.

  Returns:
    The visitor's `accesses` list after visiting `node`.
  """
  access_visitor = AccessVisitor()
  access_visitor.visit(node)
  return access_visitor.accesses
221 |
222 |
def get_reads_from_ast_node(ast_node):
  """Get all reads for an AST node, in depth-first AST field order.

  Args:
    ast_node: The AST node of interest.

  Returns:
    A list of reads performed by that AST node.
  """
  all_accesses = get_accesses_from_ast_node(ast_node)
  return list(filter(access_is_read, all_accesses))
236 |
237 |
def get_writes_from_ast_node(ast_node):
  """Get all writes for an AST node, in depth-first AST field order.

  Args:
    ast_node: The AST node of interest.

  Returns:
    A list of writes performed by that AST node.
  """
  all_accesses = get_accesses_from_ast_node(ast_node)
  return list(filter(access_is_write, all_accesses))
251 |
252 |
def create_writes(node, parent=None):
  """Builds write-access tuples for `node`.

  Args:
    node: Either an AST node, in which case one write is created for every Name
      node found by walking it, or any other value (such as a variable name
      string), which yields a single write for that value.
    parent: The value stored as the third field of each access tuple.

  Returns:
    A list of ('write', node_or_name, parent) tuples.
  """
  # TODO(dbieber): Use a proper type instead of a tuple for accesses.
  if not isinstance(node, ast.AST):
    return [('write', node, parent)]
  names = (n for n in ast.walk(node) if isinstance(n, ast.Name))
  return [('write', name, parent) for name in names]
261 |
262 |
def access_is_read(access):
  """Whether `access` is a read.

  Args:
    access: Either a Name AST node, judged by its `ctx`, or an access tuple,
      judged by its first field.

  Returns:
    True if the access is a read.
  """
  if not isinstance(access, ast.AST):
    return access[0] == 'read'
  assert isinstance(access, ast.Name), access
  return isinstance(access.ctx, READ_CONTEXTS)
269 |
270 |
def access_is_write(access):
  """Whether `access` is a write.

  Args:
    access: Either a Name AST node, judged by its `ctx`, or an access tuple,
      judged by its first field.

  Returns:
    True if the access is a write.
  """
  if not isinstance(access, ast.AST):
    return access[0] == 'write'
  assert isinstance(access, ast.Name), access
  return isinstance(access.ctx, WRITE_CONTEXTS)
277 |
278 |
def access_name(access):
  """Returns the variable name that `access` refers to.

  Args:
    access: Either an AST node with an `id` (a Name), or an access tuple whose
      second field is a string or a Name node.

  Returns:
    The name as a string.

  Raises:
    ValueError: `access` has none of the supported shapes.
  """
  if isinstance(access, ast.AST):
    return access.id
  if isinstance(access, tuple):
    target = access[1]
    if isinstance(target, six.string_types):
      return target
    if isinstance(target, ast.Name):
      return target.id
  raise ValueError('Unexpected access type.', access)
288 |
289 |
def access_kind(access):
  """Returns 'read' or 'write' for `access`, or None if it is neither."""
  if access_is_read(access):
    return 'read'
  if access_is_write(access):
    return 'write'
  return None
295 |
296 |
def access_kind_and_name(access):
  """Returns the '<kind>-<name>' identifier of `access`, e.g. 'write-x'."""
  kind = access_kind(access)
  name = access_name(access)
  return '{}-{}'.format(kind, name)
299 |
300 |
def access_identifier(name, kind):
  """Returns the '<kind>-<name>' identifier for a variable name and kind.

  Args:
    name: The variable name.
    kind: The access kind, e.g. 'read' or 'write'.

  Returns:
    The kind and the name joined by a hyphen, kind first.
  """
  return '{kind}-{name}'.format(kind=kind, name=name)
303 |
304 |
class Instruction(object):
  # pyformat:disable
  """An executable unit of a Python program.

  An Instruction wraps the part of an AST that corresponds to a simple
  statement or an assignment, as opposed to control flow. It always holds an
  AST node; the individual accesses it makes may however refer to a plain
  string (such as a variable name) rather than to an AST node.

  Instructions are the smallest unit of a control flow graph, where each one is
  wrapped in a ControlFlowNode. A control flow graph is made of basic blocks,
  each a sequence of Instructions that run in a straight line or not at all.

  Treat an Instruction as immutable: Python does not prevent mutation, but an
  Instruction should not be modified after it has been created.

  An exception may interrupt an Instruction mid-execution. Control flow graphs
  model this with interrupting exits from basic blocks to exception handlers or
  to special 'raises' blocks.

  Besides pure simple statements, an Instruction can stand for several other
  parts of code; the module docstring lists all of them.

  Usually the accesses of an Instruction are the Name nodes inside its AST
  node. When `source` is not None this need not hold. For a function
  definition, for example, the only access is the assignment of the function
  to its name; the Name nodes inside the definition belong to the other
  Instructions that make up the function. Accesses are fixed when the
  Instruction is created and exposed through the `accesses` attribute.

  Attributes:
    node: The AST node corresponding to the instruction.
    accesses: (optional) An ordered list of all reads and writes made by this
      instruction. Each item in `accesses` is one of either:
        - A 3-tuple with fields (kind, node, parent). kind is either 'read' or
          'write'. node is either a string or Name AST node. parent is an AST
          node where node occurs.
        - A Name AST node
      # TODO(dbieber): Use a single type for all accesses.
    source: (optional) The source of the writes. For example in the for loop
      `for x in items: pass` there is a instruction for the Name node "x". Its
      source is ITERATOR, indicating that this instruction corresponds to x
      being assigned a value from an iterator. When source is not None, the
      Python code corresponding to the instruction does not coincide with the
      Python code corresponding to the instruction's node.
  """
  # pyformat:enable

  def __init__(self, node, accesses=None, source=None):
    """Creates an Instruction.

    Args:
      node: The AST node of the instruction.
      accesses: The accesses made by the instruction. When None, they are
        computed from `node` with get_accesses_from_ast_node.
      source: The source of the instruction's writes, or None.

    Raises:
      TypeError: `node` is not an ast.AST instance.
    """
    if not isinstance(node, ast.AST):
      raise TypeError('node must be an instance of ast.AST.', node)
    self.node = node
    self.accesses = (
        get_accesses_from_ast_node(node) if accesses is None else accesses)
    self.source = source

  def contains_subprogram(self, node):
    """Whether this Instruction contains the given AST as a subprogram.

    Without a source, `node` matches if it represents the same program as any
    subtree of this Instruction's AST. With a source (an implied write), only
    a match against the Instruction's whole node counts.

    Args:
      node: The node to check the instruction against for a match.

    Returns:
      (bool) Whether or not this Instruction contains the node, syntactically.
    """
    if self.source is not None:
      # Only exact matches are permissible if source is not None.
      return represent_same_program(node, self.node)
    return any(
        represent_same_program(node, subtree)
        for subtree in ast.walk(self.node))

  def get_reads(self):
    """Returns the set of accesses of this instruction that are reads."""
    return set(filter(access_is_read, self.accesses))

  def get_read_names(self):
    """Returns the set of variable names read by this instruction."""
    return set(map(access_name, self.get_reads()))

  def get_writes(self):
    """Returns the set of accesses of this instruction that are writes."""
    return set(filter(access_is_write, self.accesses))

  def get_write_names(self):
    """Returns the set of variable names written by this instruction."""
    return set(map(access_name, self.get_writes()))
401 |
--------------------------------------------------------------------------------
/python_graphs/instruction_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for instruction module."""
16 |
17 | from absl.testing import absltest
18 | import gast as ast
19 | from python_graphs import instruction as instruction_module
20 |
21 |
def create_instruction(source):
  """Parses `source` and wraps its canonicalized AST in an Instruction."""
  canonical = instruction_module._canonicalize(ast.parse(source))
  return instruction_module.Instruction(canonical)
26 |
27 |
class InstructionTest(absltest.TestCase):
  """Tests for program equivalence and for the accesses of Instructions."""

  def test_instruction(self):
    self.assertIsNotNone(instruction_module.Instruction)

  def test_represent_same_program_basic_positive_case(self):
    first = ast.parse('x + 1')
    second = ast.parse('x + 1')
    same = instruction_module.represent_same_program(first, second)
    self.assertTrue(same)

  def test_represent_same_program_basic_negative_case(self):
    first = ast.parse('x + 1')
    second = ast.parse('x + 2')
    same = instruction_module.represent_same_program(first, second)
    self.assertFalse(same)

  def test_represent_same_program_different_contexts(self):
    assignment = ast.parse('y = x + 1')  # y is a write
    written_y = assignment.body[0].targets[0]  # 'y'
    read_y = ast.parse('y')  # y is a read
    same = instruction_module.represent_same_program(written_y, read_y)
    self.assertTrue(same)

  def test_get_accesses(self):
    # (source, expected read names, expected write names)
    cases = [
        ('x + 1', {'x'}, set()),
        ('return x + y + z', {'x', 'y', 'z'}, set()),
        ('fn(a, b, c)', {'a', 'b', 'c', 'fn'}, set()),
        ('c = fn(a, b, c)', {'a', 'b', 'c', 'fn'}, {'c'}),
    ]
    for source, reads, writes in cases:
      instruction = create_instruction(source)
      self.assertEqual(instruction.get_read_names(), reads)
      self.assertEqual(instruction.get_write_names(), writes)

  def test_get_accesses_augassign(self):
    # (source, expected read names, expected write names)
    cases = [
        ('x += 1', {'x'}, {'x'}),
        ('x *= y', {'x', 'y'}, {'x'}),
    ]
    for source, reads, writes in cases:
      instruction = create_instruction(source)
      self.assertEqual(instruction.get_read_names(), reads)
      self.assertEqual(instruction.get_write_names(), writes)

  def test_get_accesses_augassign_subscript(self):
    instruction = create_instruction('x[0] *= y')
    # This is not currently considered a write of x. It is a read of x.
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), set())

  def test_get_accesses_augassign_attribute(self):
    instruction = create_instruction('x.attribute *= y')
    # This is not currently considered a write of x. It is a read of x.
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), set())

  def test_get_accesses_subscript(self):
    instruction = create_instruction('x[0] = y')
    # This is not currently considered a write of x. It is a read of x.
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), set())

  def test_get_accesses_attribute(self):
    instruction = create_instruction('x.attribute = y')
    # This is not currently considered a write of x. It is a read of x.
    self.assertEqual(instruction.get_read_names(), {'x', 'y'})
    self.assertEqual(instruction.get_write_names(), set())

  def test_access_ordering(self):
    def names_and_kinds(instruction):
      """Returns the (name, kind) pair of every access, in order."""
      pairs = []
      for access in instruction.accesses:
        pairs.append((instruction_module.access_name(access),
                      instruction_module.access_kind(access)))
      return pairs

    # The reads on the RHS come first, followed by the write of the target.
    rhs_reads = [('fn', 'read'), ('a', 'read'), ('b', 'read'), ('c', 'read'),
                 ('d', 'read'), ('a', 'read')]

    instruction = create_instruction('c = fn(a, b + c, d / a)')
    self.assertEqual(names_and_kinds(instruction),
                     rhs_reads + [('c', 'write')])

    # An augmented assignment additionally reads its target before writing it.
    instruction = create_instruction('c += fn(a, b + c, d / a)')
    self.assertEqual(names_and_kinds(instruction),
                     rhs_reads + [('c', 'read'), ('c', 'write')])
120 |
121 |
# Run every test in this module through absltest when executed as a script.
if __name__ == '__main__':
  absltest.main()
124 |
--------------------------------------------------------------------------------
/python_graphs/program_graph.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Creates ProgramGraphs from a program or function's AST.
16 |
17 | A ProgramGraph represents a Python program or function. The nodes in a
18 | ProgramGraph represent an Instruction (see instruction.py), an AST node, or a
19 | piece of syntax from the program. The edges in a ProgramGraph represent the
20 | relationships between these nodes.
21 | """
22 |
23 | import codecs
24 | import collections
25 | import os
26 |
27 | from absl import logging
28 | import astunparse
29 | from astunparse import unparser
30 | import gast as ast
31 | from python_graphs import control_flow
32 | from python_graphs import data_flow
33 | from python_graphs import instruction as instruction_module
34 | from python_graphs import program_graph_dataclasses as pb
35 | from python_graphs import program_utils
36 | from python_graphs import unparser_patch # pylint: disable=unused-import
37 |
38 | import six
39 | from six.moves import builtins
40 | from six.moves import filter
41 |
# Placeholder strings for layout in a program's syntax.
# NOTE(review): presumably these stand in for newlines and indentation changes
# when unparsed source is split into syntax tokens; their use is outside this
# part of the file, so confirm before relying on that.
NEWLINE_TOKEN = '#NEWLINE#'
UNINDENT_TOKEN = '#UNINDENT#'
INDENT_TOKEN = '#INDENT#'
45 |
46 |
47 | class ProgramGraph(object):
48 | """A ProgramGraph represents a Python program or function.
49 |
50 | Attributes:
51 | root_id: The id of the root ProgramGraphNode.
52 | nodes: Maps from node id to the ProgramGraphNode with that id.
53 | edges: A list of the edges (from_node.id, to_node.id, edge type) in the
54 | graph.
55 | child_map: Maps from node id to a list of that node's AST children node ids.
56 | parent_map: Maps from node id to that node's AST parent node id.
57 | neighbors_map: Maps from node id to a list of that node's neighboring edges.
58 | ast_id_to_program_graph_node: Maps from an AST node's object id to the
59 | corresponding AST program graph node, if it exists.
60 | root: The root ProgramGraphNode.
61 | """
62 |
def __init__(self):
  """Constructs an empty ProgramGraph with no root."""
  # No root until one is assigned; the `root` property raises in the meantime.
  self.root_id = None

  # Node storage and lookup tables.
  self.nodes = {}
  self.ast_id_to_program_graph_node = {}

  # TODO(charlessutton): Seems odd to have Edge proto objects as part of the
  # program graph object if node protos aren't. Consider a more consistent
  # treatment.
  self.edges = []

  # Adjacency maps. Missing keys default to an empty list, or to None for the
  # parent map.
  self.child_map = collections.defaultdict(list)
  self.parent_map = collections.defaultdict(lambda: None)
  self.neighbors_map = collections.defaultdict(list)
77 |
78 | # Accessors
@property
def root(self):
  """The root ProgramGraphNode.

  Raises:
    ValueError: `root_id` is not the id of a node in this graph.
  """
  root_id = self.root_id
  if root_id in self.nodes:
    return self.nodes[root_id]
  raise ValueError('Graph has no root node.')
84 |
def all_nodes(self):
  """Returns a view of every ProgramGraphNode in the graph."""
  node_view = self.nodes.values()
  return node_view
87 |
def get_node(self, obj):
  """Resolves `obj` to the corresponding node of this program graph.

  Args:
    obj: An integer node id, an AST node (or list), a ProgramGraphNode, or a
      pb.Node.

  Returns:
    The matching ProgramGraphNode. A ProgramGraphNode argument is returned
    unchanged, without checking that it belongs to this graph.

  Raises:
    ValueError: `obj` has an unsupported type, or is an integer that is not the
      id of a node in this graph.
  """
  if isinstance(obj, six.integer_types) and obj in self.nodes:
    return self.get_node_by_id(obj)
  if isinstance(obj, ProgramGraphNode):
    # assert obj in self.nodes.values()
    return obj
  if isinstance(obj, pb.Node):
    return self.get_node_by_id(obj.id)
  if isinstance(obj, (ast.AST, list)):
    return self.get_node_by_ast_node(obj)
  raise ValueError('Unexpected value for obj.', obj)
109 |
def get_node_by_id(self, obj):
  """Returns the ProgramGraphNode stored under the integer id `obj`.

  Raises:
    KeyError: no node with that id exists in the graph.
  """
  nodes_by_id = self.nodes
  return nodes_by_id[obj]
113 |
def get_node_by_access(self, access):
  """Gets a ProgramGraph node for the given read or write.

  Args:
    access: Either a Name AST node, or an access tuple (kind, node, parent) in
      which node is a Name AST node or a string.

  Returns:
    The program graph node for the Name node when the access has one;
    otherwise (the access names its variable with a string) the program graph
    node for the access's parent.

  Raises:
    ValueError: `access` is neither a Name node nor a tuple.
  """
  if isinstance(access, ast.Name):
    return self.get_node(access)
  if isinstance(access, tuple):
    accessed = access[1]
    if isinstance(accessed, ast.Name):
      return self.get_node(accessed)
    # A string has no node of its own, so fall back to the access's parent.
    return self.get_node(access[2])
  # Raised explicitly rather than asserted so the check survives `python -O`.
  raise ValueError('Could not find node for access.', access)
125 |
def get_nodes_by_source(self, source):
  """Generates the nodes in the program graph containing the query source.

  Only the first statement of `source` is used as the query.

  Args:
    source: The query source.

  Returns:
    An iterator over the nodes that match. A node with an Instruction matches
    when the Instruction contains the query as a subprogram; any other node
    matches when its AST node represents the same program as the query.
  """
  # TODO(dbieber): Factor out 4 lines
  # TODO(dbieber): Use statements beyond the first statement from source.
  query = ast.parse(source, mode='exec').body[0]
  # A query that is a bare expression is matched by its value rather than by
  # the wrapping Expr statement, so that it can also match subexpressions that
  # do not sit inside an ast.Expr node in the program.
  if isinstance(query, ast.Expr):
    query = query.value

  def matches_source(pg_node):
    if not pg_node.has_instruction():
      return instruction_module.represent_same_program(pg_node.ast_node, query)
    return pg_node.instruction.contains_subprogram(query)

  return filter(matches_source, self.nodes.values())
153 |
def get_node_by_source(self, node):
  """Returns the most specific node matching the query source `node`.

  Matching nodes can contain one another, so the match whose AST dump is
  shortest is chosen as the most specific one.

  Raises:
    ValueError: no node matches (min() of an empty sequence).
  """
  def dump_length(pg_node):
    return len(ast.dump(pg_node.node))

  candidates = self.get_nodes_by_source(node)
  return min(candidates, key=dump_length)
159 |
def get_nodes_by_function_name(self, name):
  """Finds the nodes for function definitions with the given name.

  Args:
    name: The function name to look for.

  Returns:
    A lazy `filter` over the graph's nodes, keeping those whose instruction is
    an ast.FunctionDef named `name`.
  """

  def defines_function(pg_node):
    if not pg_node.has_instance_of(ast.FunctionDef):
      return False
    return pg_node.node.name == name

  return filter(defines_function, self.nodes.values())
164 |
def get_node_by_function_name(self, name):
  """Returns the first node found by `get_nodes_by_function_name`.

  Args:
    name: The function name to look for.

  Returns:
    The first matching node.

  Raises:
    StopIteration: If there is no match (`next` is called without a default).
  """
  matches = self.get_nodes_by_function_name(name)
  return next(matches)
167 |
def get_node_by_ast_node(self, ast_node):
  """Returns the program graph node registered for the given AST node.

  The lookup is by object identity: the key is `id(ast_node)`.

  Args:
    ast_node: The AST node object to look up.

  Returns:
    The entry of `self.ast_id_to_program_graph_node` for that object.
  """
  key = id(ast_node)
  return self.ast_id_to_program_graph_node[key]
170 |
def contains_ast_node(self, ast_node):
  """Returns whether a program graph node is registered for the AST node.

  Membership is by object identity: the key checked is `id(ast_node)`.

  Args:
    ast_node: The AST node object to check.

  Returns:
    True if `id(ast_node)` is a key of `self.ast_id_to_program_graph_node`.
  """
  registry = self.ast_id_to_program_graph_node
  return id(ast_node) in registry
173 |
def get_ast_nodes_of_type(self, ast_type):
  """Yields the AST_NODE nodes of the graph with the given AST type.

  Args:
    ast_type: The value to compare against each node's `ast_type`.

  Yields:
    Each node whose node type is AST_NODE and whose `ast_type` equals
    `ast_type`.
  """
  for candidate in six.itervalues(self.nodes):
    is_ast_node_type = candidate.node_type == pb.NodeType.AST_NODE
    if is_ast_node_type and candidate.ast_type == ast_type:
      yield candidate
178 |
179 | # TODO(dbieber): Unify selectors across program_graph and control_flow.
def get_nodes_by_source_and_identifier(self, source, name):
  """Yields graph nodes for uses of an identifier inside matching source.

  Args:
    source: The query source, passed to `get_nodes_by_source`.
    name: The identifier to look for.

  Yields:
    For every ast.Name with id `name` found while walking the AST of a
    matching node, the program graph node registered for that ast.Name. Names
    that have no registered program graph node are skipped.
  """
  for pg_node in self.get_nodes_by_source(source):
    for descendant in ast.walk(pg_node.node):
      if not isinstance(descendant, ast.Name):
        continue
      if descendant.id != name:
        continue
      if self.contains_ast_node(descendant):
        yield self.get_node_by_ast_node(descendant)
186 |
def get_node_by_source_and_identifier(self, source, name):
  """Returns the first node from `get_nodes_by_source_and_identifier`.

  Args:
    source: The query source.
    name: The identifier to look for.

  Returns:
    The first matching node.

  Raises:
    StopIteration: If there is no match (`next` is called without a default).
  """
  matches = self.get_nodes_by_source_and_identifier(source, name)
  return next(matches)
189 |
190 | # Graph Construction Methods
def add_node(self, node):
  """Registers a ProgramGraphNode with this graph.

  The node is stored under its id. If it carries an AST node, that AST node is
  also indexed by identity so the graph node can be found from it.

  Args:
    node: The ProgramGraphNode that should be added.

  Returns:
    The node that was added.

  Raises:
    ValueError: The node's id, or its AST node, is already in this graph.
  """
  assert isinstance(node, ProgramGraphNode), 'Not a ProgramGraphNode'
  node_id = node.id
  if node_id in self.nodes:
    raise ValueError('Already contains node', self.nodes[node_id], node_id)
  ast_node = node.ast_node
  if ast_node is not None:
    if self.contains_ast_node(ast_node):
      raise ValueError('Already contains ast node', ast_node)
    self.ast_id_to_program_graph_node[id(ast_node)] = node
  self.nodes[node_id] = node
  return node
212 |
def add_node_from_instruction(self, instruction):
  """Creates a node for an Instruction and adds it to the program graph.

  Args:
    instruction: The Instruction to create a node for.

  Returns:
    The node that was added.
  """
  return self.add_node(make_node_from_instruction(instruction))
217 |
def add_edge(self, edge):
  """Adds an edge between two nodes in the graph.

  The edge is appended to `self.edges` and recorded in the neighbor lists of
  both endpoints. FIELD edges additionally update the AST child and parent
  maps.

  Args:
    edge: The edge, a pb.Edge proto.
  """
  assert isinstance(edge, pb.Edge), 'Not a pb.Edge'
  self.edges.append(edge)

  source = self.get_node_by_id(edge.id1)
  target = self.get_node_by_id(edge.id2)
  is_field_edge = edge.type == pb.EdgeType.FIELD
  if is_field_edge:
    # FIELD edges encode the AST structure: id1 is the parent of id2.
    self.child_map[edge.id1].append(edge.id2)
    # TODO(charlessutton): Once Instruction updates are complete, add back a
    # sanity check here that raises if edge.id2 already has a parent in
    # self.parent_map that differs from edge.id1 (a node with two parents).
    self.parent_map[target.id] = edge.id1
  # Each endpoint records the edge together with the id of the other endpoint.
  self.neighbors_map[source.id].append((edge, edge.id2))
  self.neighbors_map[target.id].append((edge, edge.id1))
244 |
def remove_edge(self, edge):
  """Removes an edge from the graph.

  Undoes the bookkeeping of `add_edge`. If there are multiple copies of the
  same edge, only one copy is removed.

  Args:
    edge: The edge, a pb.Edge proto.
  """
  self.edges.remove(edge)

  source = self.get_node_by_id(edge.id1)
  target = self.get_node_by_id(edge.id2)

  is_field_edge = edge.type == pb.EdgeType.FIELD
  if is_field_edge:
    # FIELD edges are also tracked in the AST child and parent maps.
    self.child_map[edge.id1].remove(edge.id2)
    del self.parent_map[target.id]

  outgoing_entry = (edge, edge.id2)
  incoming_entry = (edge, edge.id1)
  self.neighbors_map[source.id].remove(outgoing_entry)
  self.neighbors_map[target.id].remove(incoming_entry)
264 |
def add_new_edge(self, n1, n2, edge_type=None, field_name=None):
  """Creates an edge between two nodes and adds it to the graph.

  Both nodes must already be part of the graph.

  Args:
    n1: Specifies the from node of the edge. Can be any object type accepted
      by get_node.
    n2: Specifies the to node of the edge. Can be any object type accepted by
      get_node.
    edge_type: The type of edge. Can be any integer in the pb.Edge enum.
    field_name: For AST edges, a string describing the Python AST field that
      the edge corresponds to.

  Returns:
    The new edge.
  """
  source = self.get_node(n1)
  target = self.get_node(n2)
  edge = pb.Edge(
      id1=source.id, id2=target.id, type=edge_type, field_name=field_name)
  self.add_edge(edge)
  return edge
287 |
288 | # AST Methods
289 | # TODO(charlessutton): Consider whether AST manipulation should be moved
290 | # e.g., to a more general graph object.
def to_ast(self, node=None):
  """Converts the program graph, or a subtree of it, to a Python AST.

  The graph's own `ast_node` references are left untouched.

  Args:
    node: The program graph node to convert. If None, the root is used.

  Returns:
    The AST built for the node.
  """
  start = self.root if node is None else node
  return self._build_ast(node=start, update_references=False)
296 |
def reconstruct_ast(self):
  """Rebuilds the AST and re-points every `ast_node` reference at it.

  After calling this method, all nodes of type AST_NODE will have their
  `ast_node` property refer to subtrees of a reconstructed AST object, and
  self.ast_id_to_program_graph_node will contain only entries from this new
  AST.

  Note that only AST nodes reachable by fields from the root node will be
  converted; this should be all of them but this is not checked.
  """
  # Drop the index of the old AST first; _build_ast repopulates it.
  registry = self.ast_id_to_program_graph_node
  registry.clear()
  self._build_ast(node=self.root, update_references=True)
310 |
def _build_ast(self, node, update_references):
  """Helper method: builds an AST and optionally sets ast_node references.

  AST_NODE nodes become instances of the class named by `ast_type`, with one
  attribute per outgoing FIELD edge. AST_LIST nodes become Python lists ordered
  by the index encoded in each edge's field name. AST_VALUE nodes become their
  stored primitive value.

  Args:
    node: Program graph node to build an AST for.
    update_references: Whether to modify this node and all of its children so
      that they point to the reconstructed AST node.

  Returns:
    AST node corresponding to the program graph node.

  Raises:
    ValueError: The node is not of type AST_NODE, AST_LIST or AST_VALUE.
  """
  if node.node_type == pb.NodeType.AST_NODE:
    rebuilt = getattr(ast, node.ast_type)()
    for edge, neighbor_id in self.neighbors_map[node.id]:
      # Only outgoing FIELD edges lead to AST children; an entry whose
      # neighbor is edge.id1 is an incoming edge.
      if neighbor_id == edge.id1 or not edge.type == pb.EdgeType.FIELD:
        continue
      child = self.get_node_by_id(neighbor_id)
      setattr(
          rebuilt, edge.field_name,
          self._build_ast(node=child, update_references=update_references))
    if update_references:
      node.ast_node = rebuilt
      self.ast_id_to_program_graph_node[id(rebuilt)] = node
    return rebuilt

  if node.node_type == pb.NodeType.AST_LIST:
    items_by_index = {}
    for edge, neighbor_id in self.neighbors_map[node.id]:
      if neighbor_id == edge.id1 or not edge.type == pb.EdgeType.FIELD:
        continue
      child = self.get_node_by_id(neighbor_id)
      unused_field_name, index = parse_list_field_name(edge.field_name)
      items_by_index[index] = self._build_ast(
          node=child, update_references=update_references)
    # Indices are expected to be 0..len-1; emit the items in index order.
    return [
        items_by_index[index]
        for index in six.moves.range(len(items_by_index))
    ]

  if node.node_type == pb.NodeType.AST_VALUE:
    return node.ast_value

  raise ValueError('This ProgramGraphNode does not correspond to a node in'
                   ' an AST.')
360 |
def walk_ast_descendants(self, node=None):
  """Yields a node and all of its AST descendants, in preorder.

  Args:
    node: the node in the program graph corresponding to the root of the AST
      subtree that should be walked. If None, defaults to the root of the
      program graph.

  Yields:
    The starting node itself, followed by all nodes corresponding to its
    descendants in the AST.
  """
  start = self.root if node is None else node
  pending = [start]
  while pending:
    current = pending.pop()
    child_ids = self.child_map[current.id]
    # Push children in reverse so that the first child is popped first.
    pending.extend(
        self.get_node_by_id(child_id) for child_id in reversed(child_ids))
    yield current
380 |
def parent(self, node):
  """Returns the AST parent of an AST program graph node.

  Args:
    node: A ProgramGraphNode.

  Returns:
    The node's AST parent, which is also a ProgramGraphNode, or None if the
    parent map holds None for this node.
  """
  parent_id = self.parent_map[node.id]
  return None if parent_id is None else self.get_node_by_id(parent_id)
395 |
def children(self, node):
  """Yields the (direct) AST children of an AST program graph node.

  Args:
    node: A ProgramGraphNode.

  Yields:
    The AST children of node, which are ProgramGraphNode objects, in the order
    recorded in the child map.
  """
  lookup = self.get_node_by_id
  for child_id in self.child_map[node.id]:
    yield lookup(child_id)
407 |
def neighbors(self, node, edge_type=None):
  """Returns the incoming and outgoing neighbors of a program graph node.

  Args:
    node: A ProgramGraphNode.
    edge_type: If provided, only edges of this type are considered.

  Returns:
    A list of the incoming and outgoing neighbors of node, which are
    ProgramGraphNode objects but not necessarily AST nodes.
  """
  result = []
  for edge, neighbor_id in self.neighbors_map[node.id]:
    if edge_type is None or edge.type == edge_type:
      result.append(self.get_node_by_id(neighbor_id))
  return result
425 |
def incoming_neighbors(self, node, edge_type=None):
  """Returns the incoming neighbors of a program graph node.

  An edge is incoming when its `id2` is the id of `node`.

  Args:
    node: A ProgramGraphNode.
    edge_type: If provided, only edges of this type are considered.

  Returns:
    A list of the incoming neighbors of node, which are ProgramGraphNode
    objects but not necessarily AST nodes.
  """
  return [
      self.get_node_by_id(neighbor_id)
      for edge, neighbor_id in self.neighbors_map[node.id]
      if edge.id2 == node.id and (edge_type is None or edge.type == edge_type)
  ]
444 |
def outgoing_neighbors(self, node, edge_type=None):
  """Returns the outgoing neighbors of a program graph node.

  An edge is outgoing when its `id1` is the id of `node`.

  Args:
    node: A ProgramGraphNode.
    edge_type: If provided, only edges of this type are considered.

  Returns:
    A list of the outgoing neighbors of node, which are ProgramGraphNode
    objects but not necessarily AST nodes.
  """
  return [
      self.get_node_by_id(neighbor_id)
      for edge, neighbor_id in self.neighbors_map[node.id]
      if edge.id1 == node.id and (edge_type is None or edge.type == edge_type)
  ]
463 |
def dump_tree(self, start_node=None):
  """Returns a string representation of an AST subtree for debugging.

  Each node gets one line, prefixed according to its depth below the start
  node. Under each node, its outgoing edges that are neither AST edges nor
  syntax edges are listed with their type and target id. Children follow
  their parent, in preorder.

  Args:
    start_node: The node to start from. If None, the root is used.

  Returns:
    The dump as a single string.
  """
  if start_node is None:
    start_node = self.root

  lines = []
  # Explicit stack of (node, depth) pairs; popping yields preorder.
  stack = [(start_node, 0)]
  while stack:
    node, depth = stack.pop()
    prefix = ' ' + ('--' * depth)
    lines.append(' '.join([prefix, dump_node(node), '\n']))
    # Output long distance edges.
    for edge, neighbor_id in self.neighbors_map[node.id]:
      if is_ast_edge(edge) or is_syntax_edge(edge):
        continue
      if node.id != edge.id1:
        continue
      lines.append(' '.join(
          [prefix, '--((', edge.type.name, '))-->', str(neighbor_id), '\n']))
    # Push children in reverse so that the first child is dumped first.
    for child in reversed(list(self.children(node))):
      stack.append((child, depth + 1))
  return ''.join(lines)
487 |
488 | # TODO(charlessutton): Consider whether this belongs in ProgramGraph
489 | # or in make_synthesis_problems.
def copy_with_placeholder(self, node):
  """Returns a new program graph in which the subtree of NODE is removed.

  In the new graph, the subtree headed by NODE is replaced by a single
  node of type PLACEHOLDER, which is connected to the AST parent of NODE
  by the same edge type and field name as in the original graph.

  The new program graph will share structure (i.e. the ProgramGraphNode
  objects) with the original graph. Edges are re-created, keeping their type
  and field name.

  Args:
    node: A node in this program graph

  Returns:
    A new ProgramGraph object with NODE replaced
  """
  descendant_ids = {n.id for n in self.walk_ast_descendants(node)}
  new_graph = ProgramGraph()
  new_graph.add_node(self.root)
  new_graph.root_id = self.root_id
  for edge in self.edges:
    v1 = self.nodes[edge.id1]
    v2 = self.nodes[edge.id2]
    # Omit edges that are adjacent to the subtree rooted at `node` UNLESS this
    # is the AST edge to the root of the subtree.
    # In that case, create an edge to a new placeholder node
    adj_bad_subtree = ((edge.id1 in descendant_ids) or
                       (edge.id2 in descendant_ids))
    if adj_bad_subtree:
      if edge.id2 == node.id and is_ast_edge(edge):
        placeholder = ProgramGraphNode()
        placeholder.node_type = pb.NodeType.PLACEHOLDER
        placeholder.id = node.id
        # The parent must be in the new graph before it can be linked.
        if edge.id1 not in new_graph.nodes:
          new_graph.add_node(v1)
        new_graph.add_node(placeholder)
        # Keep the field name: _build_ast needs it to rebuild FIELD edges.
        new_graph.add_new_edge(
            v1, placeholder, edge_type=edge.type, field_name=edge.field_name)
    else:
      # nodes on the edge have not been added yet
      if edge.id1 not in new_graph.nodes:
        new_graph.add_node(v1)
      if edge.id2 not in new_graph.nodes:
        new_graph.add_node(v2)
      new_graph.add_new_edge(
          v1, v2, edge_type=edge.type, field_name=edge.field_name)
  return new_graph
533 |
def copy_subgraph(self, node):
  """Returns a new program graph containing only the subtree rooted at NODE.

  All edges that connect nodes in the subtree are included, both AST edges
  and other types of edges. Edges are re-created, keeping their type and
  field name. The ProgramGraphNode objects are shared with this graph.

  Args:
    node: A node in this program graph

  Returns:
    A new ProgramGraph object whose root is NODE
  """
  descendant_ids = {n.id for n in self.walk_ast_descendants(node)}
  new_graph = ProgramGraph()
  new_graph.add_node(node)
  new_graph.root_id = node.id
  for edge in self.edges:
    # Keep only edges whose endpoints are both inside the subtree rooted at
    # NODE; edges that cross the subtree boundary are dropped.
    inside_subtree = ((edge.id1 in descendant_ids) and
                      (edge.id2 in descendant_ids))
    if not inside_subtree:
      continue
    v1 = self.nodes[edge.id1]
    v2 = self.nodes[edge.id2]
    if edge.id1 not in new_graph.nodes:
      new_graph.add_node(v1)
    if edge.id2 not in new_graph.nodes:
      new_graph.add_node(v2)
    # Keep the field name: _build_ast needs it to rebuild FIELD edges.
    new_graph.add_new_edge(
        v1, v2, edge_type=edge.type, field_name=edge.field_name)
  return new_graph
565 |
566 |
def is_ast_node(node):
  """Returns whether the node's type is AST_NODE."""
  node_type = node.node_type
  return node_type == pb.NodeType.AST_NODE
569 |
570 |
def is_ast_edge(edge):
  """Returns whether the edge is an AST edge, i.e. has type FIELD."""
  # TODO(charlessutton): Expand to enumerate edge types in gast.
  edge_type = edge.type
  return edge_type == pb.EdgeType.FIELD
574 |
575 |
def is_syntax_edge(edge):
  """Returns whether the edge's type is SYNTAX."""
  edge_type = edge.type
  return edge_type == pb.EdgeType.SYNTAX
578 |
579 |
def dump_node(node):
  """Returns a one-line, space-separated description of a node.

  The description holds the node type name in brackets, the id and the AST
  type, followed by the AST value and the syntax when those are truthy.

  Args:
    node: The ProgramGraphNode to describe.

  Returns:
    The description string.
  """
  parts = ['[' + node.node_type.name + ']', str(node.id), node.ast_type]
  parts.extend(
      str(extra) for extra in (node.ast_value, node.syntax) if extra)
  return ' '.join(parts)
588 |
589 |
def get_program_graph(program):
  """Constructs a program graph to represent the given program.

  The graph is assembled in stages, in this order: AST nodes, FIELD edges
  (with AST_LIST and AST_VALUE nodes), syntax nodes and their edges, CFG_NEXT
  edges, LAST_READ / LAST_WRITE edges, COMPUTED_FROM edges, and finally CALLS,
  RETURNS_TO and FORMAL_ARG_NAME edges.

  Args:
    program: The program to analyze; it is converted to an AST with
      program_utils.program_to_ast.

  Returns:
    The ProgramGraph built for the program.
  """
  program_node = program_utils.program_to_ast(program)  # An AST node.
  program_graph = ProgramGraph()

  # Perform control flow analysis.
  control_flow_graph = control_flow.get_control_flow_graph(program_node)

  _add_ast_nodes(program_graph, program_node, control_flow_graph)

  root = program_graph.get_node_by_ast_node(program_node)
  program_graph.root_id = root.id

  _add_field_edges(program_graph, program_node)

  # Add SYNTAX_NODE nodes. Also add NEXT_SYNTAX and LAST_LEXICAL_USE edges.
  # Add these edges using a custom AST unparser to visit leaf nodes in preorder.
  SyntaxNodeUnparser(program_node, program_graph)

  # Perform data flow analysis.
  analysis = data_flow.LastAccessAnalysis()
  for node in control_flow_graph.get_enter_control_flow_nodes():
    analysis.visit(node)

  _add_control_flow_edges(program_graph, control_flow_graph)
  _add_data_flow_edges(program_graph, control_flow_graph)
  _add_computed_from_edges(program_graph, program_node)
  _add_call_edges(program_graph, program_node)

  return program_graph


def _add_ast_nodes(program_graph, program_node, control_flow_graph):
  """Adds AST_NODE nodes for all Instructions, then for all other AST nodes."""
  # Add AST_NODE program graph nodes corresponding to Instructions in the
  # control flow graph.
  for control_flow_node in control_flow_graph.get_control_flow_nodes():
    program_graph.add_node_from_instruction(control_flow_node.instruction)

  # Add AST_NODE program graph nodes corresponding to AST nodes that were not
  # already added through an Instruction.
  for ast_node in ast.walk(program_node):
    if not program_graph.contains_ast_node(ast_node):
      pg_node = make_node_from_ast_node(ast_node)
      program_graph.add_node(pg_node)


def _add_field_edges(program_graph, program_node):
  """Adds FIELD edges, creating AST_LIST and AST_VALUE nodes as needed."""
  for ast_node in ast.walk(program_node):
    for field_name, value in ast.iter_fields(ast_node):
      if isinstance(value, list):
        # A list-valued field gets its own AST_LIST node; each item hangs off
        # that node with a field name that encodes its index.
        pg_node = make_node_for_ast_list()
        program_graph.add_node(pg_node)
        program_graph.add_new_edge(
            ast_node, pg_node, pb.EdgeType.FIELD, field_name)
        for index, item in enumerate(value):
          list_field_name = make_list_field_name(field_name, index)
          if isinstance(item, ast.AST):
            program_graph.add_new_edge(pg_node, item, pb.EdgeType.FIELD,
                                       list_field_name)
          else:
            item_node = make_node_from_ast_value(item)
            program_graph.add_node(item_node)
            program_graph.add_new_edge(pg_node, item_node, pb.EdgeType.FIELD,
                                       list_field_name)
      elif isinstance(value, ast.AST):
        program_graph.add_new_edge(
            ast_node, value, pb.EdgeType.FIELD, field_name)
      else:
        # A primitive value gets its own AST_VALUE node.
        pg_node = make_node_from_ast_value(value)
        program_graph.add_node(pg_node)
        program_graph.add_new_edge(
            ast_node, pg_node, pb.EdgeType.FIELD, field_name)


def _add_control_flow_edges(program_graph, control_flow_graph):
  """Adds a CFG_NEXT edge from each instruction to each of its successors."""
  for control_flow_node in control_flow_graph.get_control_flow_nodes():
    instruction = control_flow_node.instruction
    for next_control_flow_node in control_flow_node.next:
      next_instruction = next_control_flow_node.instruction
      program_graph.add_new_edge(
          instruction.node, next_instruction.node,
          edge_type=pb.EdgeType.CFG_NEXT)


def _add_data_flow_edges(program_graph, control_flow_graph):
  """Adds LAST_READ and LAST_WRITE edges from the last-access labels."""
  for control_flow_node in control_flow_graph.get_control_flow_nodes():
    # Start with the most recent accesses before this instruction.
    last_accesses = control_flow_node.get_label('last_access_in').copy()
    for access in control_flow_node.instruction.accesses:
      # Extract the node and identifiers for the current access.
      pg_node = program_graph.get_node_by_access(access)
      access_name = instruction_module.access_name(access)
      read_identifier = instruction_module.access_identifier(
          access_name, 'read')
      write_identifier = instruction_module.access_identifier(
          access_name, 'write')
      # Find previous reads.
      for read in last_accesses.get(read_identifier, []):
        read_pg_node = program_graph.get_node_by_access(read)
        program_graph.add_new_edge(
            pg_node, read_pg_node, edge_type=pb.EdgeType.LAST_READ)
      # Find previous writes.
      for write in last_accesses.get(write_identifier, []):
        write_pg_node = program_graph.get_node_by_access(write)
        program_graph.add_new_edge(
            pg_node, write_pg_node, edge_type=pb.EdgeType.LAST_WRITE)
      # Update the state to refer to this access as the most recent one.
      if instruction_module.access_is_read(access):
        last_accesses[read_identifier] = [access]
      elif instruction_module.access_is_write(access):
        last_accesses[write_identifier] = [access]


def _add_computed_from_edges(program_graph, program_node):
  """Adds COMPUTED_FROM edges from assignment targets to names in the value."""
  for node in ast.walk(program_node):
    if not isinstance(node, ast.Assign):
      continue
    for value_node in ast.walk(node.value):
      if isinstance(value_node, ast.Name):
        # TODO(dbieber): If possible, improve precision of these edges.
        for target in node.targets:
          program_graph.add_new_edge(
              target, value_node, edge_type=pb.EdgeType.COMPUTED_FROM)


def _add_call_edges(program_graph, program_node):
  """Adds CALLS, FORMAL_ARG_NAME and RETURNS_TO edges for every Call node."""
  for node in ast.walk(program_node):
    if not isinstance(node, ast.Call):
      continue
    if not isinstance(node.func, ast.Name):
      # TODO(dbieber): Add a special case for Attributes.
      logging.debug(
          'Cannot statically determine the function being called. (%s)',
          astunparse.unparse(node.func).strip())
      continue

    # TODO(dbieber): Use data flow analysis instead of all function defs.
    func_defs = list(program_graph.get_nodes_by_function_name(node.func.id))
    if not func_defs:
      # TODO(dbieber): Add support for additional classes of functions,
      # such as attributes of known objects and builtins.
      if node.func.id in dir(builtins):
        message = 'Function is builtin.'
      else:
        message = 'Cannot statically determine the function being called.'
      logging.debug('%s (%s)', message, node.func.id)

    # For every function definition with a matching name, add the calls,
    # returns_to and formal_arg_name edges.
    for func_def in func_defs:
      fn_node = func_def.node
      # Add calls edge from the call node to the function definition.
      program_graph.add_new_edge(node, fn_node, edge_type=pb.EdgeType.CALLS)
      _add_returns_to_edges(program_graph, node, fn_node)
      _add_positional_arg_edges(program_graph, node, fn_node)
      _add_keyword_arg_edges(program_graph, node, fn_node)


def _add_returns_to_edges(program_graph, call_node, fn_node):
  """Adds RETURNS_TO edges from the function's returns to the call node."""
  for inner_node in ast.walk(fn_node):
    # TODO(dbieber): Determine if the returns_to should instead go to
    # the next instruction after the Call node instead.
    if isinstance(inner_node, ast.Return):
      program_graph.add_new_edge(
          inner_node, call_node, edge_type=pb.EdgeType.RETURNS_TO)


def _add_positional_arg_edges(program_graph, call_node, fn_node):
  """Adds FORMAL_ARG_NAME edges for the positional args of a call."""
  for index, arg in enumerate(call_node.args):
    formal_arg = None
    if index < len(fn_node.args.args):
      formal_arg = fn_node.args.args[index]
    elif fn_node.args.vararg:
      # Since args.vararg is a string, we use the arguments node.
      # TODO(dbieber): Use a node specifically for the vararg.
      formal_arg = fn_node.args
    if formal_arg is not None:
      # Note: formal_arg can be an AST node or a string.
      program_graph.add_new_edge(
          arg, formal_arg, edge_type=pb.EdgeType.FORMAL_ARG_NAME)
    else:
      # TODO(dbieber): If formal_arg is None, then remove all
      # formal_arg_name edges for this FunctionDef.
      logging.debug('formal_arg is None')


def _add_keyword_arg_edges(program_graph, call_node, fn_node):
  """Adds FORMAL_ARG_NAME edges for the keyword args of a call."""
  for keyword in call_node.keywords:
    name = keyword.arg
    formal_arg = None
    for arg in fn_node.args.args:
      if isinstance(arg, ast.Name) and arg.id == name:
        formal_arg = arg
        break
    else:
      # No formal arg has this name; fall back to **kwargs if present.
      if fn_node.args.kwarg:
        # Since args.kwarg is a string, we use the arguments node.
        # TODO(dbieber): Use a node specifically for the kwarg.
        formal_arg = fn_node.args
    if formal_arg is not None:
      program_graph.add_new_edge(
          keyword.value, formal_arg,
          edge_type=pb.EdgeType.FORMAL_ARG_NAME)
    else:
      # TODO(dbieber): If formal_arg is None, then remove all
      # formal_arg_name edges for this FunctionDef.
      logging.debug('formal_arg is None')
771 |
772 |
class SyntaxNodeUnparser(unparser.Unparser):
  """An Unparser that records each piece of emitted text as a syntax node.

  While unparsing, every non-blank piece of text becomes a syntax node in the
  graph, linked from the AST node being unparsed (SYNTAX) and from the
  previous syntax node (NEXT_SYNTAX). Repeated uses of a name are linked with
  LAST_LEXICAL_USE edges. The unparsed text itself is discarded.
  """

  def __init__(self, ast_node, graph):
    self.graph = graph

    self.current_ast_node = None  # The AST node currently being unparsed.
    self.last_syntax_node = None
    self.last_lexical_uses = {}
    self.last_indent = 0

    # The textual output is not needed, so it is written to os.devnull.
    with codecs.open(os.devnull, 'w', encoding='utf-8') as sink:
      super(SyntaxNodeUnparser, self).__init__(ast_node, file=sink)

  def dispatch(self, ast_node):
    """Dispatches to the base class while tracking the current AST node."""
    enclosing_ast_node = self.current_ast_node
    self.current_ast_node = ast_node
    super(SyntaxNodeUnparser, self).dispatch(ast_node)
    self.current_ast_node = enclosing_ast_node

  def fill(self, text=''):
    """Starts a new line, recording newline and indentation change tokens."""
    indent_change = self._indent - self.last_indent
    if indent_change < 0:
      whitespace = UNINDENT_TOKEN * (-indent_change)
    elif indent_change > 0:
      whitespace = INDENT_TOKEN * indent_change
    else:
      whitespace = ''
    self.last_indent = self._indent
    self._add_syntax_node(NEWLINE_TOKEN + whitespace + text)
    super(SyntaxNodeUnparser, self).fill(text)

  def write(self, text):
    """Appends a piece of text to the current line."""
    if isinstance(text, ast.AST):  # text may be a Name, Tuple, or List node.
      return self.dispatch(text)
    self._add_syntax_node(text)
    super(SyntaxNodeUnparser, self).write(text)

  def _add_syntax_node(self, text):
    """Adds a syntax node for the text, unless it is blank after stripping."""
    stripped = text.strip()
    if not stripped:
      return
    syntax_node = make_node_from_syntax(six.text_type(stripped))
    graph = self.graph
    graph.add_node(syntax_node)
    graph.add_new_edge(
        self.current_ast_node, syntax_node, edge_type=pb.EdgeType.SYNTAX)
    previous = self.last_syntax_node
    if previous:
      graph.add_new_edge(
          previous, syntax_node, edge_type=pb.EdgeType.NEXT_SYNTAX)
    self.last_syntax_node = syntax_node

  def _Name(self, node):
    """Links the name to its previous lexical use before unparsing it."""
    identifier = node.id
    if identifier in self.last_lexical_uses:
      previous_use = self.last_lexical_uses[identifier]
      self.graph.add_new_edge(
          node, previous_use, edge_type=pb.EdgeType.LAST_LEXICAL_USE)
    self.last_lexical_uses[identifier] = node
    super(SyntaxNodeUnparser, self)._Name(node)
834 |
835 |
class ProgramGraphNode(object):
  """A single node in a Program Graph.

  Corresponds to either a SyntaxNode or an Instruction (as in a
  ControlFlowGraph).

  Attributes:
    node_type: One of the node types from pb.NodeType.
    id: A unique id for the node.
    instruction: If applicable, the corresponding Instruction.
    ast_node: If available, the AST node corresponding to the ProgramGraphNode.
    ast_type: If available, the type of the AST node, as a string.
    ast_value: If available, the primitive Python value corresponding to the
      node.
    syntax: For SYNTAX_NODEs, the syntax information stored in the node.
    node: If available, the AST node for this program graph node or its
      instruction.
  """

  def __init__(self):
    self.node_type = None
    self.id = None

    self.instruction = None
    self.ast_node = None
    self.ast_type = ''
    self.ast_value = ''
    self.syntax = ''

  def has_instruction(self):
    """Whether an Instruction is attached to this node."""
    return self.instruction is not None

  def has_instance_of(self, t):
    """Whether the node's instruction is an instance of type `t`."""
    return self.instruction is not None and isinstance(self.instruction.node, t)

  @property
  def node(self):
    """The node's own AST node, else its instruction's AST node, else None."""
    if self.ast_node is not None:
      return self.ast_node
    return None if self.instruction is None else self.instruction.node

  def __repr__(self):
    return ' '.join((str(self.id), str(self.ast_type)))
884 |
885 |
def make_node_from_syntax(text):
  """Creates a SYNTAX_NODE ProgramGraphNode holding the given syntax text.

  Args:
    text: The syntax text to store on the node.

  Returns:
    A new ProgramGraphNode with an id from program_utils.unique_id().
  """
  syntax_node = ProgramGraphNode()
  syntax_node.syntax = text
  syntax_node.id = program_utils.unique_id()
  syntax_node.node_type = pb.NodeType.SYNTAX_NODE
  return syntax_node
892 |
893 |
def make_node_from_instruction(instruction):
  """Creates a ProgramGraphNode corresponding to an existing Instruction.

  The node is built from the instruction's AST node and then tagged with the
  instruction itself.

  Args:
    instruction: An Instruction object.

  Returns:
    A ProgramGraphNode corresponding to that instruction.
  """
  pg_node = make_node_from_ast_node(instruction.node)
  pg_node.instruction = instruction
  return pg_node
907 |
908 |
def make_node_from_ast_node(ast_node):
  """Creates an AST_NODE program graph node for the provided AST node.

  The new node records the AST node and the name of its class, and gets an id
  from program_utils.unique_id(). No Instruction is attached here.

  Args:
    ast_node: An AST node from the program being analyzed.

  Returns:
    A node in the program graph corresponding to the AST node.
  """
  pg_node = ProgramGraphNode()
  pg_node.ast_node = ast_node
  pg_node.ast_type = type(ast_node).__name__
  pg_node.id = program_utils.unique_id()
  pg_node.node_type = pb.NodeType.AST_NODE
  return pg_node
927 |
928 |
def make_node_for_ast_list():
  """Creates an AST_LIST ProgramGraphNode.

  Returns:
    A new ProgramGraphNode with an id from program_utils.unique_id().
  """
  list_node = ProgramGraphNode()
  list_node.id = program_utils.unique_id()
  list_node.node_type = pb.NodeType.AST_LIST
  return list_node
934 |
935 |
def make_node_from_ast_value(value):
  """Creates an AST_VALUE ProgramGraphNode for a primitive AST value.

  For example, the number 1 in Python has AST Num(n=1). The value 1 is a
  primitive appearing in that AST, and it gets its own ProgramGraphNode with
  node_type AST_VALUE.

  Args:
    value: A primitive value appearing in an AST.

  Returns:
    A ProgramGraphNode corresponding to the provided value, with an id from
    program_utils.unique_id().
  """
  value_node = ProgramGraphNode()
  value_node.ast_value = value
  value_node.id = program_utils.unique_id()
  value_node.node_type = pb.NodeType.AST_VALUE
  return value_node
956 |
957 |
def make_list_field_name(field_name, index):
  """Encodes a list field name and an item index as `<field_name>:<index>`.

  Args:
    field_name: The name of the list-valued field.
    index: The position of the item in the list.

  Returns:
    The combined field name string.
  """
  return '{0}:{1}'.format(field_name, index)
960 |
961 |
def parse_list_field_name(list_field_name):
  """Splits a `<field_name>:<index>` string into its two parts.

  Args:
    list_field_name: A string holding exactly one ':' separator.

  Returns:
    A (field_name, index) tuple, with index converted to an int.
  """
  name, raw_index = list_field_name.split(':')
  return name, int(raw_index)
966 |
--------------------------------------------------------------------------------
/python_graphs/program_graph_dataclasses.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """The dataclasses for representing a Program Graph."""
16 |
17 | import enum
18 | from typing import List, Optional, Text
19 | import dataclasses
20 |
21 |
class NodeType(enum.Enum):
  """The different kinds of nodes that can appear in a program graph."""
  UNSPECIFIED = 0
  AST_NODE = 1  # A node of the program's AST.
  AST_LIST = 2  # A list-valued field of an AST node.
  AST_VALUE = 3  # A primitive value appearing in the AST.
  SYNTAX_NODE = 4  # A piece of syntax text.
  PLACEHOLDER = 5  # Stands in for a subtree that was removed from the graph.
29 |
30 |
@dataclasses.dataclass
class Node:
  """Represents a node in a program graph.

  Attributes:
    id: The id of the node.
    type: The kind of node.
    ast_type: If an AST node, a string that identifies what type of AST node,
      e.g. "Num" or "Expr". These are defined by the underlying AST for the
      language.
    ast_value_repr: For a primitive valued AST node, such as the name of an
      identifier for a Name node or the number attached to a Num node. The
      corresponding ast_type value is the Python type of ast_value, not the
      type of the parent AST node.
    syntax: For syntax nodes, the syntax attached to the node.
  """
  id: int
  type: NodeType
  ast_type: Optional[Text] = dataclasses.field(default="")
  ast_value_repr: Optional[Text] = dataclasses.field(default="")
  syntax: Optional[Text] = dataclasses.field(default="")
51 |
52 |
class EdgeType(enum.Enum):
  """The different kinds of edges that can appear in a program graph."""
  UNSPECIFIED = 0
  CFG_NEXT = 1
  LAST_READ = 2
  LAST_WRITE = 3
  COMPUTED_FROM = 4
  RETURNS_TO = 5
  FORMAL_ARG_NAME = 6
  # FIELD edges are the ones for which Edge.field_name is meaningful.
  FIELD = 7
  SYNTAX = 8
  NEXT_SYNTAX = 9
  LAST_LEXICAL_USE = 10
  CALLS = 11
67 |
68 |
@dataclasses.dataclass
class Edge:
  """Represents a typed edge between two nodes of a program graph.

  The endpoints are stored as integer node ids rather than as Node objects.
  """
  # Id of the node the edge starts from.
  id1: int
  # Id of the node the edge points to.
  id2: int
  # The kind of edge (see EdgeType).
  type: EdgeType
  field_name: Optional[Text] = None  # For FIELD edges, the field name.
  # NOTE(review): semantics not visible here -- presumably marks that a
  # matching reverse edge exists; confirm against the graph builder.
  has_back_edge: bool = False
76 |
77 |
@dataclasses.dataclass
class Graph:
  """A program graph represented as plain lists of nodes and edges."""
  # All nodes of the graph.
  nodes: List[Node]
  # All edges of the graph.
  edges: List[Edge]
  # NOTE(review): presumably the `id` of the root Node -- confirm.
  root_id: int
83 |
--------------------------------------------------------------------------------
/python_graphs/program_graph_graphviz.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Graphviz visualizations of Program Graphs."""
16 |
17 | from absl import logging # pylint: disable=unused-import
18 | import pygraphviz
19 | from python_graphs import program_graph_dataclasses as pb
20 | import six
21 |
22 |
def to_graphviz(graph):
  """Converts a ProgramGraph into a pygraphviz graph.

  Nodes with an ast_type are labelled with it; all other nodes are drawn as
  points. Every edge is labelled with the name of its type, and LAST_READ and
  LAST_WRITE edges are colored red.

  Args:
    graph: A ProgramGraph object to visualize.
  Returns:
    A pygraphviz object representing the ProgramGraph.
  """
  # The color tables do not depend on the node or edge being drawn, so they
  # are built once up front. No node type currently has a color assigned.
  node_type_colors = {}
  edge_colors = {
      pb.EdgeType.LAST_READ: 'red',
      pb.EdgeType.LAST_WRITE: 'red',
  }

  dot = pygraphviz.AGraph(strict=False, directed=True)

  for node in graph.nodes.values():
    if node.ast_type:
      attrs = {'label': six.ensure_str(node.ast_type, 'utf-8')}
    else:
      attrs = {'shape': 'point'}
    if node.node_type in node_type_colors:
      attrs['color'] = node_type_colors[node.node_type]
      attrs['colorscheme'] = 'svg'
    dot.add_node(node.id, **attrs)

  for edge in graph.edges:
    attrs = {'label': edge.type.name}
    if edge.type in edge_colors:
      attrs['color'] = edge_colors[edge.type]
      attrs['colorscheme'] = 'svg'
    dot.add_edge(edge.id1, edge.id2, **attrs)

  return dot
57 |
58 |
def render(graph, path='/tmp/graph.png'):
  """Draws a ProgramGraph to an image file using the 'dot' layout program.

  Args:
    graph: A ProgramGraph object to visualize.
    path: The file path the rendered image is written to.
  """
  to_graphviz(graph).draw(path, prog='dot')
62 |
--------------------------------------------------------------------------------
/python_graphs/program_graph_graphviz_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for program_graph_graphviz.py."""
16 |
17 | import inspect
18 |
19 | from absl.testing import absltest
20 | from python_graphs import control_flow_test_components as tc
21 | from python_graphs import program_graph
22 | from python_graphs import program_graph_graphviz
23 |
24 |
class ControlFlowGraphvizTest(absltest.TestCase):

  def test_to_graphviz_for_all_test_components(self):
    # Smoke test: the conversion must run without raising for the program
    # graph of every function in the control flow test components.
    functions = inspect.getmembers(tc, predicate=inspect.isfunction)
    for _, fn in functions:
      program_graph_graphviz.to_graphviz(program_graph.get_program_graph(fn))
31 |
32 |
# Run the tests when this file is executed directly as a script.
if __name__ == '__main__':
  absltest.main()
35 |
--------------------------------------------------------------------------------
/python_graphs/program_graph_test.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Tests for program_graph.py."""
16 |
17 | import collections
18 | import inspect
19 | import time
20 |
21 | from absl import logging
22 | from absl.testing import absltest
23 | import gast as ast
24 |
25 | from python_graphs import control_flow_test_components as cftc
26 | from python_graphs import program_graph
27 | from python_graphs import program_graph_dataclasses as pb
28 | from python_graphs import program_graph_test_components as pgtc
29 | from python_graphs import program_utils
30 |
31 |
def get_test_components():
  """Generates the functions defined in both test component modules.

  Functions from the program graph test components come first, followed by
  those from the control flow test components.

  Yields:
    Functions from the program graph and control flow test components files.
  """
  for module in (pgtc, cftc):
    for _, fn in inspect.getmembers(module, predicate=inspect.isfunction):
      yield fn
43 |
44 |
class ProgramGraphTest(absltest.TestCase):
  """Tests for constructing, querying, and mutating program graphs."""

  def assertEdge(self, graph, n1, n2, edge_type):
    """Asserts that an edge of type edge_type exists from n1 to n2 in graph."""
    edge = pb.Edge(id1=n1.id, id2=n2.id, type=edge_type)
    self.assertIn(edge, graph.edges)

  def assertNoEdge(self, graph, n1, n2, edge_type):
    """Asserts that no edge of type edge_type exists from n1 to n2 in graph."""
    edge = pb.Edge(id1=n1.id, id2=n2.id, type=edge_type)
    self.assertNotIn(edge, graph.edges)

  def test_get_program_graph_test_components(self):
    self.analyze_get_program_graph(get_test_components(), start=0)

  def analyze_get_program_graph(self, program_generator, start=0):
    """Builds a program graph for each program and logs summary statistics.

    Args:
      program_generator: An iterable of programs accepted by
        program_graph.get_program_graph.
      start: Programs whose index in the iterable is below this are skipped.
    """
    # TODO(dbieber): Remove the counting and logging logic from this method,
    # and instead just get_program_graph for each program in the generator.
    # The counting and logging logic is for development purposes only.
    num_edges = 0
    num_edges_by_type = collections.defaultdict(int)
    num_nodes = 0
    # Start at zero so that the logged per-graph averages divide by the number
    # of graphs actually processed. The counter is incremented before any
    # division below, so it is never zero at that point.
    num_graphs = 0
    times = {}
    for index, program in enumerate(program_generator):
      if index < start:
        continue
      start_time = time.time()
      graph = program_graph.get_program_graph(program)
      end_time = time.time()
      times[index] = end_time - start_time
      num_edges += len(graph.edges)
      for edge in graph.edges:
        num_edges_by_type[edge.type] += 1
      num_nodes += len(graph.nodes)
      num_graphs += 1
      if index % 100 == 0:
        # Periodically report the slowest programs and the running averages.
        logging.debug(sorted(times.items(), key=lambda kv: -kv[1])[:10])
        logging.info('%d %d %d', num_edges, num_nodes, num_graphs)
        logging.info('%f %f', num_edges / num_graphs, num_nodes / num_graphs)
        for edge_type in num_edges_by_type:
          logging.info('%s %f', edge_type,
                       num_edges_by_type[edge_type] / num_graphs)

    logging.info(times)
    logging.info(sorted(times.items(), key=lambda kv: -kv[1])[:10])

  def test_last_lexical_use_edges_function_call(self):
    graph = program_graph.get_program_graph(pgtc.function_call)
    read = graph.get_node_by_source_and_identifier('return z', 'z')
    write = graph.get_node_by_source_and_identifier(
        'z = function_call_helper(x, y)', 'z')
    self.assertEdge(graph, read, write, pb.EdgeType.LAST_LEXICAL_USE)

  def test_last_write_edges_function_call(self):
    graph = program_graph.get_program_graph(pgtc.function_call)
    write_z = graph.get_node_by_source_and_identifier(
        'z = function_call_helper(x, y)', 'z')
    read_z = graph.get_node_by_source_and_identifier('return z', 'z')
    self.assertEdge(graph, read_z, write_z, pb.EdgeType.LAST_WRITE)

    write_y = graph.get_node_by_source_and_identifier('y = 2', 'y')
    read_y = graph.get_node_by_source_and_identifier(
        'z = function_call_helper(x, y)', 'y')
    self.assertEdge(graph, read_y, write_y, pb.EdgeType.LAST_WRITE)

  def test_last_read_edges_assignments(self):
    graph = program_graph.get_program_graph(pgtc.assignments)
    write_a0 = graph.get_node_by_source_and_identifier('a, b = 0, 0', 'a')
    read_a0 = graph.get_node_by_source_and_identifier('c = 2 * a + 1', 'a')
    write_a1 = graph.get_node_by_source_and_identifier('a = c + 3', 'a')
    self.assertEdge(graph, write_a1, read_a0, pb.EdgeType.LAST_READ)
    self.assertNoEdge(graph, write_a0, read_a0, pb.EdgeType.LAST_READ)

    read_a1 = graph.get_node_by_source_and_identifier('return a, b, c, d', 'a')
    self.assertEdge(graph, read_a1, read_a0, pb.EdgeType.LAST_READ)

  def test_last_read_last_write_edges_repeated_identifier(self):
    graph = program_graph.get_program_graph(pgtc.repeated_identifier)
    write_x0 = graph.get_node_by_source_and_identifier('x = 0', 'x')

    # The same identifier occurs several times per statement, so individual
    # occurrences are located through the statement's AST instead of by name.
    stmt1 = graph.get_node_by_source('x = x + 1').ast_node
    read_x0 = graph.get_node_by_ast_node(stmt1.value.left)
    write_x1 = graph.get_node_by_ast_node(stmt1.targets[0])

    stmt2 = graph.get_node_by_source('x = (x + (x + x)) + x').ast_node
    read_x1 = graph.get_node_by_ast_node(stmt2.value.left.left)
    read_x2 = graph.get_node_by_ast_node(stmt2.value.left.right.left)
    read_x3 = graph.get_node_by_ast_node(stmt2.value.left.right.right)
    read_x4 = graph.get_node_by_ast_node(stmt2.value.right)
    write_x2 = graph.get_node_by_ast_node(stmt2.targets[0])

    read_x5 = graph.get_node_by_source_and_identifier('return x', 'x')

    self.assertEdge(graph, write_x1, read_x0, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x1, read_x0, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x2, read_x1, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x3, read_x2, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x4, read_x3, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, write_x2, read_x4, pb.EdgeType.LAST_READ)
    self.assertEdge(graph, read_x5, read_x4, pb.EdgeType.LAST_READ)

    self.assertEdge(graph, read_x0, write_x0, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, write_x1, write_x0, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, read_x2, write_x1, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, read_x3, write_x1, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, read_x4, write_x1, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, write_x2, write_x1, pb.EdgeType.LAST_WRITE)
    self.assertEdge(graph, read_x5, write_x2, pb.EdgeType.LAST_WRITE)

  def test_computed_from_edges(self):
    graph = program_graph.get_program_graph(pgtc.assignments)
    target_c = graph.get_node_by_source_and_identifier('c = 2 * a + 1', 'c')
    from_a = graph.get_node_by_source_and_identifier('c = 2 * a + 1', 'a')
    self.assertEdge(graph, target_c, from_a, pb.EdgeType.COMPUTED_FROM)

    target_d = graph.get_node_by_source_and_identifier('d = b - c + 2', 'd')
    from_b = graph.get_node_by_source_and_identifier('d = b - c + 2', 'b')
    from_c = graph.get_node_by_source_and_identifier('d = b - c + 2', 'c')
    self.assertEdge(graph, target_d, from_b, pb.EdgeType.COMPUTED_FROM)
    self.assertEdge(graph, target_d, from_c, pb.EdgeType.COMPUTED_FROM)

  def test_calls_edges(self):
    # The whole test components module is analyzed so that the call site and
    # the called function's definition are part of the same graph.
    graph = program_graph.get_program_graph(pgtc)
    call = graph.get_node_by_source('function_call_helper(x, y)')
    self.assertIsInstance(call.node, ast.Call)
    function_call_helper_def = graph.get_node_by_function_name(
        'function_call_helper')
    assignments_def = graph.get_node_by_function_name('assignments')
    self.assertEdge(graph, call, function_call_helper_def, pb.EdgeType.CALLS)
    self.assertNoEdge(graph, call, assignments_def, pb.EdgeType.CALLS)

  def test_formal_arg_name_edges(self):
    graph = program_graph.get_program_graph(pgtc)
    x = graph.get_node_by_source_and_identifier('function_call_helper(x, y)',
                                                'x')
    y = graph.get_node_by_source_and_identifier('function_call_helper(x, y)',
                                                'y')
    function_call_helper_def = graph.get_node_by_function_name(
        'function_call_helper')
    arg0_ast_node = function_call_helper_def.node.args.args[0]
    arg0 = graph.get_node_by_ast_node(arg0_ast_node)
    arg1_ast_node = function_call_helper_def.node.args.args[1]
    arg1 = graph.get_node_by_ast_node(arg1_ast_node)
    self.assertEdge(graph, x, arg0, pb.EdgeType.FORMAL_ARG_NAME)
    self.assertEdge(graph, y, arg1, pb.EdgeType.FORMAL_ARG_NAME)
    self.assertNoEdge(graph, x, arg1, pb.EdgeType.FORMAL_ARG_NAME)
    self.assertNoEdge(graph, y, arg0, pb.EdgeType.FORMAL_ARG_NAME)

  def test_returns_to_edges(self):
    graph = program_graph.get_program_graph(pgtc)
    call = graph.get_node_by_source('function_call_helper(x, y)')
    return_stmt = graph.get_node_by_source('return arg0 + arg1')
    self.assertEdge(graph, return_stmt, call, pb.EdgeType.RETURNS_TO)

  def test_syntax_information(self):
    # TODO(dbieber): Test that program graphs correctly capture syntax
    # information. Do this once representation of syntax in program graphs
    # stabilizes.
    pass

  def test_ast_acyclic(self):
    for name, fn in inspect.getmembers(cftc, predicate=inspect.isfunction):
      graph = program_graph.get_program_graph(fn)
      # Walk down from the root; reaching a node twice means the AST portion
      # of the graph is not a tree.
      ast_nodes = set()
      worklist = [graph.root]
      while worklist:
        current = worklist.pop()
        self.assertNotIn(
            current, ast_nodes,
            'ProgramGraph AST cyclic. Function {}\nAST {}'.format(
                name, graph.dump_tree()))
        ast_nodes.add(current)
        worklist.extend(graph.children(current))

  def test_neighbors_children_consistent(self):
    for unused_name, fn in inspect.getmembers(
        cftc, predicate=inspect.isfunction):
      graph = program_graph.get_program_graph(fn)
      for node in graph.all_nodes():
        if node.node_type == pb.NodeType.AST_NODE:
          children0 = set(graph.outgoing_neighbors(node, pb.EdgeType.FIELD))
          children1 = set(graph.children(node))
          self.assertEqual(children0, children1)

  def test_walk_ast_descendants(self):
    for unused_name, fn in inspect.getmembers(
        cftc, predicate=inspect.isfunction):
      graph = program_graph.get_program_graph(fn)
      for node in graph.walk_ast_descendants():
        self.assertIn(node, graph.all_nodes())

  def test_roundtrip_ast(self):
    for unused_name, fn in inspect.getmembers(
        cftc, predicate=inspect.isfunction):
      ast_representation = program_utils.program_to_ast(fn)
      graph = program_graph.get_program_graph(fn)
      ast_reproduction = graph.to_ast()
      self.assertEqual(ast.dump(ast_representation), ast.dump(ast_reproduction))

  def test_reconstruct_missing_ast(self):
    for unused_name, fn in inspect.getmembers(
        cftc, predicate=inspect.isfunction):
      graph = program_graph.get_program_graph(fn)
      ast_original = graph.root.ast_node
      # Remove the AST.
      for node in graph.all_nodes():
        node.ast_node = None
      # Reconstruct it.
      graph.reconstruct_ast()
      ast_reproduction = graph.root.ast_node
      # Check reconstruction.
      self.assertEqual(ast.dump(ast_original), ast.dump(ast_reproduction))
      # Check that all AST_NODE nodes are set.
      for node in graph.all_nodes():
        if node.node_type == pb.NodeType.AST_NODE:
          self.assertIsInstance(node.ast_node, ast.AST)
          self.assertIs(graph.get_node_by_ast_node(node.ast_node), node)
      # Check that old AST nodes are no longer referenced.
      self.assertFalse(graph.contains_ast_node(ast_original))

  def test_remove(self):
    graph = program_graph.get_program_graph(pgtc.assignments)

    # Iterate over a snapshot, since the loop removes and re-adds edges.
    for edge in list(graph.edges):
      # Remove the edge.
      graph.remove_edge(edge)
      self.assertNotIn(edge, graph.edges)
      self.assertNotIn((edge, edge.id2), graph.neighbors_map[edge.id1])
      self.assertNotIn((edge, edge.id1), graph.neighbors_map[edge.id2])

      if edge.type == pb.EdgeType.FIELD:
        self.assertNotIn(edge.id2, graph.child_map[edge.id1])
        self.assertNotIn(edge.id2, graph.parent_map)

      # Add the edge again.
      graph.add_edge(edge)
      self.assertIn(edge, graph.edges)
      self.assertIn((edge, edge.id2), graph.neighbors_map[edge.id1])
      self.assertIn((edge, edge.id1), graph.neighbors_map[edge.id2])

      if edge.type == pb.EdgeType.FIELD:
        self.assertIn(edge.id2, graph.child_map[edge.id1])
        self.assertIn(edge.id2, graph.parent_map)
289 |
290 |
# Run the tests when this file is executed directly as a script.
if __name__ == '__main__':
  absltest.main()
293 |
--------------------------------------------------------------------------------
/python_graphs/program_graph_test_components.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Test components for testing program graphs."""
16 |
17 |
18 | # pylint: disable=missing-docstring
19 | # pylint: disable=pointless-statement,undefined-variable
20 | # pylint: disable=unused-variable,unused-argument
21 | # pylint: disable=bare-except,lost-exception,unreachable
22 | # pylint: disable=global-variable-undefined
# Fixture: binds two locals, passes them to function_call_helper, and returns
# the result. The tests look up nodes by the exact source text of these
# statements, so they must not be reformatted.
def function_call():
  x = 1
  y = 2
  z = function_call_helper(x, y)
  return z
28 |
29 |
# Fixture: the callee of function_call. The tests reference its return
# statement by its exact source text and its parameters by position.
def function_call_helper(arg0, arg1):
  return arg0 + arg1
32 |
33 |
# Fixture: straight-line assignments in which `a` is written, read, and then
# written again. The tests look up nodes by the exact source text of these
# statements, so they must not be reformatted.
def assignments():
  a, b = 0, 0
  c = 2 * a + 1
  d = b - c + 2
  a = c + 3
  return a, b, c, d
40 |
41 |
# Fixture: declares three names global, assigns each one, and returns their
# sum.
def fn_with_globals():
  global global_a, global_b, global_c
  global_a = 10
  global_b = 20
  global_c = 30
  return global_a + global_b + global_c
48 |
49 |
# Fixture: defines a nested function but never calls it; the nested function's
# body is an infinite loop.
def fn_with_inner_fn():

  def inner_fn():
    while True:
      pass
55 |
56 |
# Fixture: reads and writes the same identifier several times, including
# multiple reads within one expression. The tests look up nodes by the exact
# source text of these statements, so they must not be reformatted.
def repeated_identifier():
  x = 0
  x = x + 1
  x = (x + (x + x)) + x
  return x
62 |
--------------------------------------------------------------------------------
/python_graphs/program_graph_visualizer.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | r"""Create program graph visualizations for the test components.
16 |
17 |
18 | Usage:
19 | python -m python_graphs.program_graph_visualizer
20 | """
21 |
22 | import inspect
23 |
24 | from absl import app
25 | from absl import logging # pylint: disable=unused-import
26 |
27 | from python_graphs import control_flow_test_components as tc
28 | from python_graphs import program_graph
29 | from python_graphs import program_graph_graphviz
30 |
31 |
def render_functions(functions):
  """Renders the program graph of each function to a PNG file.

  The images are written to /tmp/program_graphs/<name>.png. The output
  directory is created if it does not exist yet.

  Args:
    functions: An iterable of (name, function) pairs. The name is logged and
      used as the file name; the function is converted into a program graph.
  """
  import os  # pylint: disable=g-import-not-at-top

  output_dir = '/tmp/program_graphs'
  # The image cannot be written into a missing directory, so make sure the
  # output directory exists before rendering anything.
  os.makedirs(output_dir, exist_ok=True)

  for name, function in functions:
    logging.info(name)
    graph = program_graph.get_program_graph(function)
    path = '{}/{}.png'.format(output_dir, name)
    program_graph_graphviz.render(graph, path=path)
38 |
39 |
def main(argv):
  """Renders the program graph of every control flow test component."""
  del argv  # Unused.

  # getmembers already returns a list of (name, function) pairs.
  render_functions(inspect.getmembers(tc, predicate=inspect.isfunction))
48 |
49 |
# Run main through absl when this file is executed directly as a script.
if __name__ == '__main__':
  app.run(main)
52 |
--------------------------------------------------------------------------------
/python_graphs/program_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """Program utility functions."""
16 |
17 | import inspect
18 | import textwrap
19 | import uuid
20 |
21 | import gast as ast
22 | import six
23 |
24 |
def getsource(obj):
  """Returns the dedented source code of the given object.

  Bound methods are unwrapped to their underlying function before the source
  is looked up.

  Args:
    obj: A module, class, method, function, traceback, frame, or code object.
  Returns:
    The source of the object, if available.
  """
  target = obj.__func__ if inspect.ismethod(obj) else obj
  return textwrap.dedent(inspect.getsource(target))
39 |
40 |
def program_to_ast(program):
  """Returns the AST for a program given in one of several forms.

  An AST node is returned unchanged. A string is parsed as source code. For
  anything else the source is looked up with getsource and then parsed.

  Args:
    program: Either an AST node, source string, or a function.
  Returns:
    The root AST node of the AST representing the program.
  """
  if isinstance(program, ast.AST):
    return program
  is_source_text = isinstance(program, six.string_types)
  source = program if is_source_text else getsource(program)
  return ast.parse(source, mode='exec')
57 |
58 |
def unique_id():
  """Returns a unique id that is suitable for identifying graph nodes.

  The id is the low 64 bits of a random UUID, so it is a non-negative integer
  below 2**64.
  """
  random_uuid = uuid.uuid4()
  return random_uuid.int % (1 << 64)
62 |
63 |
--------------------------------------------------------------------------------
/python_graphs/unparser_patch.py:
--------------------------------------------------------------------------------
1 | import astunparse
2 | import gast as ast
3 |
4 |
# Allow boolean operators to be looked up either by class name or by the gast
# operator class itself.
astunparse.Unparser.boolops = {'And': 'and', 'Or': 'or', ast.And: 'and', ast.Or: 'or'}
6 |
7 |
def _arguments(self, t):
    """Writes the source text of a function's argument list.

    The arguments are emitted in order: positional-only and regular arguments
    (with defaults), the varargs or bare '*' marker, keyword-only arguments
    (with defaults), and finally the '**' keyword arguments.

    Args:
      self: The unparser; its write() and dispatch() methods are used.
      t: The arguments node to unparse.
    """
    wrote_any = False

    def separate():
        # Emits ", " before every argument except the first one written.
        nonlocal wrote_any
        if wrote_any:
            self.write(", ")
        wrote_any = True

    # Positional arguments. Defaults are right-aligned against them, so the
    # leading arguments without a default are paired with None.
    positional = getattr(t, 'posonlyargs', []) + t.args
    padding = [None] * (len(positional) - len(t.defaults))
    defaults = padding + t.defaults
    for position, (arg, default) in enumerate(zip(positional, defaults), 1):
        separate()
        self.dispatch(arg)
        if default:
            self.write("=")
            self.dispatch(default)
        # Close the positional-only section right after its last argument.
        if position == len(getattr(t, 'posonlyargs', ())):
            self.write(", /")

    # Varargs, or a bare '*' when there are keyword-only arguments but no
    # varargs.
    if t.vararg or getattr(t, "kwonlyargs", False):
        separate()
        self.write("*")
        vararg = t.vararg
        if vararg:
            if hasattr(vararg, 'arg'):
                self.write(vararg.arg)
                if vararg.annotation:
                    self.write(": ")
                    self.dispatch(vararg.annotation)
            else:
                # The vararg is stored as a plain name, with its annotation
                # kept on the arguments node.
                self.write(vararg)
                if getattr(t, 'varargannotation', None):
                    self.write(": ")
                    self.dispatch(t.varargannotation)

    # Keyword-only arguments.
    if getattr(t, "kwonlyargs", False):
        for arg, default in zip(t.kwonlyargs, t.kw_defaults):
            separate()
            self.dispatch(arg)
            if default:
                self.write("=")
                self.dispatch(default)

    # Keyword arguments.
    kwarg = t.kwarg
    if kwarg:
        separate()
        if hasattr(kwarg, 'arg'):
            self.write("**" + kwarg.arg)
            if kwarg.annotation:
                self.write(": ")
                self.dispatch(kwarg.annotation)
        elif hasattr(kwarg, 'id'):  # if this is a gast._arguments
            self.write("**" + kwarg.id)
            if kwarg.annotation:
                self.write(": ")
                self.dispatch(kwarg.annotation)
        else:
            # The kwarg is stored as a plain name, with its annotation kept on
            # the arguments node.
            self.write("**" + kwarg)
            if getattr(t, 'kwargannotation', None):
                self.write(": ")
                self.dispatch(t.kwargannotation)
# Install the patched implementation on astunparse's Unparser class.
astunparse.Unparser._arguments = _arguments
72 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | .
2 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2021 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """The setup.py file for python_graphs."""
16 |
17 | from setuptools import setup
18 |
# Description shown on the package's index page.
LONG_DESCRIPTION = """
python_graphs is a static analysis tool for performing control flow and data
flow analyses on Python programs, and for constructing Program Graphs.
Python Program Graphs are graph representations of Python programs suitable
for use with graph neural networks.
""".strip()

# One-line summary of the package.
SHORT_DESCRIPTION = """
A library for generating graph representations of Python programs.""".strip()

# Packages required at runtime.
DEPENDENCIES = [
    'absl-py',
    'astunparse',
    'gast',
    'networkx',
    'pygraphviz',
    'six',
]

# Additional packages required only for running the tests.
TEST_DEPENDENCIES = [
]

VERSION = '1.3.0'
URL = 'https://github.com/google-research/python-graphs'

setup(
    name='python_graphs',
    version=VERSION,
    description=SHORT_DESCRIPTION,
    long_description=LONG_DESCRIPTION,
    url=URL,

    author='David Bieber',
    author_email='dbieber@google.com',
    license='Apache Software License',

    classifiers=[
        'Development Status :: 4 - Beta',

        'Intended Audience :: Developers',
        'Topic :: Software Development :: Libraries :: Python Modules',

        'License :: OSI Approved :: Apache Software License',

        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',

        'Operating System :: OS Independent',
        'Operating System :: POSIX',
        'Operating System :: MacOS',
        'Operating System :: Unix',
    ],

    keywords='python program control flow data flow graph neural network',

    # Subpackages are not picked up automatically, so the analysis subpackage
    # has to be listed explicitly for it to be installed.
    packages=['python_graphs', 'python_graphs.analysis'],

    install_requires=DEPENDENCIES,
    tests_require=TEST_DEPENDENCIES,
)
83 |
--------------------------------------------------------------------------------