├── .gitignore ├── .readthedocs.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── _static │ ├── customize.css │ ├── logo.ico │ └── logo.svg ├── api │ ├── cli.rst │ ├── formatting.rst │ ├── match.rst │ ├── parsed_file.rst │ ├── python │ │ ├── evaluate.rst │ │ ├── matcher.rst │ │ ├── matchers.rst │ │ └── syntactic_template.rst │ ├── search.rst │ └── substitution.rst ├── conf.py ├── guide │ ├── errors │ │ ├── index.rst │ │ └── nonname_metavariables.rst │ ├── fixers │ │ ├── attrib_default.rst │ │ ├── idiom.rst │ │ ├── index.rst │ │ ├── literal_comparison.rst │ │ ├── logging_exceptions.rst │ │ ├── merged.rst │ │ ├── mutable_constants.rst │ │ ├── return_none.rst │ │ └── superfluous_parens.rst │ ├── library.rst │ └── patterns_templates.rst ├── index.rst └── meta │ ├── about.rst │ ├── contributing.rst │ └── credits.rst ├── examples ├── example_binary.py ├── example_custom_matcher.py └── test_example_custom_matcher.py ├── poetry.lock ├── pyproject.toml └── refex ├── __init__.py ├── cli.py ├── conftest.py ├── fix ├── __init__.py ├── find_fixer.py ├── fixer.py ├── fixers │ ├── __init__.py │ ├── correctness_fixers.py │ ├── idiom_fixers.py │ ├── test_correctness_fixers.py │ ├── test_idiom_fixers.py │ └── unittest_fixers.py ├── generate_example.py ├── test_fixer.py └── test_generate_example.py ├── formatting.py ├── match.py ├── parsed_file.py ├── python ├── README.md ├── __init__.py ├── error_strings.py ├── evaluate.py ├── matcher.py ├── matcher_test_util.py ├── matchers │ ├── __init__.py │ ├── ast_matchers.py │ ├── base_matchers.py │ ├── extern_matchers.py │ ├── lexical_matchers.py │ ├── syntax_matchers.py │ ├── test_ast_matchers.py │ ├── test_base_matchers.py │ ├── test_extern_matchers.py │ ├── test_lexical_matchers.py │ └── test_syntax_matchers.py ├── python_pattern.py ├── semiliteral_eval.py ├── syntactic_template.py ├── test_error_strings.py ├── test_evaluate.py ├── test_matcher.py ├── test_python_pattern.py ├── test_semiliteral_eval.py └── test_syntactic_template.py ├── refex_doctest.py ├── rxerr_debug.py ├── search.py ├── substitution.py ├── test_binary.py ├── test_cli.py ├── test_example_binary.py ├── test_formatting.py ├── test_parsed_file.py ├── test_rxerr_debug.py ├── test_search.py └── test_substitution.py /.gitignore: -------------------------------------------------------------------------------- 1 | .pytest_cache 2 | refex.egg-info 3 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | python: 4 | install: 5 | - method: pip 6 | path: . 7 | extra_requirements: 8 | - docs 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | ## Community Guidelines 4 | 5 | This project follows 6 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 7 | 8 | ## Changing Refex 9 | 10 | TODO: more explanation than just this. 11 | 12 | TODO: configuration for linter, yapf, isort (pending issue 13 | [#1486](https://github.com/PyCQA/isort/issues/1486)), pre-commit hooks (?), and 14 | CI/github actions. 15 | 16 | To run the tests: 17 | 18 | ```sh 19 | $ pipx run tox -e py38 20 | ``` 21 | 22 | Due to the use of the [absltest](https://abseil.io/docs/python/guides/testing) 23 | framework, Refex cannot use many test runners. See 24 | [conftest.py](https://github.com/ssbr/refex/blob/master/refex/conftest.py). 25 | 26 | ## Code Review 27 | 28 | Finally, send a pull request! 29 | 30 | All submissions, including submissions by project members, require code review. 31 | See [GitHub Help](https://help.github.com/articles/about-pull-requests/) for 32 | information on how to make a pull request. 33 | 34 | ### Contributor License Agreement 35 | 36 | Contributions to this project must be accompanied by a Contributor License 37 | Agreement (CLA). You (or your employer) retain the copyright to your 38 | contribution; this simply gives us permission to use and redistribute your 39 | contributions as part of the project. Head over to 40 | https://cla.developers.google.com/ to see your current agreements on file or 41 | to sign a new one. 42 | 43 | You generally only need to submit a CLA once, so if you've already submitted one 44 | (even if it was for a different project), you probably don't need to do it 45 | again. 46 | 47 | 48 | ## Why is the source code so weird? 49 | 50 | ### Two Space Indents 51 | 52 | Refex uses two space indents because it originated in 53 | [Google](https://google.github.io/styleguide/pyguide.html). You get used to it. 54 | In fact, because indents are 2 spaces, and hanging indents are 4 spaces, it's 55 | much easier in Google-style Python than most code to distinguish between nested 56 | code inside of a function, vs e.g. function parameters that went on many lines. 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Refex - refactoring expressions 2 | 3 | Refex is a syntactically aware search-and-replace tool for Python, which allows you to specify code searches and rewrites using templates, or a 4 | more complex 5 | [Clang-LibASTMatcher](https://clang.llvm.org/docs/LibASTMatchersTutorial.html#intermezzo-learn-ast-matcher-basics)-like 6 | matcher interface. 7 | 8 | ## Examples 9 | 10 | **Automatic parenthesis insertion:** Refex will automatically insert parentheses 11 | to preserve the intended code structure: 12 | 13 | ```sh 14 | $ echo "a = b.foo() * c" > test.py 15 | $ refex --mode=py.expr '$x.foo()' --sub='$x.foo() + 1' -i test.py 16 | ... 17 | $ cat test.py 18 | a = (b.foo() + 1) * c 19 | ``` 20 | 21 | A naive regular expression replacement would have resulted in `b.foo() + 1 * c`, which is not 22 | equivalent, and is unrelated to the intended replacement. 23 | 24 | **Paired parentheses:** Refex is aware of the full syntax tree, and will always match parentheses correctly: 25 | 26 | ```sh 27 | $ echo "print(foo(bar(b))" > test.py 28 | $ refex --mode=py.expr 'foo($x)' --sub='foo($x + 1)' -i test.py 29 | ... 30 | $ cat test.py 31 | a = print(foo(bar(b) + 1)) 32 | ``` 33 | 34 | Here, a naive replacement using regular expressions could have resulted in 35 | either `print(foo(bar(b)) + 1)` or `print(foo(bar(b) + 1))`, depending on 36 | whether `$x` was matched greedily or non-greedily. 37 | 38 | **Combining replacements:** you can pass multiple search/replace pairs to 39 | Refex which combine to do more complex rewrites. For example: 40 | 41 | ```sh 42 | # Rewrites "self.assertTrue(x == False)" to "self.assertFalse(x)", even though 43 | # that was not explicitly called out. 44 | refex --mode=py.expr -i --iterate \ 45 | --match='self.assertTrue($x == $y)' --sub='self.assertEqual($x, $y)' \ 46 | --match='self.assertEqual($x, False)' --sub='self.assertFalse($x)' \ 47 | -R dir/ 48 | ``` 49 | 50 | TODO: also describe `--mode=py`. 51 | 52 | ## Getting started 53 | 54 | ### Installation 55 | 56 | Refex can be run via [pipx](https://pipxproject.github.io/pipx/) for one-off use 57 | with control over the Python version: 58 | 59 | ```sh 60 | $ pipx run refex --help 61 | ``` 62 | 63 | For longer-term use, or for use of Refex [as a library](https://refex.readthedocs.io/en/latest/guide/library.html), 64 | it is also pip-installable: 65 | 66 | ```sh 67 | $ python3 -m venv my_env 68 | $ source my_env/bin/activate 69 | $ pip install refex 70 | $ refex --help 71 | ``` 72 | 73 | ### Use 74 | 75 | The template syntax is almost exactly what it looks like, so the examples at the 76 | top of this page, in combination with the `--help` output, are intended to be 77 | enough to get started. 78 | 79 | For more details on the template syntax, see [Python Patterns and Templates](https://refex.readthedocs.io/en/latest/guide/patterns_templates.html). For details on how to use refex in your own code as a library, see [Using Refex as a Library](https://refex.readthedocs.io/en/latest/guide/library.html). 80 | 81 | 82 | ## Current status 83 | 84 | **Stable:** 85 | 86 | The APIs documented at https://refex.readthedocs.io/ are expected to remain 87 | mostly the same, except for trivial renames and moves. 88 | 89 | These command-line interfaces are expected to remain roughly the same, without 90 | backwards-incompatible changes: 91 | 92 | * `--mode=py.expr` 93 | * `--mode=fix` 94 | * `--mode=re` 95 | 96 | **Unstable** 97 | 98 | * All undocumented APIs (*especially* the API for creating a new matcher). 99 | * `--mode=py.stmt` is missing many safety and convenience features. 100 | * `--mode=py`, the matcher interface, will eventually need some fairly large 101 | restructuring to make it O(n), although simple uses should be unaffected. 102 | 103 | (Also, all the stable parts are unstable too. This isn't a promise, just an 104 | expectation/statement of intent.) 105 | 106 | ## Contributing 107 | 108 | See the 109 | [contribution guide](https://refex.readthedocs.io/en/latest/meta/contributing.html) 110 | 111 | ## See Also 112 | 113 | * [asttokens](https://github.com/gristlabs/asttokens): the token-preserving 114 | AST library that Refex is built on top of. 115 | * [Pasta](https://github.com/google/pasta): a code rewriting tool using AST 116 | mutation instead of string templates. 117 | * [Semgrep](https://github.com/returntocorp/semgrep): cross-language AST 118 | search using a similar approach. 119 | * [lib2to3](https://docs.python.org/3/library/2to3.html#module-lib2to3): the 120 | standard library's code rewriting tool based on the concrete syntax tree. 121 | 122 | ## Disclaimer 123 | 124 | You may have noticed Google copyright notices. This is not an officially 125 | supported Google product. 126 | -------------------------------------------------------------------------------- /docs/_static/customize.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020 Google LLC 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | /* Hide the home icon at the top-left. */ 18 | .wy-side-nav-search a.icon-home:before { 19 | display: none; 20 | } 21 | 22 | /* yikes. */ 23 | img.logo { 24 | max-width: 20% !important; 25 | display: inline; 26 | } 27 | 28 | /* rtd theme doesn't respect my radical orange on mobile >:( */ 29 | .wy-nav-top { 30 | background: #e64a19; 31 | } 32 | 33 | /* my radical orange makes the text harder to read so it needs to get bigger */ 34 | .wy-nav-top a { 35 | font-size: larger; 36 | } 37 | -------------------------------------------------------------------------------- /docs/_static/logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ssbr/refex/e5a5b8451557cdfb39719fc244844dcd5fb6263b/docs/_static/logo.ico -------------------------------------------------------------------------------- /docs/_static/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 42 | 45 | 46 | 48 | 49 | 51 | image/svg+xml 52 | 54 | 55 | 56 | 57 | Devin Jeanpier 58 | 59 | 60 | 61 | 62 | 63 | 67 | 70 | 75 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /docs/api/cli.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.cli 2 | :members: 3 | -------------------------------------------------------------------------------- /docs/api/formatting.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.formatting 2 | -------------------------------------------------------------------------------- /docs/api/match.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.match 2 | :members: 3 | :show-inheritance: 4 | -------------------------------------------------------------------------------- /docs/api/parsed_file.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.parsed_file 2 | :members: 3 | -------------------------------------------------------------------------------- /docs/api/python/evaluate.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.python.evaluate 2 | :members: -------------------------------------------------------------------------------- /docs/api/python/matcher.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.python.matcher 2 | -------------------------------------------------------------------------------- /docs/api/python/matchers.rst: -------------------------------------------------------------------------------- 1 | :mod:`refex.python.matchers` 2 | ============================ 3 | 4 | .. module:: refex.python.matchers 5 | 6 | .. automodule:: refex.python.matchers.base_matchers 7 | 8 | .. automodule:: refex.python.matchers.syntax_matchers 9 | 10 | .. automodule:: refex.python.matchers.lexical_matchers 11 | 12 | .. automodule:: refex.python.matchers.ast_matchers 13 | :members: 14 | 15 | .. automodule:: refex.python.matchers.extern_matchers 16 | -------------------------------------------------------------------------------- /docs/api/python/syntactic_template.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.python.syntactic_template 2 | -------------------------------------------------------------------------------- /docs/api/search.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.search 2 | :members: 3 | -------------------------------------------------------------------------------- /docs/api/substitution.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: refex.substitution 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # -- Path setup -------------------------------------------------------------- 16 | 17 | import os 18 | import sys 19 | 20 | sys.path.insert(0, os.path.abspath('..')) 21 | 22 | # -- Project information ----------------------------------------------------- 23 | 24 | project = 'Refex' 25 | copyright = '2020, Google LLC' 26 | author = 'Devin Jeanpierre' 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | extensions = [ 31 | 'sphinx.ext.autodoc', 32 | 'sphinx.ext.intersphinx', 33 | 'sphinx.ext.napoleon', 34 | 'm2r', 35 | ] 36 | 37 | intersphinx_mapping = { 38 | 'python': ('https://docs.python.org/3', None), 39 | 'attrs': ('https://www.attrs.org/en/stable/', None), 40 | 'asttokens': ('https://asttokens.readthedocs.io/en/latest/', None), 41 | } 42 | 43 | templates_path = ['_templates'] 44 | 45 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 46 | 47 | # -- Options for HTML output ------------------------------------------------- 48 | 49 | html_theme = 'sphinx_rtd_theme' 50 | html_logo = '_static/logo.svg' 51 | html_favicon = '_static/logo.ico' 52 | html_css_files = [ 53 | 'customize.css', 54 | ] 55 | 56 | globaltoc_includehidden = True 57 | add_module_names = False 58 | autodoc_member_order = 'bysource' 59 | 60 | html_theme_options = { 61 | 'includehidden': True, 62 | 'style_nav_header_background': '#e64a19', 63 | 'logo_only': False, 64 | } 65 | 66 | html_static_path = ['_static'] 67 | -------------------------------------------------------------------------------- /docs/guide/errors/index.rst: -------------------------------------------------------------------------------- 1 | Common Errors 2 | ============= 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | 7 | nonname_metavariables 8 | -------------------------------------------------------------------------------- /docs/guide/errors/nonname_metavariables.rst: -------------------------------------------------------------------------------- 1 | Non-Name Metavariables 2 | =============================================== 3 | 4 | .. TODO: b/117837631 tracks fixing this. 5 | 6 | Metavariables can only occur where ``ast.Name`` nodes can occur in the AST. 7 | For example, ``$foo.bar`` works because ``foo.bar`` is a valid expression where 8 | ``foo`` is a ``Name`` node. 9 | 10 | .. TODO: less harsh highlighting of good/bad 11 | 12 | Works:: 13 | 14 | $foo.attr 15 | $foo + 3 16 | 17 | .. error:: 18 | 19 | Doesn't work:: 20 | 21 | obj.$foo 22 | def $foo(): pass 23 | import $foo as $bar 24 | 25 | When debugging this error message, it can help to use :func:`ast.dump` with 26 | every ``$`` removed to see if the AST produces a ``Name`` node there. For 27 | example, when debugging why ``obj.$foo`` or ``import $foo`` won't work, we could 28 | print these ASTs: 29 | 30 | >>> import ast 31 | >>> print(ast.dump(ast.parse('obj.foo').body[0])) 32 | Expr(value=Attribute(value=Name(id='obj', ctx=Load()), attr='foo', ctx=Load())) 33 | 34 | >>> print(ast.dump(ast.parse('import foo').body[0])) 35 | Import(names=[alias(name='foo', asname=None)]) 36 | 37 | 38 | We can see from this the place we wanted to use a metavariable is not a ``Name`` 39 | node (``attr='foo'``, ``name='foo'``). These places cannot be matched using 40 | metavariables in a pattern. 41 | 42 | For more about metavariables and Python patterns, see 43 | :doc:`/guide/patterns_templates`. -------------------------------------------------------------------------------- /docs/guide/fixers/attrib_default.rst: -------------------------------------------------------------------------------- 1 | Mutable defaults in :func:`attr.ib()` 2 | ===================================== 3 | 4 | Mutable defaults to passed to :func:`attr.ib` should always be specified as 5 | factories instead. 6 | 7 | Attributes defined using ``attrs`` share the default value across all 8 | instantiations, unless another value is passed. For example: 9 | 10 | .. caution:: 11 | 12 | .. code-block:: 13 | 14 | import attr 15 | @attr.s 16 | class A(object): 17 | x = attr.ib(default=[]) 18 | 19 | a1 = A() 20 | a2 = A() 21 | 22 | a1.x.append(0) 23 | print(a2.x) # Output: [0] 24 | 25 | ``attrs`` lets users work around this by specifying a *factory* which 26 | is called on every instantiation, instead of a *default*, which is evaluated 27 | only once, at class definition time. The most general way to specify this is 28 | with :class:`attr.Factory`, but for simple cases, it is easier to pass a 29 | callback to the ``factory`` parameter:: 30 | 31 | class B(object): 32 | x = attr.ib(factory=list) 33 | 34 | b1 = B() 35 | b2 = B() 36 | 37 | b1.x.append(0) 38 | print(b2.x) # Output: [] 39 | 40 | 41 | Any argument which is mutable, and any argument which can change over time, 42 | should as a rule be passed as a factory. Exceptions should be shockingly rare, 43 | and documented clearly. 44 | -------------------------------------------------------------------------------- /docs/guide/fixers/idiom.rst: -------------------------------------------------------------------------------- 1 | Idiomatic Python 2 | ================ 3 | 4 | Sometimes the clearest way to spell something isn't the most obvious at first. 5 | These fixers try to reword code in a way that is more or less universally agreed 6 | to be Pythonic. By canonicalizing expressions, they also enable chaining with 7 | other fixers via the :doc:`merged fixes ` feature of Refex. 8 | -------------------------------------------------------------------------------- /docs/guide/fixers/index.rst: -------------------------------------------------------------------------------- 1 | Fix Explanations 2 | ================ 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | merged 8 | attrib_default 9 | idiom 10 | literal_comparison 11 | logging_exceptions 12 | mutable_constants 13 | superfluous_parens 14 | return_none 15 | 16 | 17 | -------------------------------------------------------------------------------- /docs/guide/fixers/literal_comparison.rst: -------------------------------------------------------------------------------- 1 | Literal Comparison 2 | ================== 3 | 4 | .. seealso:: pylint `literal-comparison (R0123) 5 | `_ 6 | 7 | **tl;dr:** it's possible for ``x is 1`` to be ``False``, but ``x == 1`` to be 8 | ``True``, and when/whether this happens depends entirely on implementation 9 | details of Python. It is always a mistake to compare using ``is`` against a 10 | literal. 11 | 12 | What the meaning of ``is`` is 13 | ----------------------------- 14 | 15 | For most types, most of the time, ``x == y`` means something like 16 | "``x`` and ``y`` are interchangeable, as long as you aren't mutating them". 17 | 18 | ``x is y``, in contrast, has a strict and narrow meaning. It means "``x`` 19 | *refers to the same object* as ``y``". As a consequence, it means ``x`` and 20 | ``y`` are interchangeable in *all* circumstances, even if you are mutating them! 21 | 22 | Mutable Literals 23 | ~~~~~~~~~~~~~~~~ 24 | 25 | Because mutable literals are defined to always evaluate to a new object, 26 | expressions like ``x is []`` or ``x is {}`` will always evaluate to ``False``, 27 | which is probably not the intended behavior. 28 | 29 | Immutable Literals 30 | ~~~~~~~~~~~~~~~~~~ 31 | 32 | Because ``is`` detects equivalence under mutation, and immutable objects cannot 33 | be mutated, it stands to reason that every equal immutable object could be 34 | identical, as long as they are really *truly* equivalent. This is the approach 35 | taken by PyPy, as well as the approach taken in e.g. JavaScript. To allow for 36 | this, Python allows for immutable literals to be the same object. For example: 37 | 38 | >>> 1e6 is 1e6 39 | True 40 | 41 | But Python also allows for them to *not* be the same object: 42 | 43 | >>> x = 1e6 44 | >>> x is 1e6 45 | False 46 | 47 | Whether the an identical immutable literal expression is the same object, or a 48 | different but equal object, is implementation-defined. As a result, expressions 49 | like ``x is ()``, ``x is 1.0``, ``x is 1``, ``x is b""``, or ``x is ""`` may 50 | evaluate to either ``True`` or ``False``, or may even choose randomly between 51 | the two. It is always incorrect to compare literals in this way, because of 52 | the implementation-defined behavior. 53 | 54 | Named Constants 55 | ~~~~~~~~~~~~~~~ 56 | 57 | ``True``, ``False``, and ``None`` are not included in this rule: if a piece of 58 | code should compare specifically against any of these three, it should use 59 | ``is``. They are *always* the same object, and ``x is None`` is not buggy. 60 | 61 | ``is`` for these values can be used to distinguish ``None`` or ``False`` from 62 | values like ``0`` or ``[]``, and ``True`` from values like ``1`` or ``[1]``. 63 | For bools, howver, this kind of explicit comparison is rare: most of the time 64 | ``x is True`` can be better phrased as just ``x``, and ``x is False`` can be 65 | better phrased as ``not x``. 66 | 67 | The Pedantic Section 68 | -------------------- 69 | 70 | ``x == y`` can't and doesn't *literally* mean that two objects are 71 | interchangeable as long as you don't mutate them. For one thing, ``x is y`` may 72 | evaluate to something different than ``y is y``. 73 | 74 | A more complete definition would be something like: "are interchangeable as long 75 | as you don't perform any identity-aware operations." 76 | 77 | But even that is not enough, as anyone can define ``__eq__``. For example, 78 | ``mock.ANY`` compares equal to everything but is not equivalent to anything but 79 | ``mock.ANY``. And the designers of floating point numbers were particularly 80 | cruel in defining a ``-0.0`` that compares equal to ``0.0``, but has subtly 81 | different arithmetic behavior. And neither ``-0.0`` nor ``0.0`` can be used as 82 | a list index, even though they both compare equal to ``0``, a valid list index. 83 | 84 | There may be fewer things that follow the rule than that don't. But in some 85 | spiritual sense, the idea behind ``==`` is interchangeability absent mutation 86 | and other identity-centered operations, and the rest is practical shortcuts. 87 | -------------------------------------------------------------------------------- /docs/guide/fixers/logging_exceptions.rst: -------------------------------------------------------------------------------- 1 | Logging Exceptions 2 | ================== 3 | 4 | Normally, none of the logging functions except :func:`logging.exception()` 5 | include a stack trace. To include a stack trace, pass ``exc_info=True`` (e.g. 6 | to log an exception + stack trace at a severity *less* than ``ERROR``), or 7 | use :func:`logging.exception()`. 8 | 9 | If there is a stack trace included, then the exception object itself is 10 | redundant. 11 | 12 | Example 13 | ------- 14 | 15 | :: 16 | 17 | try: 18 | ... 19 | except Exception as e: 20 | logging.error(e) 21 | ... 22 | 23 | This might seem innocuous at first. It's not too uncommon to catch any exception 24 | at the top of your request / event handling loop, log it, and move on. But if 25 | you actually encounter an exception, what log message do you get? One example 26 | might be:: 27 | 28 | ERROR:root:0 29 | 30 | Completely unhelpful! ``ERROR`` is the severity, ``root`` is the logger, and 31 | ``0`` could mean anything. In this case, maybe it was:: 32 | 33 | {}[0] 34 | 35 | Which raises a ``KeyError: 0``. But ``logging.error(e)`` doesn't include the 36 | ``KeyError``, because it's equivalent to ``logging.error(str(e))``, and 37 | ``str(e)`` does not include the type. 38 | 39 | One way out, if you really don't want to include the stack trace, would be to 40 | manually include the exception name:: 41 | 42 | logging.error('%s: %s', type(e).__name_, e) 43 | 44 | But since more information is better, it's more helpful to include the stack 45 | trace:: 46 | 47 | logging.error("", exc_info=True) 48 | # or: 49 | logging.exception("") 50 | 51 | Since the error message is already included in the stack trace, the log message 52 | should be something useful, rather than ``""`` or ``e``. 53 | ``logging.exception(e)`` is redundant, and ``logging.exception("")`` misses an 54 | opportunity to provide context, specify what the inputs were, etc. 55 | -------------------------------------------------------------------------------- /docs/guide/fixers/merged.rst: -------------------------------------------------------------------------------- 1 | Merged fixes 2 | ============ 3 | 4 | Sometimes, multiple fixes apply to the same span of code. For example, we might 5 | want a fixer to replace ``deprecated($x)`` with ``nondeprecated($x)``. So what 6 | if we see the line ``deprecated(deprecated(0))``? 7 | 8 | One approach, which Refex often follows, is to suggest multiple rewrites. This 9 | works great if they do not overlap at all -- perhaps if we just replace the span 10 | ``deprecated`` with ``nondeprecated``. But if they do overlap, Refex will 11 | iteratively try to apply as many fixes as it can in one go. 12 | 13 | The resulting message might be a bit confusing. We concatenate all the 14 | "important" messages together with their explanatory URLs. The "unimportant" 15 | ones are generally for trivial fixes that don't really matter in context -- e.g. 16 | a spelling correction, or a minor :doc:`idiomatic ` rewording. 17 | -------------------------------------------------------------------------------- /docs/guide/fixers/mutable_constants.rst: -------------------------------------------------------------------------------- 1 | Mutable Constants 2 | ================= 3 | 4 | Mutable globals that *look* like immutable constants (e.g. following 5 | ``ALL_CAPS`` naming convention) can lead to hard-to-find bugs. If possible, it's 6 | better to use an immutable global instead. 7 | 8 | **Alternatives:** 9 | 10 | +----------+-------------------------------------------------------------------+ 11 | | Before | After | 12 | +==========+===================================================================+ 13 | | ``list`` | ``tuple`` | 14 | +----------+-------------------------------------------------------------------+ 15 | | ``set`` | ``frozenset`` | 16 | +----------+-------------------------------------------------------------------+ 17 | | ``dict`` | frozendict_ | 18 | | | | 19 | | | See also: `PEP 603`_; `PEP 416`_ | 20 | +----------+-------------------------------------------------------------------+ 21 | 22 | .. _frozendict: https://pypi.org/project/frozendict/ 23 | .. _PEP 603: https://www.python.org/dev/peps/pep-0603/ 24 | .. _PEP 416: https://www.python.org/dev/peps/pep-0416/ 25 | -------------------------------------------------------------------------------- /docs/guide/fixers/return_none.rst: -------------------------------------------------------------------------------- 1 | How To Return None 2 | ================== 3 | 4 | `PEP-8 `_ offers guidance on how to 5 | return ``None`` ("Be consistent in return statements. [...]"), which can be 6 | slightly extended into the following rules of thumb: 7 | 8 | * If a function only returns ``None``, only "bare returns" (a ``return`` 9 | statement with no return expression or value) should be used, and only to 10 | return early. 11 | 12 | * If a function returns ``Optional[...]``, then all code paths should have a 13 | non-bare ``return`` statement. 14 | -------------------------------------------------------------------------------- /docs/guide/fixers/superfluous_parens.rst: -------------------------------------------------------------------------------- 1 | Parentheses around a single variable in string formatting 2 | ========================================================= 3 | 4 | Parentheses around a single item in Python has no effect: ``(foo)`` is exactly 5 | equivalent to ``foo``. In many cases this is harmless, but it can suggest a 6 | subtle bug when used in string formatting. A '`%`'-formatted string with a 7 | single format specifier can be formatted using a single value or a one element 8 | tuple: ``'hello %s' % name`` or ``'hello %s' % (name,)``. The latter is safer if 9 | there's a chance the `name` variable could itself be a tuple: 10 | 11 | .. code-block:: 12 | :emphasize-lines: 7 13 | 14 | name = 'World' 15 | 'hello %s' % (name,) # "hello World" 16 | 'hello %s' % name # "hello World" 17 | 18 | name = ('World', 'Universe') 19 | 'hello %s' % (name,) # "hello ('World', 'Universe') 20 | 'hello %s' % name # TypeError: not all arguments converted during string formatting 21 | 22 | Consequently, a line like ``error_msg = 'Cannot process %s' % (data)`` may leave 23 | code reviewers and future readers unsure if there is a subtle bug if ``data`` is 24 | a tuple. Did the author *mean* to write ``(data,)`` but forgot the comma? Prefer 25 | to be explicit in these cases: Either drop the parentheses or add a comma. 26 | -------------------------------------------------------------------------------- /docs/guide/library.rst: -------------------------------------------------------------------------------- 1 | Using Refex as a Library 2 | ======================== 3 | 4 | Alright, your one-line shell script change isn't enough anymore. What's next? 5 | 6 | Create a searcher 7 | ----------------- 8 | 9 | The "thing that does a search and a replace" is a searcher: any subclass of 10 | :class:`refex.search.AbstractSearcher`. You likely want 11 | :class:`~refex.search.PyExprRewritingSearcher`, and, for the replacement, an 12 | instance of :class:`refex.python.syntactic_template.PythonExprTemplate`. 13 | 14 | This ends up a bit clunky, but you can see how it works in the example at the 15 | bottom of the page. 16 | 17 | If you want to manipulate, filter, or otherwise look at the replacements 18 | being performed, this is where you can hook in: define a new searcher that 19 | wraps the old one and modifies its results. 20 | 21 | Execute the search / replace 22 | ---------------------------- 23 | 24 | Apply a searcher to a string 25 | ............................ 26 | 27 | :func:`refex.search.rewrite_string()` executes a simple rewrite. 28 | 29 | Alternatively, you can collect a list of 30 | :class:`~refex.substitution.Substitution` objects and apply them in a second 31 | pass, using :func:`refex.search.find_iter()` and 32 | :func:`refex.formatting.apply_substitutions()` -- but really, it's better 33 | to manipulate those substitutions from a custom searcher, since that 34 | searcher can also be used e.g. to create an executable, as the section below 35 | describes. 36 | 37 | Create an executable 38 | .................... 39 | 40 | The same colorization, diff display, etc. as the :command:`refex` command can 41 | be yours: instead of rewriting individual strings, you can pass the searcher 42 | to :func:`refex.cli.run`. 43 | 44 | Here's a complete example: 45 | 46 | .. literalinclude:: /../examples/example_binary.py 47 | :lines: 14- 48 | -------------------------------------------------------------------------------- /docs/guide/patterns_templates.rst: -------------------------------------------------------------------------------- 1 | Python Patterns and Templates 2 | ============================= 3 | 4 | Refex offers in several places the ability to provide a python-like 5 | pattern or template with metavariables like ``$foo``: 6 | 7 | * ``--mode=py.expr`` and ``--mode=py.stmt`` on the command line. 8 | * :class:`~refex.python.matchers.syntax_matchers.ExprPattern` and 9 | :class:`~refex.python.matchers.syntax_matchers.StmtPattern` with 10 | ``--mode=py`` or when using :mod:`refex.python.matchers.syntax_matchers` 11 | directly. 12 | 13 | These are parsed as Python code (except for the ``$metavariables``, of course), 14 | and let you specify an AST matcher by example. They are in many ways a 15 | shallow layer on top of writing out an AST by hand using 16 | :mod:`~refex.python.matchers.ast_matchers`. 17 | 18 | Rules: 19 | 20 | * A metavariable is any variable name preceded by a ``$``. 21 | 22 | * A metavariable can only be placed anywhere in the pattern that a Python 23 | ``ast.Name`` AST node is valid. For example, ``$foo.bar`` is OK, but 24 | ``foo.$bar`` is not. 25 | 26 | * A metavariable matches any AST. 27 | 28 | * If the same metavariable occurs twice in a pattern, each place must match 29 | a structurally identical AST, following the same rules as pattern 30 | matching without metavariables. 31 | 32 | * A variable name pattern always matches the same variable name in the target, 33 | even if one is an rvalue (i.e. used in an expression) and the other is an 34 | lvalue (i.e. used as the target of an assignment). 35 | 36 | For example, ``a`` matches twice in ``a = a``. 37 | 38 | * Otherwise, a pattern matches structurally in the obvious way (e.g. 39 | ``a1 + b1`` matches ``a2 + b2`` if ``a1`` matches ``a2``, and ``b1`` matches 40 | ``b2``.) 41 | 42 | .. important:: This is purely syntactic. ``{a, b}`` does not match 43 | ``{b, a}``. 44 | 45 | * Comments are completely ignored in both the template and the target. 46 | 47 | There is currently no support for n-ary wildcards, like ``{a, $..., b}``. 48 | 49 | Templates 50 | --------- 51 | 52 | Templates are syntactically identical to patterns, but represent the opposite 53 | direction: instead of an AST to match, they describe an AST to create. 54 | 55 | Rules: 56 | 57 | * Syntactically, templates are identical to patterns. (e.g. metavariables 58 | can only occur where an ``ast.Name`` could. 59 | 60 | * The result of substitution into a template will always be structurally 61 | identical to that template. In other words, if the template were 62 | reinterpreted as a pattern, it would always match the substitution result. 63 | 64 | For example, rendering ``$a * 3`` with a = ``1 + 2`` results in 65 | ``(1 + 2) * 3``. Parentheses are inserted as necessary. 66 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | .. mdinclude:: ../README.md 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :hidden: 7 | :caption: Guide 8 | 9 | guide/patterns_templates 10 | guide/library 11 | guide/fixers/index 12 | guide/errors/index 13 | 14 | .. toctree:: 15 | :maxdepth: 4 16 | :hidden: 17 | :caption: API 18 | 19 | api/cli 20 | api/formatting 21 | api/match 22 | api/parsed_file 23 | api/search 24 | api/substitution 25 | api/python/evaluate 26 | api/python/matcher 27 | api/python/matchers 28 | api/python/syntactic_template 29 | 30 | .. toctree:: 31 | :maxdepth: 1 32 | :hidden: 33 | :caption: Project Info 34 | 35 | meta/about 36 | meta/contributing 37 | meta/credits 38 | 39 | 40 | 41 | Indices and tables 42 | ------------------ 43 | 44 | * :ref:`genindex` 45 | * :ref:`modindex` 46 | * :ref:`search` 47 | 48 | -------------------------------------------------------------------------------- /docs/meta/about.rst: -------------------------------------------------------------------------------- 1 | About Refex 2 | =========== 3 | 4 | Goals 5 | ----- 6 | 7 | Safety: 8 | A rewrite should "do the right thing" in unexpected circumstances. 9 | 10 | For example, deleting a statement should result in ``pass`` being inserted, 11 | if that statement is the last statement in the block. Replacing an 12 | expression should result in additional parentheses if the context requires 13 | them in order to preserve the parse order. 14 | 15 | Ease-of-use: 16 | Safety aside, everything Refex does was possible beforehand, using tools 17 | such as :mod:`lib2to3`. Refex aims to be something fun to use, while still 18 | allowing power users to dig into low level code. 19 | 20 | Stdlib :mod:`ast`: 21 | Many tools write their own AST library, which then needs to be kept up to 22 | date. Refex aims to use the builtin :mod:`ast` module as the lingua franca 23 | of code analysis and rewriting. 24 | 25 | (We owe a great debt to the :mod:`asttokens` module for making this 26 | possible -- and easy.) 27 | 28 | It may be desirable at points to synthetically alter or augment the AST. 29 | For example, insertion of paren-expression nodes, or comment-line 30 | statements. All such alterations will, however, be totally ignorable, and 31 | layered on *top* of the AST as a separate information channel, rather than 32 | replacing it. 33 | 34 | Non-goals 35 | ~~~~~~~~~ 36 | 37 | Speed: 38 | It is fine if a rewrite across the whole multi-million-line codebase has to 39 | be done overnight, as long as it is possible to safely perform such a 40 | rewrite. 41 | 42 | Static Analysis: 43 | Ideally, Refex should be able to consume static analyses which annotate the 44 | AST, rather than producing such analyses itself. 45 | 46 | Multi-language support: 47 | OK, OK, supporting multiple languages would be pretty rad. This isn't 48 | ruled out forever -- especially for languages that lack such tools, and are 49 | also in addition very cool (Rust?) 50 | 51 | But there *are* tools out there, most of the time. Some of them, like 52 | semgrep, are already general across multiple languages. Where refex excels 53 | is in being very, very knowledgeable about *Python*, and trafficking in 54 | standard Python datastructures and modules like :mod:`ast`. 55 | 56 | Refex will gladly support multi-language tools calling into it. It would 57 | be fantastic if e.g. semgrep utilized Refex as a backend. But the 58 | other way around might be too ambitious and too much duplication of 59 | effort. 60 | -------------------------------------------------------------------------------- /docs/meta/contributing.rst: -------------------------------------------------------------------------------- 1 | 2 | .. mdinclude:: ../../CONTRIBUTING.md 3 | -------------------------------------------------------------------------------- /docs/meta/credits.rst: -------------------------------------------------------------------------------- 1 | Credits 2 | ======= 3 | 4 | Refex is just some 20% project, so it's been incredibly gratifying to have 5 | people's support and contribution. Thank you! 6 | 7 | Due to copyright assignment and squashed export commits, not all of the 8 | contributors are represented in the change history or copyright notice. Refex 9 | owes a huge debt of gratitude to *all* its contributors: 10 | 11 | * Chris Rebert (`@cvrebert `_) 12 | * Devin Jeanpierre (`@ssbr `_) 13 | * Gregory P. Smith (`@gpshead `_) 14 | * Joshua Morton (`@joshuamorton `_) 15 | * Matthew Suozzo 16 | * Nathan Marrow 17 | * Nick Smith (`@soupytwist `_) 18 | * Richard Levasseur (`@rickeylev `_) 19 | 20 | Refex would also not be possible without the amazing 21 | `asttokens `_ library, and of course 22 | `Python `_ itself. 23 | -------------------------------------------------------------------------------- /examples/example_binary.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """An example binary using refex.cli.run() to run a preprogrammed search/replace. 15 | 16 | This is similar in functionality to a shell script that runs: 17 | 18 | refex --mode=py.expr hello --sub=world -i "$@" 19 | """ 20 | 21 | 22 | import sys 23 | 24 | from refex import cli 25 | from refex import search 26 | from refex.python import syntactic_template 27 | from refex.python.matchers import syntax_matchers 28 | 29 | 30 | def main(): 31 | cli.run( 32 | runner=cli.RefexRunner( 33 | searcher=search.PyExprRewritingSearcher.from_matcher( 34 | # --mode=py.expr is equivalent to PyExprRewritingSearcher paired 35 | # with an ExprPattern. However, you can pass any matcher, not just 36 | # an ExprPattern. 37 | syntax_matchers.ExprPattern('hello'), 38 | { 39 | # Using ROOT_LABEL as a key is equivalent to --sub=world. 40 | # To get the equivalent of --named-sub=x=world, 41 | # it would 'x' as a key instead. 42 | # 43 | # The value type corresponds to the --sub-mode. While 44 | # refex on the command line defaults to picking the paired 45 | # --sub-mode that matches the --mode, here there are no 46 | # defaults and you must be explicit. 47 | # e.g. for unsafe textual substitutions, as with 48 | # --sub-mode=sh, you would use formatting.ShTemplate. 49 | search.ROOT_LABEL: 50 | syntactic_template.PythonExprTemplate('world') 51 | }, 52 | ), 53 | dry_run=False, 54 | ), 55 | files=sys.argv[1:], 56 | bug_report_url='', 57 | ) 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /examples/example_custom_matcher.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """A matcher which matches an AST for a sum expression, and the sum itself. 15 | 16 | For example, replacing SumMatcher() with "$sum" will replace 17 | ``1 + 2 + 3`` with ``6``. 18 | """ 19 | 20 | import ast 21 | 22 | import attr 23 | 24 | from refex.python import matcher 25 | from refex.python.matchers import base_matchers 26 | 27 | 28 | @attr.s(frozen=True) 29 | class SumMatcher(matcher.Matcher): 30 | bind_variables = frozenset({"sum"}) 31 | 32 | def _match(self, context, candidate): 33 | if not isinstance(candidate, ast.AST): 34 | return None 35 | 36 | # Walk the AST to collect the answer: 37 | values = [] 38 | for node in ast.walk(candidate): 39 | # Every node must either be a Constant/Num or an addition node. 40 | if isinstance(node, ast.Constant): 41 | values.append(node.value) 42 | elif isinstance(node, ast.Num): # older pythons 43 | values.append(node.n) 44 | elif isinstance(node, ast.BinOp) or isinstance(node, ast.Add): 45 | # Binary operator nodes are allowed, but only if they have an Add() op. 46 | pass 47 | else: 48 | return None # not a +, not a constant 49 | 50 | # For more complex tasks, or for tasks which integrate into how Refex 51 | # builds results and bindings, it can be helpful to defer work into a 52 | # submatcher, such as by running BinOp(op=Add()).match(context, candidate) 53 | 54 | # Having walked the AST, we have determined that the whole tree is addition 55 | # of constants, and have collected all of those constants in a list. 56 | if len(values) <= 1: 57 | # Don't bother emitting a replacement for e.g. 7 with itself. 58 | return None 59 | result = str(sum(values)) 60 | 61 | # Finally, we want to return the answer to Refex: 62 | # 1) bind the result to a variable 63 | # 2) return the tree itself as the matched value 64 | 65 | # We can do this by deferring to a matcher that does the right thing. 66 | # StringMatch() will produce a string literal match, and AllOf will retarget 67 | # the returned binding to the AST node which was passed in. 68 | submatcher = base_matchers.AllOf( 69 | base_matchers.Bind("sum", base_matchers.StringMatch(result))) 70 | return submatcher.match(context, candidate) 71 | -------------------------------------------------------------------------------- /examples/test_example_custom_matcher.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.examples.example_custom_matcher.""" 15 | 16 | from absl.testing import absltest 17 | 18 | from refex import search 19 | import example_custom_matcher 20 | from refex.python import syntactic_template 21 | 22 | 23 | class SumMatcherTest(absltest.TestCase): 24 | SEARCH_REPLACE = search.PyExprRewritingSearcher.from_matcher( 25 | example_custom_matcher.SumMatcher(), 26 | {search.ROOT_LABEL: syntactic_template.PythonExprTemplate('$sum')}, 27 | ) 28 | 29 | def test_sum_rewrite(self): 30 | self.assertEqual( 31 | search.rewrite_string(self.SEARCH_REPLACE, '1 + 2 + 3', 'filename.py'), 32 | '6') 33 | 34 | def test_sum_no_rewrite(self): 35 | self.assertEqual( 36 | search.rewrite_string(self.SEARCH_REPLACE, '1 + var', 'filename.py'), 37 | '1 + var') 38 | 39 | 40 | if __name__ == '__main__': 41 | absltest.main() 42 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "refex" 3 | version = "0.1.1" 4 | description = "A syntactically-aware search and replace tool." 5 | authors = ["Devin Jeanpierre "] 6 | 7 | license = "Apache-2.0" 8 | readme = "README.md" 9 | classifiers = [ 10 | "Development Status :: 3 - Alpha", 11 | "Environment :: Console", 12 | "Intended Audience :: Developers", 13 | "Operating System :: POSIX", 14 | "Topic :: Software Development", 15 | "Topic :: Software Development :: Libraries :: Python Modules", 16 | ] 17 | 18 | [tool.poetry.scripts] 19 | refex = "refex.cli:main" 20 | 21 | [tool.poetry.dependencies] 22 | python = "^3.6" 23 | 24 | absl-py = "^0.9" 25 | asttokens = "^2" 26 | attrs = "^19.2" 27 | cached-property = "^1" 28 | colorama = "^0.4" 29 | 30 | # docs 31 | m2r = { version = "^0.2", optional = true } 32 | sphinx = { version = "^2.4", optional = true } 33 | 34 | [tool.poetry.dev-dependencies] 35 | # TODO: make absl a test-only dep 36 | pytest = "^6" 37 | # rxerr_debug 38 | pygments = "^2" 39 | 40 | [tool.poetry.extras] 41 | docs = ["m2r", "sphinx"] 42 | 43 | [tool.isort] 44 | profile = "google" 45 | 46 | # https://tox.readthedocs.io/ 47 | [tool.tox] 48 | legacy_tox_ini = """ 49 | 50 | [tox] 51 | isolated_build = True 52 | envlist = py39, py38, py37, py36, pypy37, pypy36 53 | 54 | [testenv] 55 | deps = 56 | poetry 57 | commands = 58 | poetry install -v 59 | poetry run pytest 60 | """ 61 | 62 | [build-system] 63 | requires = ["poetry-core>=1.0.0"] 64 | build-backend = "poetry.core.masonry.api" 65 | -------------------------------------------------------------------------------- /refex/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /refex/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from absl import app 16 | 17 | # Hack to get flags parsed -- both absl and the test runner expect to own main(). 18 | # Fortunately, absl doesn't de-construct state after main finishes, so we 19 | # can pretend to give it what it wants. 20 | try: 21 | app.run(lambda argv: None) 22 | except SystemExit: 23 | # neener neener 24 | pass 25 | -------------------------------------------------------------------------------- /refex/fix/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /refex/fix/find_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Utilities to load fixers from the built-in database. 15 | 16 | Fixers can be found in refex/fix/fixers. 17 | """ 18 | 19 | import collections 20 | 21 | from refex.fix import fixer 22 | from refex.fix.fixers import correctness_fixers 23 | from refex.fix.fixers import idiom_fixers 24 | from refex.fix.fixers import unittest_fixers 25 | 26 | _default_fixers = [] 27 | # Using OrderedDict instead of dict so that keeps the fixer order consistent 28 | # across runs for '*', and so we can tune the display order. 29 | _extra_fixers = collections.OrderedDict([('*', _default_fixers)]) 30 | 31 | def register_default_fixers(fixers): 32 | """Registers a fixer list to be included in from_pattern('*').""" 33 | _default_fixers.extend(fixers) 34 | 35 | def register_fixers(name, fixers): 36 | """Registers a fixer list to be returned by from_pattern(name).""" 37 | if name in _extra_fixers: 38 | raise ValueError('Name already registered: %r', name) 39 | _extra_fixers[name] = fixers 40 | 41 | 42 | def _register_builtins(): 43 | """Registers the built-in set of fixers. Invoked at import-time.""" 44 | register_fixers('correctness', correctness_fixers.SIMPLE_PYTHON_FIXERS) 45 | register_fixers('idiom', idiom_fixers.SIMPLE_PYTHON_FIXERS) 46 | register_fixers('unittest', unittest_fixers.SIMPLE_PYTHON_FIXERS) 47 | 48 | for fixers in _extra_fixers.values(): 49 | register_default_fixers(fixers) 50 | 51 | _register_builtins() 52 | 53 | 54 | def from_pattern(fixer_pattern: str) -> fixer.CombiningPythonFixer: 55 | """Provide a fixer that combines all the fixers specified in `fixer_pattern`. 56 | 57 | To get all the default fixers, pass '*'. Otherwise, to get a group of fixers 58 | by name, specify that name. (See _default_fixers & _extra_fixers). 59 | 60 | Args: 61 | fixer_pattern: The pattern of fixers to load. 62 | 63 | Returns: 64 | A PythonFixer. 65 | 66 | Raises: 67 | ValueError: The fixer pattern was not recognized. 68 | """ 69 | # TODO: Allow you to do set operations like '*,-FixerNameHere', etc. 70 | # or something along those lines. 71 | if fixer_pattern in _extra_fixers: 72 | return fixer.CombiningPythonFixer(_extra_fixers[fixer_pattern]) 73 | else: 74 | raise ValueError( 75 | 'Unknown fixer pattern %r: must provide one of: %s' % 76 | (fixer_pattern, ', '.join(_extra_fixers.keys()))) 77 | -------------------------------------------------------------------------------- /refex/fix/fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fixers to suggest substitutions for common issues.""" 15 | 16 | 17 | import abc 18 | import string 19 | from typing import Callable, List, Mapping, Optional, Text, TypeVar, Union 20 | 21 | import attr 22 | import cached_property 23 | 24 | from refex import formatting 25 | from refex import search 26 | from refex.python import matcher 27 | from refex.python.matchers import base_matchers 28 | from refex.python.matchers import syntax_matchers 29 | 30 | 31 | class PythonFixer(metaclass=abc.ABCMeta): 32 | """Abstract base class for python-specific fixers operating via matchers.""" 33 | 34 | # Test helper methods: 35 | 36 | @abc.abstractmethod 37 | def example_fragment(self): 38 | """Returns an example fragment that this fixer would match/replace.""" 39 | 40 | @abc.abstractmethod 41 | def example_replacement(self): 42 | """Returns what replacement is expected for the example fragment.""" 43 | 44 | @abc.abstractproperty 45 | def matcher_with_meta(self): 46 | """Returns a fully-decorated Matcher which attaches all substitution metadata.""" 47 | 48 | 49 | @attr.s(frozen=True) 50 | class CombiningPythonFixer(search.FileRegexFilteredSearcher, 51 | search.BasePythonRewritingSearcher): 52 | """Combining fixer for ``PythonFixer``, sharing common work. 53 | 54 | This combines all of the matchers (``matcher_with_meta``) into one big 55 | ``AnyOf``, allowing for optimized traversal. 56 | """ 57 | fixers = attr.ib(type=List[PythonFixer]) 58 | include_regex = attr.ib(default=r'.*[.]py$', type=str) 59 | 60 | @fixers.validator 61 | def _fixers_validator(self, attribute, fixers): 62 | for i, fixer in enumerate(fixers): 63 | if fixer.matcher_with_meta.type_filter is None: 64 | raise ValueError( 65 | f'Overbroad fixer (#{i}) will try to run on EVERY ast node, instead of a small set: {fixer}' 66 | ) 67 | 68 | # Override _matcher definition, as it's now computed based on fixers. 69 | matcher = attr.ib(init=False, type=matcher.Matcher) 70 | 71 | @matcher.default 72 | def matcher_default(self): 73 | return base_matchers.AnyOf( 74 | *(fixer.matcher_with_meta for fixer in self.fixers)) 75 | 76 | 77 | @attr.s(frozen=True, eq=False) 78 | class SimplePythonFixer(PythonFixer): 79 | r"""A simple find-replace fixer. 80 | 81 | All fixers must be able to be re-applied repeatedly, so that they can be 82 | combined with other fixers. 83 | 84 | Attributes: 85 | matcher: The matcher. 86 | replacement: The replacement template for the whole match, or a mapping 87 | from label to replacement template for that label. 88 | message: The message for all suggestions this gives. 89 | url: The suggestion URL for more information. 90 | category: A name to group fixes by. 91 | example_fragment: An example of a string this would match, for tests etc. 92 | If none is provided, one can sometimes be generated 93 | automatically in the event that the matcher is a simple 94 | syntax_matchers template, by replacing $a -> a etc. 95 | example_replacement: What the replacement would be for the example 96 | fragment. If example_fragment is autogenerated, a 97 | corresponding example_replacement is as well. 98 | significant: Whether the suggestions are going to be significant. 99 | """ 100 | _matcher = attr.ib(type=matcher.Matcher) 101 | _replacement = attr.ib(type=Union[formatting.Template, 102 | Mapping[Text, formatting.Template]]) 103 | _message = attr.ib(default=None, type=Optional[str]) 104 | _url = attr.ib(default=None, type=Optional[str]) 105 | _category = attr.ib(default=None, type=str) 106 | _example_fragment = attr.ib(default=None, type=Optional[str]) 107 | _example_replacement = attr.ib(default=None, type=Optional[str]) 108 | _significant = attr.ib(default=True, type=bool) 109 | 110 | @cached_property.cached_property 111 | def matcher_with_meta(self): 112 | if isinstance(self._replacement, formatting.Template): 113 | replacements = {search.ROOT_LABEL: self._replacement} 114 | else: 115 | replacements = self._replacement 116 | 117 | if self._message is not None: 118 | replacements[search.MESSAGE_LABEL] = formatting.LiteralTemplate( 119 | self._message) 120 | if self._url is not None: 121 | replacements[search.URL_LABEL] = formatting.LiteralTemplate(self._url) 122 | if self._category is not None: 123 | replacements[search.CATEGORY_LABEL] = formatting.LiteralTemplate( 124 | self._category) 125 | if self._significant: 126 | replacements[search.SIGNIFICANT_LABEL] = formatting.LiteralTemplate( 127 | 'HACK_TRUE') 128 | 129 | return base_matchers.WithReplacements( 130 | base_matchers.SystemBind(search.ROOT_LABEL, self._matcher), 131 | replacements) 132 | 133 | def example_fragment(self): 134 | if self._example_fragment is not None: 135 | return self._example_fragment 136 | if not isinstance( 137 | self._matcher, 138 | (syntax_matchers.ExprPattern, syntax_matchers.StmtPattern)): 139 | return None 140 | if self._matcher.restrictions: 141 | return None 142 | return string.Template(self._matcher.pattern).substitute( 143 | ImmutableDefaultDict(lambda k: k)) 144 | 145 | def example_replacement(self): 146 | if self._example_fragment is not None: 147 | return self._example_replacement 148 | if self._example_replacement is not None: 149 | raise TypeError( 150 | 'Cannot manually specify a replacement for an autogenerated fragment') 151 | if not isinstance(self._replacement, formatting.Template): 152 | raise TypeError( 153 | 'Cannot autogenerate an example replacement unless the replacement' 154 | ' template applies to the whole match.') 155 | return string.Template(self._replacement.template).substitute( 156 | ImmutableDefaultDict(lambda k: k)) 157 | 158 | 159 | KeyType = TypeVar('KeyType') 160 | ValueType = TypeVar('ValueType') 161 | 162 | 163 | @attr.s(frozen=True) 164 | class ImmutableDefaultDict(Mapping[KeyType, ValueType]): 165 | """Immutable mapping that returns factory(key) as a value, always.""" 166 | _factory = attr.ib(type=Callable[[KeyType], ValueType]) 167 | 168 | def __getitem__(self, key: KeyType) -> ValueType: 169 | return self._factory(key) 170 | 171 | def __len__(self): 172 | return 0 173 | 174 | def __iter__(self): 175 | return iter([]) 176 | -------------------------------------------------------------------------------- /refex/fix/fixers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /refex/fix/fixers/correctness_fixers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fixers for correctness issues. 15 | 16 | * Identity checks against literals are always a bug, and should be replaced by 17 | equality checks. This is a behavior change -- tests may stop passing after the 18 | fix is applied -- but the new behavior will be correct, and that's what 19 | matters. 20 | * ``yaml.load()`` includes security traps. 21 | """ 22 | 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | from __future__ import unicode_literals # for convenience 27 | 28 | from refex import formatting 29 | from refex.fix import fixer 30 | from refex.python import syntactic_template 31 | from refex.python.matchers import ast_matchers 32 | from refex.python.matchers import base_matchers 33 | from refex.python.matchers import syntax_matchers 34 | 35 | _STRING_LITERAL = base_matchers.AnyOf(ast_matchers.Str(), ast_matchers.Bytes()) 36 | 37 | # A "literal" for the purposes of buggy is/is not checks. 38 | _LITERAL = base_matchers.AnyOf(ast_matchers.Num(), _STRING_LITERAL) 39 | 40 | _YAML_MESSAGE = ( 41 | 'yaml.{function} can execute arbitrary Python code contained in the input. ' 42 | 'Use yaml.safe_load instead. This may require changing dumps to use ' 43 | 'yaml.safe_dump.') 44 | 45 | 46 | def _attrib_mutable_default_fixer(default, keyword_replacement): 47 | """Replaces an attr.ib(default=) call where the default is mutable. 48 | 49 | For example, this will replace 'default=[]' with 'default=()' in an attr.ib(): 50 | 51 | _attrib_factory_fixer('[]', 'default=()') 52 | 53 | However, most of the time, it should be replaced with factory=... to get as 54 | close to the original semantics as possible, while fixing the bug. (In 55 | particular, the user may want to mutate the object.) 56 | 57 | Args: 58 | default: The literal text of the default to match (e.g. '[]'). 59 | keyword_replacement: A replacement string. Note that it isn't a template. 60 | (It is not a full Python expression/statement, and so safe substitution is 61 | hard to guarantee.) 62 | 63 | Returns: 64 | A SimplePythonFixer for this fix. 65 | """ 66 | 67 | return fixer.SimplePythonFixer( 68 | message=( 69 | 'The default in an attr.ib() call is shared across all instances.' 70 | ' Mutable defaults should instead use factory=..., to get a unique' 71 | ' value.'), 72 | matcher=ast_matchers.Call( 73 | func=syntax_matchers.ExprPattern('attr.ib'), 74 | keywords=base_matchers.Contains( 75 | base_matchers.Bind( 76 | 'keyword', 77 | ast_matchers.keyword( 78 | arg=base_matchers.Equals('default'), 79 | value=syntax_matchers.ExprPattern(default))))), 80 | replacement={'keyword': formatting.ShTemplate(keyword_replacement)}, 81 | url='https://refex.readthedocs.io/en/latest/guide/fixers/attrib_default.html', 82 | category='refex.correctness.attrib-default', 83 | example_fragment='attr.ib(default=%s)' % default, 84 | example_replacement='attr.ib(%s)' % keyword_replacement, 85 | ) 86 | 87 | 88 | SIMPLE_PYTHON_FIXERS = [ 89 | # is/== literal 90 | fixer.SimplePythonFixer( 91 | message=('`is` only sometimes works for comparing equality with' 92 | ' literals. This is a bug that will manifest in subtle ways.' 93 | ' == is more appropriate.'), 94 | matcher=syntax_matchers.ExprPattern('$lhs is $rhs', {'rhs': _LITERAL}), 95 | replacement=syntactic_template.PythonExprTemplate('$lhs == $rhs'), 96 | url='https://refex.readthedocs.io/en/latest/guide/fixers/literal_comparison.html', 97 | category='pylint.literal-comparison', 98 | example_fragment='lhs is 42', 99 | example_replacement='lhs == 42', 100 | ), 101 | fixer.SimplePythonFixer( 102 | message=('`is not` only sometimes works for comparing inequality with' 103 | ' literals. This is a bug that will manifest in subtle ways.' 104 | ' != is more appropriate.'), 105 | matcher=syntax_matchers.ExprPattern('$lhs is not $rhs', 106 | {'rhs': _LITERAL}), 107 | replacement=syntactic_template.PythonExprTemplate('$lhs != $rhs'), 108 | url='https://refex.readthedocs.io/en/latest/guide/fixers/literal_comparison.html', 109 | category='pylint.literal-comparison', 110 | example_fragment='lhs is not 42', 111 | example_replacement='lhs != 42', 112 | ), 113 | # YAML load/safe_load 114 | fixer.SimplePythonFixer( 115 | message=_YAML_MESSAGE.format(function='load'), 116 | matcher=syntax_matchers.WithTopLevelImport( 117 | syntax_matchers.ExprPattern('yaml.load($s)'), 'yaml'), 118 | replacement=syntactic_template.PythonExprTemplate('yaml.safe_load($s)'), 119 | url='https://msg.pyyaml.org/load', 120 | category='refex.security.yaml_safe_load', 121 | 122 | # test / documentation data 123 | example_fragment='import yaml; yaml.load(x)', 124 | example_replacement='import yaml; yaml.safe_load(x)', 125 | ), 126 | fixer.SimplePythonFixer( 127 | message=_YAML_MESSAGE.format(function='load_all'), 128 | matcher=syntax_matchers.WithTopLevelImport( 129 | syntax_matchers.ExprPattern('yaml.load_all($s)'), 'yaml'), 130 | replacement=syntactic_template.PythonExprTemplate( 131 | 'yaml.safe_load_all($s)'), 132 | url='https://msg.pyyaml.org/load', 133 | category='refex.security.yaml_safe_load', 134 | 135 | # test / documentation data 136 | example_fragment='import yaml; yaml.load_all(x)', 137 | example_replacement='import yaml; yaml.safe_load_all(x)', 138 | ), 139 | fixer.SimplePythonFixer( 140 | message='Parentheses around a single variable have no effect. Did you mean to format with a tuple?', 141 | matcher=base_matchers.AllOf( 142 | # For a line like 'a % (b)', asttokens won't include the parens 143 | # around b in its token span. So to find something like that, we 144 | # look for a '%' BinOp where the left side is a literal string, the 145 | # right side of the BinOp is a name and the entire op ends with a 146 | # paren but doesn't start with one. Since the name itself cannot 147 | # include a ')', the only place the close-paren could come from is 148 | # if the name were surrounded in superfluous parens. It is possible 149 | # that $b is wrapped in parens as a hint to pyformat to break the 150 | # line, but we anyway skip any case where $b extends over multiple 151 | # lines because the `MatchesRegex` doesn't use re.DOTALL 152 | # There's still a chance $b is a variable holding a tuple that's 153 | # wrapped in parens, but that should be very rare. (And even if it 154 | # occurs, although the suggested fix would be wrong, we're still 155 | # pointing out an un-idiomatic pattern). 156 | syntax_matchers.ExprPattern( 157 | '$a % $b', 158 | dict( 159 | a=_STRING_LITERAL, 160 | b=base_matchers.AllOf( 161 | base_matchers.AnyOf(ast_matchers.Attribute(), 162 | ast_matchers.Name())))), 163 | base_matchers.MatchesRegex(r'[^(].+\)'), 164 | ), 165 | replacement=syntactic_template.PythonExprTemplate('$a % ($b,)'), 166 | url='https://refex.readthedocs.io/en/latest/guide/fixers/superfluous_parens.html', 167 | category='refex.correctness.formatting', 168 | # test / documentation data 169 | example_fragment='x = "hello %s" % (world)', 170 | example_replacement='x = "hello %s" % (world,)', 171 | ), 172 | # attr.ib mutable defaults 173 | # TODO: It Would Be Nice to handle non-empty lists/etc. and replace 174 | # with a factory=lambda: <...>, but those are uncommon and the replacement 175 | # logic is a little tricky. 176 | # Similarly, we could handle the many other cases like time.time() or flags, 177 | # but they are also rare. 178 | _attrib_mutable_default_fixer( 179 | default='[]', keyword_replacement='factory=list'), 180 | _attrib_mutable_default_fixer( 181 | default='{}', keyword_replacement='factory=dict'), 182 | _attrib_mutable_default_fixer( 183 | default='set()', keyword_replacement='factory=set'), 184 | fixer.SimplePythonFixer( 185 | message=( 186 | 'utcfromtimestamp() returns a timezone-naive UTC datetime, which ' 187 | 'is bug-prone.'), 188 | url='https://docs.python.org/3/library/datetime.html#datetime.datetime.utcfromtimestamp', 189 | category='refex.correctness.datetime.utcfromtimestamp', 190 | matcher=syntax_matchers.ExprPattern( 191 | 'datetime.datetime.utcfromtimestamp($a)'), 192 | replacement=syntactic_template.PythonExprTemplate( 193 | 'datetime.datetime.fromtimestamp($a, tz=datetime.timezone.utc)'), 194 | example_fragment='datetime.datetime.utcfromtimestamp(foo)', 195 | example_replacement=( 196 | 'datetime.datetime.fromtimestamp(foo, tz=datetime.timezone.utc)'), 197 | ), 198 | fixer.SimplePythonFixer( 199 | message=( 200 | 'utcnow() returns a timezone-naive UTC datetime, which is ' 201 | 'bug-prone.'), 202 | url='https://docs.python.org/3/library/datetime.html#datetime.datetime.utcnow', 203 | category='refex.correctness.datetime.utcnow', 204 | matcher=syntax_matchers.ExprPattern( 205 | 'datetime.datetime.utcnow()'), 206 | replacement=syntactic_template.PythonExprTemplate( 207 | 'datetime.datetime.now(tz=datetime.timezone.utc)'), 208 | example_fragment='datetime.datetime.utcnow()', 209 | example_replacement=( 210 | 'datetime.datetime.now(tz=datetime.timezone.utc)'), 211 | ), 212 | ] 213 | -------------------------------------------------------------------------------- /refex/fix/fixers/test_correctness_fixers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Lint as python2, python3 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | from __future__ import unicode_literals 20 | 21 | import textwrap 22 | 23 | from absl.testing import absltest 24 | from absl.testing import parameterized 25 | 26 | from refex import search 27 | from refex.fix import fixer 28 | from refex.fix.fixers import correctness_fixers 29 | 30 | 31 | def _rewrite(fixer_, code): 32 | return search.rewrite_string(fixer_, code, 'example.py') 33 | 34 | 35 | class SimpleFixersTest(parameterized.TestCase): 36 | fixers = fixer.CombiningPythonFixer(correctness_fixers.SIMPLE_PYTHON_FIXERS) 37 | 38 | def test_skips_number_mod(self): 39 | before = 'y = 3 % (x)' 40 | self.assertEqual(before, _rewrite(self.fixers, before)) 41 | 42 | @parameterized.parameters('(\nfoo)', '(foo\n)', '(foo\n.bar)') 43 | def test_skips_multiline_rhs(self, rhs): 44 | before = 'y = "hello %s" % {rhs}'.format(rhs=rhs) 45 | self.assertEqual(before, _rewrite(self.fixers, before)) 46 | 47 | def test_skips_formatting_when_already_using_tuple(self): 48 | before = "y = 'hello %s' % (world,)" 49 | self.assertEqual(before, _rewrite(self.fixers, before)) 50 | 51 | @parameterized.parameters('u', 'b', '') 52 | def test_changes_superfluous_parens_to_tuple_when_formatting( 53 | self, string_prefix): 54 | before = textwrap.dedent(""" 55 | y = ( 56 | {}'hello: %s\\n' % (thing.world)) 57 | """).format(string_prefix) 58 | after = textwrap.dedent(""" 59 | y = ( 60 | {}'hello: %s\\n' % (thing.world,)) 61 | """).format(string_prefix) 62 | self.assertEqual(after, _rewrite(self.fixers, before)) 63 | 64 | @parameterized.parameters('None', 'True', 'False') 65 | def test_is_named_constant(self, constant): 66 | """Named constants aren't fixed by the is check: identity is guaranteed.""" 67 | before = f'x is {constant}' 68 | self.assertEqual(before, _rewrite(self.fixers, before)) 69 | 70 | @parameterized.parameters('42', '0x42', '0b01', '6.6', '1e1', '1j', '"s"', 71 | 'u"s"', 'b"s"') 72 | def test_is_unnamed_constant(self, constant): 73 | before = f'x is {constant}' 74 | after = f'x == {constant}' 75 | self.assertEqual(after, _rewrite(self.fixers, before)) 76 | 77 | 78 | if __name__ == '__main__': 79 | absltest.main() 80 | -------------------------------------------------------------------------------- /refex/fix/fixers/unittest_fixers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Fixers for unit test specific logic. 15 | 16 | * assertions: assertTrue(arg) and assertFalse(arg) give very poor error messages 17 | compared to methods like assertEqual, assertNotEqual, etc. These fixers 18 | change the calls to the more specific assertion methods, without changing 19 | behavior at all. 20 | * Deprecated unittest assertion aliases are replaced with the newer variants. 21 | """ 22 | 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | from __future__ import unicode_literals # for convenience 27 | 28 | import string 29 | 30 | from refex.fix import fixer 31 | from refex.python import syntactic_template 32 | from refex.python.matchers import syntax_matchers 33 | 34 | 35 | def assert_alias_fixer( 36 | old_expr, 37 | new_expr, 38 | url='https://docs.python.org/3/library/unittest.html#deprecated-aliases'): 39 | """Fixer for deprecated unittest aliases. 40 | 41 | Args: 42 | old_expr: A string for an ExprPattern matching the target expr. 43 | new_expr: A string for a PythonExprTemplate to replace it with. 44 | url: The URL documenting the deprecation. 45 | 46 | Returns: 47 | A fixer that replaces old_expr with new_expr. 48 | """ 49 | dotdotdot = fixer.ImmutableDefaultDict(lambda _: '...') 50 | return fixer.SimplePythonFixer( 51 | message=('{old} is a deprecated alias for {new} in the unittest module.' 52 | .format( 53 | old=string.Template(old_expr).substitute(dotdotdot), 54 | new=string.Template(new_expr).substitute(dotdotdot))), 55 | matcher=syntax_matchers.ExprPattern(old_expr), 56 | replacement=syntactic_template.PythonExprTemplate(new_expr), 57 | url=url, 58 | significant=False, 59 | category='pylint.g-deprecated-assert', 60 | ) 61 | 62 | 63 | def assert_message_fixer(old_expr, new_expr, method, is_absl=False): 64 | """Fixer for assertTrue()/assertFalse()/etc. 65 | 66 | related error fixes. 67 | 68 | assertTrue(...) often produces less readable error information than 69 | alternative methods like assertEqual etc. 70 | 71 | Args: 72 | old_expr: a ExprPattern string for the expr to match 73 | new_expr: a template string for the replacement 74 | method: the method to link to in the docs. 75 | is_absl: Whether this is an absl method with absl docs. 76 | 77 | Returns: 78 | A fixer that replaces old_expr with new_expr. 79 | """ 80 | if is_absl: 81 | # absl doesn't have docs per se. 82 | url = f'https://github.com/abseil/abseil-py/search?q=%22def+{method}%22' 83 | else: 84 | url = f'https://docs.python.org/3/library/unittest.html#unittest.TestCase.{method}' 85 | dotdotdot = fixer.ImmutableDefaultDict(lambda _: '...') 86 | return fixer.SimplePythonFixer( 87 | message=( 88 | '%s is a more specific assertion, and may give more detailed error information than %s.' 89 | % (string.Template(new_expr).substitute(dotdotdot), 90 | string.Template(old_expr).substitute(dotdotdot))), 91 | matcher=syntax_matchers.ExprPattern(old_expr), 92 | replacement=syntactic_template.PythonExprTemplate(new_expr), 93 | url=url, 94 | category='pylint.g-generic-assert', 95 | ) 96 | 97 | 98 | SIMPLE_PYTHON_FIXERS = [ 99 | # failUnlessEqual etc. are really REALLY gone in 3.12, so if you haven't 100 | # fixed it by now, it's too late! 101 | # The only deprecated aliases left of any interest are ones defined by 102 | # absltest as a compatibility shim. 103 | assert_alias_fixer( 104 | 'self.assertItemsEqual', 105 | 'self.assertCountEqual', 106 | url='https://docs.python.org/2/library/unittest.html#unittest.TestCase.assertItemsEqual', 107 | ), 108 | # Assertion message fixers: 109 | # assertFalse(...) is excluded for now because will change which method is 110 | # called -- for example, if you're specifically testing your implementation 111 | # of __ne__, switching to assertEqual would be a bad move. 112 | # ==, != 113 | assert_message_fixer( 114 | 'self.assertTrue($lhs == $rhs)', 115 | 'self.assertEqual($lhs, $rhs)', 116 | 'assertEqual', 117 | ), 118 | assert_message_fixer( 119 | 'self.assertTrue($lhs != $rhs)', 120 | 'self.assertNotEqual($lhs, $rhs)', 121 | 'assertNotEqual', 122 | ), 123 | assert_message_fixer( 124 | 'self.assertTrue($lhs == $rhs)', 125 | 'self.assertEqual($lhs, $rhs)', 126 | 'assertEqual', 127 | ), 128 | assert_message_fixer( 129 | 'self.assertTrue($lhs != $rhs)', 130 | 'self.assertNotEqual($lhs, $rhs)', 131 | 'assertNotEqual', 132 | ), 133 | # is, is not 134 | # We could also change 'assertIs(..., None)' to 'assertIsNone(...)', 135 | # but the error messages are identical, so this suggestion would 136 | # just be a waste of programmer time and code churn. 137 | assert_message_fixer( 138 | 'self.assertTrue($lhs is $rhs)', 'self.assertIs($lhs, $rhs)', 'assertIs' 139 | ), 140 | assert_message_fixer( 141 | 'self.assertTrue($lhs is not $rhs)', 142 | 'self.assertIsNot($lhs, $rhs)', 143 | 'assertIsNot', 144 | ), 145 | assert_message_fixer( 146 | 'self.assertFalse($lhs is $rhs)', 147 | 'self.assertIsNot($lhs, $rhs)', 148 | 'assertIsNot', 149 | ), 150 | assert_message_fixer( 151 | 'self.assertFalse($lhs is not $rhs)', 152 | 'self.assertIs($lhs, $rhs)', 153 | 'assertIs', 154 | ), 155 | # in, not in 156 | assert_message_fixer( 157 | 'self.assertTrue($lhs in $rhs)', 'self.assertIn($lhs, $rhs)', 'assertIn' 158 | ), 159 | assert_message_fixer( 160 | 'self.assertTrue($lhs not in $rhs)', 161 | 'self.assertNotIn($lhs, $rhs)', 162 | 'assertNotIn', 163 | ), 164 | assert_message_fixer( 165 | 'self.assertFalse($lhs in $rhs)', 166 | 'self.assertNotIn($lhs, $rhs)', 167 | 'assertNotIn', 168 | ), 169 | assert_message_fixer( 170 | 'self.assertFalse($lhs not in $rhs)', 171 | 'self.assertIn($lhs, $rhs)', 172 | 'assertIn', 173 | ), 174 | # <, <=, >, >= 175 | assert_message_fixer( 176 | 'self.assertTrue($lhs > $rhs)', 177 | 'self.assertGreater($lhs, $rhs)', 178 | 'assertGreater', 179 | ), 180 | assert_message_fixer( 181 | 'self.assertTrue($lhs >= $rhs)', 182 | 'self.assertGreaterEqual($lhs, $rhs)', 183 | 'assertGreaterEqual', 184 | ), 185 | assert_message_fixer( 186 | 'self.assertTrue($lhs < $rhs)', 187 | 'self.assertLess($lhs, $rhs)', 188 | 'assertLess', 189 | ), 190 | assert_message_fixer( 191 | 'self.assertTrue($lhs <= $rhs)', 192 | 'self.assertLessEqual($lhs, $rhs)', 193 | 'assertLessEqual', 194 | ), 195 | # isinstance 196 | assert_message_fixer( 197 | 'self.assertTrue(isinstance($lhs, $rhs))', 198 | 'self.assertIsInstance($lhs, $rhs)', 199 | 'assertIsInstance', 200 | ), 201 | assert_message_fixer( 202 | 'self.assertTrue(not isinstance($lhs, $rhs))', 203 | 'self.assertNotIsInstance($lhs, $rhs)', 204 | 'assertNotIsInstance', 205 | ), 206 | assert_message_fixer( 207 | 'self.assertFalse(isinstance($lhs, $rhs))', 208 | 'self.assertNotIsInstance($lhs, $rhs)', 209 | 'assertNotIsInstance', 210 | ), 211 | assert_message_fixer( 212 | 'self.assertFalse(not isinstance($lhs, $rhs))', 213 | 'self.assertIsInstance($lhs, $rhs)', 214 | 'assertIsInstance', 215 | ), 216 | # TODO: suggest assertLen, and other absltest methods. 217 | # Those are slightly more complicated because we must check whether or not 218 | # the test even _is_ an absltest. 219 | # However, if we're already using one abslTest method, we can suggest 220 | # another: 221 | assert_message_fixer( 222 | 'self.assertLen($x, 0)', 223 | 'self.assertEmpty($x)', 224 | 'assertEmpty', 225 | is_absl=True, 226 | ), 227 | ] 228 | -------------------------------------------------------------------------------- /refex/fix/generate_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Outputs an example file full of things to fix.""" 15 | 16 | 17 | import sys 18 | 19 | from refex.fix import find_fixer 20 | 21 | 22 | def example_source(): 23 | lines = [] 24 | lines.append(u'# coding: utf-8') 25 | lines.append(u'# Only enable refex warnings: pylint: skip-file') 26 | for fx in find_fixer.from_pattern('*').fixers: 27 | lines.append(fx.example_fragment()) 28 | lines.append(u'') 29 | return u'\n'.join(lines).encode('utf-8') 30 | 31 | 32 | def main(): 33 | if len(sys.argv) > 1: 34 | sys.exit('Too many command-line arguments.') 35 | print(example_source().decode('utf-8'), end='') 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /refex/fix/test_generate_example.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.fix.generate_example.""" 15 | 16 | 17 | import ast 18 | 19 | from absl.testing import absltest 20 | 21 | from refex.fix import generate_example 22 | 23 | 24 | class GenerateExampleTest(absltest.TestCase): 25 | 26 | def test_example_source(self): 27 | """As a smoke test of the smoke test, check that the example parses.""" 28 | ast.parse(generate_example.example_source(), '') 29 | 30 | 31 | if __name__ == '__main__': 32 | absltest.main() 33 | -------------------------------------------------------------------------------- /refex/match.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | :mod:`refex.match` 16 | ================== 17 | 18 | The common match classes, used for representing fragments for substitution. 19 | 20 | This is the bare minimum of match information, common to all search modes. 21 | :mod:`refex.python.matcher` has matchers that carry information about the AST, 22 | which can be used for more sophisticated search/replace functionality. 23 | 24 | All match classes have the following two attributes: 25 | 26 | .. attribute:: string 27 | :type: Optional[str] 28 | 29 | If this is non-``None``, this is the value that was matched as a string. 30 | This value is used when the match is used as a *source* for a substitution. 31 | 32 | .. attribute:: span 33 | :type: Optional[Tuple[int, int]] 34 | 35 | If this is non-None, then this is the location of the match in the source 36 | file. This value is used when the match is used as a _destination_ for 37 | a substitution. 38 | 39 | The span is represented as a tuple of (start, end), which is a half-open 40 | range using unicode offsets. 41 | 42 | Every match with a span must have a string. (If nothing else, the string 43 | can be the contents at that span location.) 44 | 45 | Note that the :attr:`string` may be *different* than the actual textual content 46 | at the :attr:`span` destination. For example, consider the Python expression 47 | ``(b + c) * d``. If we have a match for the addition operation, it might have a 48 | :attr:`string` of ``"b + c"``, but a span that is ``"(b + c)"``. 49 | This is a useful thing to do: 50 | 51 | 1) If we replace this expression with ``"e"``, it would be nice for the 52 | expression to become ``e * d``, rather than ``(e) * d``. 53 | 2) If we substitute this match into a function call, it would be nice for 54 | that call to become ``foo(b + c)`` rather than ``foo((b + c))``. 55 | """ 56 | 57 | 58 | from typing import Any, Tuple 59 | 60 | import attr 61 | 62 | 63 | @attr.s(frozen=True) 64 | class Match: 65 | """A match with no accompanying information. 66 | 67 | .. attribute:: string 68 | .. attribute:: span 69 | """ 70 | 71 | string = None 72 | span = None 73 | 74 | 75 | @attr.s(frozen=True) 76 | class StringMatch(Match): 77 | """A match which can be a source for substitution. 78 | 79 | .. attribute:: string 80 | .. attribute:: span 81 | """ 82 | string = attr.ib(type=str) 83 | 84 | 85 | @attr.s(frozen=True) 86 | class SpanMatch(StringMatch): 87 | """A match which can be both a source *and* destination for substitution. 88 | 89 | .. attribute:: string 90 | .. attribute:: span 91 | """ 92 | span = attr.ib(type=Tuple[int, int]) 93 | 94 | @classmethod 95 | def from_text(cls, text: str, span: Tuple[int, int]) -> "SpanMatch": 96 | """Creates a :class:`SpanMatch` from a span within ``text``.""" 97 | start, end = span 98 | return SpanMatch(string=text[start:end], span=span) 99 | 100 | 101 | @attr.s(frozen=True) 102 | class ObjectMatch(Match): 103 | """Match that carries data with it, but has no associated span or string. 104 | 105 | .. attribute:: string 106 | .. attribute:: span 107 | """ 108 | #: An object associated with the match. 109 | matched = attr.ib(type=Any) 110 | -------------------------------------------------------------------------------- /refex/parsed_file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | :mod:`refex.parsed_file` 16 | ------------------------ 17 | """ 18 | 19 | # No portable raw unicode literal exists without unicode_literals. 20 | # see https://stackoverflow.com/questions/33027281 21 | 22 | import re 23 | from typing import Iterable, Mapping, Optional 24 | 25 | import asttokens 26 | import attr 27 | import cached_property 28 | 29 | 30 | @attr.s(frozen=True, eq=True, order=False) 31 | class ParsedFile: 32 | """A wrapper for a file after preprocessing. 33 | 34 | May be subclassed. 35 | 36 | The base class contains common metadata and does not in fact represent the 37 | result of any parsing. Individual subclasses may contain detailed data about 38 | the structure of a file. (See, for example, 39 | :class:`refex.python.matcher.PythonParsedFile`) 40 | 41 | Attributes: 42 | text: The unparsed file contents. 43 | path: The path to the file. 44 | pragmas: The pragmas for this file, in file order. 45 | line_numbers: A cache for line number <-> codepoint offset conversion. 46 | """ 47 | 48 | text = attr.ib(type=str) 49 | path = attr.ib(type=str) 50 | pragmas = attr.ib(type=Iterable["Pragma"]) 51 | 52 | @cached_property.cached_property 53 | def line_numbers(self): 54 | return asttokens.LineNumbers(self.text) 55 | 56 | 57 | # Matches a trailing pragma in a piece of text in an re.search. 58 | _PRAGMA_RE = re.compile( 59 | r""" 60 | # Match only at the boundary (like \b) for words-including-dashes. 61 | # We'd use lookbehind, but this isn't a fixed-width pattern. 62 | (?:[^-\w]|\A) 63 | (?P[-\w]+)\s* 64 | : 65 | \s* 66 | (?P 67 | [-\w]+\s*=\s*[-\w.]+\s* # key=value 68 | (?:,\s* [-\w]+ \s* = \s* [-\w.]+ \s*)* 69 | ) 70 | (?:,\s*)? # trailing comma allowed, to try to be maximally permissive. 71 | \Z 72 | """, re.VERBOSE) 73 | 74 | 75 | @attr.s(frozen=True) 76 | class Pragma: 77 | """A pragma / directive for Refex to alter how it handles files. 78 | 79 | Attributes: 80 | tag: The pragma namespace. This should be ``"refex"`` unless the pragma is 81 | actually parsed from a comment that targets another system (e.g. pylint.) 82 | data: The pragma payload, a set of key-value pairs. 83 | start: The start (codepoint offset) of the pragma in the file. Inclusive. 84 | end: The end (codepoint offset) of the pragma in the file. Exclusive. 85 | """ 86 | tag = attr.ib(type=str) 87 | data = attr.ib(type=Mapping[str, str]) 88 | start = attr.ib(type=int) 89 | end = attr.ib(type=int) 90 | 91 | @classmethod 92 | def from_text(cls, text, start, end) -> Optional["Pragma"]: 93 | """Parses pragmas from the standard format: ``tag: key=value, ...``. 94 | 95 | For example, ``refex: disable=foo`` becomes 96 | ``Pragma(tag=refex, data={"disable": "foo"}, ...)`` 97 | 98 | The pragma must end the string, although arbitrary leading text (usually an 99 | explanation for why the pragma was used) is allowed. 100 | 101 | Args: 102 | text: The candidate pragma text. 103 | start: The start offset for the pragma. 104 | end: The end offset for the pragma. 105 | 106 | Returns: 107 | A :class:`Pragma` if text[start:end] parses as a pragma, otherwise 108 | ``None``. 109 | """ 110 | m = _PRAGMA_RE.search(text) 111 | if m is None: 112 | return None 113 | data = {} 114 | for declaration in m.group('data').split(','): 115 | key, _, value = declaration.partition('=') 116 | data[key.strip()] = value.strip() 117 | return cls(tag=m.group('tag'), data=data, start=start, end=end) 118 | -------------------------------------------------------------------------------- /refex/python/README.md: -------------------------------------------------------------------------------- 1 | Python AST node find/replace support. 2 | 3 | Subdirectories: 4 | 5 | * matchers/ contains a library of AST node matchers to use. 6 | -------------------------------------------------------------------------------- /refex/python/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /refex/python/error_strings.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Human-readable error messages for exceptions.""" 15 | 16 | 17 | 18 | def _indent(s): 19 | return ''.join(' ' + line for line in s.splitlines(True)) 20 | 21 | 22 | def user_syntax_error(e, source_code): 23 | """Returns a representation of the syntax error for human consumption. 24 | 25 | This is only meant for small user-provided strings. For input files, 26 | prefer the regular Python format. 27 | 28 | Args: 29 | e: The SyntaxError object. 30 | source_code: The source code. 31 | 32 | Returns: 33 | A multi-line error message, where the first line is the summary, and the 34 | following lines explain the error in more detail. 35 | """ 36 | 37 | summary = 'Failed to parse Python-like source code ({msg}).'.format( 38 | msg=e.msg or '') 39 | if e.text is None: 40 | # Only output the source code. 41 | return '\n'.join([summary, _indent(source_code)]) 42 | # Alternatively, we could use the middle two lines from 43 | # traceback.format_exception_only(SyntaxError, e), but it isn't clear that 44 | # this is an improvement in terms of maintainability. (e.g. we do not then 45 | # control the indent, and if the format changes in the future the output 46 | # becomes nonsense). 47 | error_information = '\n'.join([ 48 | e.text.rstrip('\r\n'), # \n is added by ast.parse but not exec/eval. 49 | ' ' * (e.offset - 1) + '^', # note: offset is 1-based. 50 | ]) 51 | 52 | if '\n' in source_code: 53 | return '\n'.join([ 54 | summary, 55 | '', 56 | 'Source:', 57 | _indent(source_code), 58 | '', 59 | 'Location:', 60 | _indent(error_information), 61 | ]) 62 | else: 63 | return '\n'.join([summary, _indent(error_information)]) 64 | -------------------------------------------------------------------------------- /refex/python/evaluate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | :mod:`refex.python.evaluate` 16 | ============================ 17 | 18 | Build matchers from user-provided input. 19 | 20 | For example, a user might give ``'base_matchers.Anything()'`` and this is 21 | compiled to an actual ``base_matchers.Anything()``. 22 | 23 | For convenience, these are also available without the module name (e.g. as 24 | ``Anything()``), but without any defined behavior if there is more than one 25 | matcher with the same name. 26 | """ 27 | 28 | 29 | import textwrap 30 | 31 | from refex.python import error_strings 32 | from refex.python import matcher 33 | from refex.python import semiliteral_eval 34 | from refex.python.matchers import ast_matchers 35 | from refex.python.matchers import base_matchers 36 | from refex.python.matchers import lexical_matchers 37 | from refex.python.matchers import syntax_matchers 38 | 39 | 40 | def _sorted_attributes(o): 41 | """Gets all attributes in sorted order. A replacement for vars(o).items().""" 42 | for a in sorted(dir(o)): 43 | yield a, getattr(o, a) 44 | 45 | 46 | # TODO: remove overwrite param 47 | 48 | 49 | def add_module(module, overwrite=False): 50 | """Adds a non-builtin matcher module to be available for compile_matcher.""" 51 | for global_variable, value in _sorted_attributes(module): 52 | if not isinstance(value, type): 53 | continue 54 | if not matcher.is_safe_to_eval(value): 55 | continue 56 | 57 | is_mutated = False 58 | module_name = module.__name__.rsplit('.', 1)[-1] 59 | for name in global_variable, f'{module_name}.{global_variable}': 60 | if overwrite or name not in _ALL_MATCHERS: 61 | _ALL_MATCHERS[name] = value 62 | is_mutated = True 63 | if not is_mutated: 64 | raise ValueError(f'Could not add matcher: f{value!r}') 65 | 66 | 67 | _ALL_MATCHERS = {} 68 | add_module(ast_matchers, overwrite=True) 69 | add_module(base_matchers, overwrite=True) 70 | add_module(lexical_matchers, overwrite=True) 71 | add_module(syntax_matchers, overwrite=True) 72 | 73 | 74 | def compile_matcher(user_input:str) -> matcher.Matcher: 75 | """Creates a :class:`~refex.python.matcher.Matcher` from a string.""" 76 | user_input = textwrap.dedent(user_input).strip('\n') 77 | try: 78 | return semiliteral_eval.Eval( 79 | user_input, 80 | callables=_ALL_MATCHERS, 81 | constants=matcher.registered_constants) 82 | except SyntaxError as e: 83 | raise ValueError(error_strings.user_syntax_error(e, user_input)) 84 | -------------------------------------------------------------------------------- /refex/python/matcher_test_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Base class and test-only utilities for testing matchers.""" 15 | 16 | 17 | from absl.testing import absltest 18 | 19 | from refex.python import matcher 20 | 21 | 22 | class MatcherTestCase(absltest.TestCase): 23 | 24 | def _get_match_string(self, match, source_code): 25 | if not match.span: 26 | self.fail("%r is not a lexical match, and so doesn't match" 27 | ' substrings of the source code' % (match,)) 28 | return source_code[slice(*match.span)] 29 | 30 | def _get_matchinfo_string(self, matchinfo, source_code): 31 | if matchinfo is None: 32 | self.fail('Failed to match') 33 | self.assertIsInstance(matchinfo, matcher.MatchInfo) 34 | return self._get_match_string(matchinfo.match, source_code) 35 | 36 | def get_all_match_strings(self, m, source_code): 37 | return [ 38 | self._get_matchinfo_string(matchinfo, source_code) 39 | for matchinfo in matcher.find_iter( 40 | m, matcher.parse_ast(source_code, '')) 41 | ] 42 | 43 | 44 | def empty_context(): 45 | """Returns a new match context for some empty file. 46 | 47 | The return value is suitable for use with matchers, e.g.: 48 | 49 | >>> from refex.python.matchers import base_matchers 50 | >>> base_matchers.Anything().match(empty_context(), object()) 51 | MatchInfo(...) 52 | """ 53 | return matcher.MatchContext(matcher.parse_ast('')) 54 | -------------------------------------------------------------------------------- /refex/python/matchers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /refex/python/matchers/ast_matchers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # pylint: disable=g-space-before-docstring-summary, g-no-space-after-docstring-summary, g-short-docstring-punctuation 15 | # pyformat: disable 16 | """ 17 | :mod:`~refex.python.matchers.ast_matchers` 18 | ------------------------------------------ 19 | 20 | Automatically generated low-level AST node matchers. 21 | 22 | For each AST node in the :py:mod:`ast` module, there is a matcher with the same 23 | name, which accepts submatchers for each of its attributes. 24 | 25 | For example, if the Python grammar has an entry like:: 26 | 27 | UnaryOp(unaryop op, expr operand) 28 | 29 | Then the following matcher will match any ``ast.UnaryOp``:: 30 | 31 | ast_matchers.UnaryOp() 32 | 33 | And this will match any ``ast.UnaryOp`` with an ``op`` attribute matching 34 | ``submatcher1``, and an ``operand`` attribute matching ``submatcher2``:: 35 | 36 | ast_matchers.UnaryOp(op=submatcher1, operand=submatcher2) 37 | 38 | (See the unit tests for more examples.) 39 | """ 40 | # pyformat: enable 41 | # TODO: Add pytype support once pytype gets generics: 42 | # 1) generate code in a genrule rather than generating classes at runtime. 43 | # 2) Mark non-{expr,stmt} nodes specially since they won't have token spans. 44 | 45 | from __future__ import absolute_import 46 | from __future__ import division 47 | from __future__ import print_function 48 | 49 | import ast 50 | import sys 51 | 52 | import attr 53 | 54 | from refex.python import matcher 55 | from refex.python.matchers import base_matchers 56 | 57 | _HAS_DYNAMIC_ATTRIBUTES = True 58 | 59 | 60 | @attr.s(frozen=True) 61 | class _AstNodeMatcher(matcher.Matcher): 62 | """Abstract / generic AST node matcher. 63 | 64 | Only use via the subclasses generated below. Subclasses will always have the 65 | name Ast. e.g. AstExpr. 66 | 67 | AST node matchers can be instantiated by providing matchers for the 68 | attributes. These all default to Any, so are not required. 69 | 70 | Missing fields are replaced with None in matching. 71 | """ 72 | 73 | @classmethod 74 | def _generate_syntax_matcher(cls, ast_node_type): 75 | # Generate a class with an attrs field for every AST field, passed by 76 | # keyword argument only. 77 | ty = attr.make_class( 78 | ast_node_type.__name__, 79 | { 80 | field: matcher.submatcher_attrib(default=base_matchers.Anything(),) 81 | for field in ast_node_type._fields 82 | }, 83 | bases=(cls,), 84 | frozen=True, 85 | kw_only=True, 86 | ) 87 | ty._ast_type = ast_node_type # pylint: disable=protected-access 88 | ty.type_filter = frozenset({ast_node_type}) 89 | return ty 90 | 91 | @matcher.accumulating_matcher 92 | def _match(self, context, node): 93 | """Matches a node with the correct type and matching attributes.""" 94 | if type(node) != self._ast_type: # pylint: disable=unidiomatic-typecheck 95 | yield None 96 | 97 | for field in self._ast_type._fields: 98 | submatcher = getattr(self, field) 99 | yield submatcher.match(context, getattr(node, field, None)) 100 | 101 | 102 | def _generate_syntax_matchers_for_type_tree(d, ast_node_type_root): 103 | matcher_type = _AstNodeMatcher._generate_syntax_matcher(ast_node_type_root) # pylint: disable=protected-access 104 | d[matcher_type.__name__] = matcher.safe_to_eval(matcher_type) 105 | for subclass in ast_node_type_root.__subclasses__(): 106 | _generate_syntax_matchers_for_type_tree(d, subclass) 107 | 108 | 109 | _generate_syntax_matchers_for_type_tree(globals(), ast.AST) 110 | 111 | if sys.version_info < (3, 9): 112 | # Slices pre-3.9 don't carry a col_offset, causing them to, in some cases, 113 | # be completely incorrect. 114 | # In particular, they will be incorrect for slices with no subexpressions, 115 | # such as `foo[:]``, and for extended slices, such as `foo[:,i]`. 116 | # Rather than keep support around, we disable this, with a workaround 117 | # suggested for the very danger inclined. 118 | 119 | @matcher.safe_to_eval 120 | @attr.s(frozen=True, kw_only=True) 121 | class Subscript(Subscript): # pylint: disable=undefined-variable 122 | slice = matcher.submatcher_attrib(default=base_matchers.Anything()) 123 | 124 | @slice.validator 125 | def _slice_validator(self, attribute, value): 126 | del attribute # unused 127 | if isinstance(value, base_matchers.Bind): 128 | raise ValueError( 129 | 'slice=Bind(...) not supported in Python < 3.9. It will fail to ' 130 | 'correctly match e.g. `a[:]` or `a[1,:]`. Upgrade to Python 3.9, or' 131 | ' work around this using AllOf(Bind(...)) if that is OK.') 132 | 133 | 134 | # Compatibility classes. e.g. in 3.8, isinstance(ast.Num(3), ast.Num) is false. 135 | # Instead, we replace with hand-written matchers that match an ast.Constant 136 | # in the same circumstances. Same with any other backwards-incompatible changes. 137 | if sys.version_info >= (3, 8): 138 | 139 | def _constant_match( 140 | context, 141 | candidate, 142 | value_matcher: matcher.Matcher, 143 | value_types: tuple[type[object], ...], 144 | ): 145 | """Match an ``ast.Constant`` against a matcher and type.""" 146 | if type(candidate) != ast.Constant: # pylint: disable=unidiomatic-typecheck 147 | return None 148 | # note: not isinstance. The only concrete subclass that can occur in a 149 | # Constant AST is bool (which subclasses int). And in that case, we actually 150 | # don't want to include it -- Num() should not match `True`!. 151 | # Instead, all types must be listed out explicitly. 152 | if type(candidate.value) not in value_types: 153 | return None 154 | result = value_matcher.match(context, candidate.value) 155 | if result is None: 156 | return None 157 | return matcher.MatchInfo( 158 | matcher.create_match(context.parsed_file, candidate), result.bindings) 159 | 160 | @matcher.safe_to_eval 161 | @attr.s(frozen=True, kw_only=True) 162 | class Num(matcher.Matcher): 163 | n = matcher.submatcher_attrib(default=base_matchers.Anything()) 164 | 165 | def _match(self, context, candidate): 166 | return _constant_match(context, candidate, self.n, (int, float, complex)) 167 | 168 | type_filter = frozenset({ast.Constant}) 169 | 170 | @matcher.safe_to_eval 171 | @attr.s(frozen=True, kw_only=True) 172 | class Bytes(matcher.Matcher): 173 | s = matcher.submatcher_attrib(default=base_matchers.Anything()) 174 | 175 | def _match(self, context, candidate): 176 | return _constant_match(context, candidate, self.s, (bytes,)) 177 | 178 | type_filter = frozenset({ast.Constant}) 179 | 180 | @matcher.safe_to_eval 181 | @attr.s(frozen=True, kw_only=True) 182 | class Str(matcher.Matcher): 183 | s = matcher.submatcher_attrib(default=base_matchers.Anything()) 184 | 185 | def _match(self, context, candidate): 186 | return _constant_match(context, candidate, self.s, (str,)) 187 | 188 | type_filter = frozenset({ast.Constant}) 189 | 190 | @matcher.safe_to_eval 191 | @attr.s(frozen=True, kw_only=True) 192 | class NameConstant(matcher.Matcher): 193 | value = matcher.submatcher_attrib(default=base_matchers.Anything()) 194 | 195 | def _match(self, context, candidate): 196 | return _constant_match(context, candidate, self.value, (bool, type(None))) 197 | 198 | type_filter = frozenset({ast.Constant}) 199 | 200 | @matcher.safe_to_eval 201 | @attr.s(frozen=True) 202 | class Ellipsis(matcher.Matcher): # pylint: disable=redefined-builtin 203 | 204 | def _match(self, context, candidate): 205 | return _constant_match(context, candidate, base_matchers.Equals(...), 206 | (type(...),)) 207 | 208 | type_filter = frozenset({ast.Constant}) 209 | -------------------------------------------------------------------------------- /refex/python/matchers/extern_matchers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # pyformat: disable 15 | """ 16 | :mod:`~refex.python.matchers.extern_matchers` 17 | --------------------------------------------- 18 | 19 | Matchers for integrating with external tooling. 20 | 21 | .. autoclass:: RewriteFile 22 | .. autoclass:: ExternalCommand 23 | """ 24 | # pyformat: enable 25 | 26 | import abc 27 | import subprocess 28 | from typing import Union, Sequence, Optional 29 | 30 | import attr 31 | 32 | from refex.python import matcher 33 | 34 | 35 | @attr.s(frozen=True) 36 | class RewriteFile(matcher.Matcher): 37 | """Base class for whole-file rewrites.""" 38 | _metavariable_prefix = attr.ib(type=str) 39 | 40 | def _match(self, context, candidate): 41 | rewritten = self.rewrite(context, candidate) 42 | if rewritten is None: 43 | return None 44 | else: 45 | return matcher.MatchInfo.from_diff( 46 | self._metavariable_prefix, 47 | context.parsed_file.text, 48 | rewritten, 49 | match=matcher.create_match(context.parsed_file, candidate)) 50 | 51 | type_filter = None 52 | 53 | @abc.abstractmethod 54 | def rewrite(self, context: matcher.PythonParsedFile, 55 | candidate) -> Optional[str]: 56 | pass 57 | 58 | 59 | @attr.s(frozen=True) 60 | class ExternalCommand(RewriteFile): 61 | """Runs an external command to modify a file.""" 62 | 63 | #: The command to run, which takes the input as stdin, and returns the 64 | #: replacement by printing to stdout. 65 | _command = attr.ib(type=Union[str, Sequence[str]]) 66 | 67 | #: Whether to run via the shell. Unsafe. 68 | _shell = attr.ib(type=bool, default=False) 69 | 70 | def rewrite(self, context, candidate): 71 | out = subprocess.run( 72 | self._command, 73 | check=True, 74 | stdout=subprocess.PIPE, 75 | input=context.parsed_file.text, 76 | encoding='utf-8', 77 | shell=self._shell, 78 | ) 79 | return out.stdout 80 | -------------------------------------------------------------------------------- /refex/python/matchers/lexical_matchers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # pyformat: disable 15 | """ 16 | :mod:`~refex.python.matchers.lexical_matchers` 17 | ---------------------------------------------- 18 | 19 | :mod:`~refex.python.matchers.lexical_matchers` provides lexical tweaks and 20 | filters on lexical matches. 21 | 22 | .. autoclass:: HasComments 23 | .. autoclass:: NoComments 24 | 25 | """ 26 | # pyformat: enable 27 | 28 | from __future__ import absolute_import 29 | from __future__ import division 30 | from __future__ import print_function 31 | 32 | import tokenize 33 | 34 | import attr 35 | import cached_property 36 | 37 | from refex.python import matcher 38 | 39 | 40 | @matcher.safe_to_eval 41 | @attr.s(frozen=True) 42 | class HasComments(matcher.Matcher): 43 | """Filter results to only those lexical spans that have comments inside. 44 | 45 | Args: 46 | submatcher: A Matcher matching a LexicalMatch. 47 | """ 48 | _submatcher = matcher.submatcher_attrib() # type: matcher.Matcher 49 | 50 | def _match(self, context, candidate): 51 | result = self._submatcher.match(context, candidate) 52 | if _result_has_comments(context, self._submatcher, result): 53 | return result 54 | else: 55 | return None 56 | 57 | @cached_property.cached_property 58 | def type_filter(self): 59 | return self._submatcher.type_filter 60 | 61 | 62 | @matcher.safe_to_eval 63 | @attr.s(frozen=True) 64 | class NoComments(matcher.Matcher): 65 | """Filter results to only those lexical spans that have no comments inside. 66 | 67 | Args: 68 | submatcher: A Matcher matching a LexicalMatch. 69 | """ 70 | _submatcher = matcher.submatcher_attrib() # type: matcher.Matcher 71 | 72 | def _match(self, context, candidate): 73 | result = self._submatcher.match(context, candidate) 74 | if _result_has_comments(context, self._submatcher, result): 75 | return None 76 | else: 77 | return result 78 | 79 | @cached_property.cached_property 80 | def type_filter(self): 81 | return self._submatcher.type_filter 82 | 83 | 84 | # TODO(b/64560910): Yield all the comments so that matchers can operate on them 85 | # and check what they contain. 86 | def _result_has_comments(context, m, result): 87 | """Returns whether or not there are spans in a result.""" 88 | if result is None: 89 | # Doesn't actually matter either way -- anyone checking will return either 90 | # result, or None, and here both are None. But if we ask the question, 91 | # "does there exist at least one comment node we can find here?" the answer 92 | # is no. 93 | return False 94 | if not isinstance(result.match, matcher.LexicalMatch): 95 | raise TypeError('Expected a LexicalMatch from matcher (%r), got: %r' % 96 | (m, result)) 97 | 98 | first_token = result.match.first_token 99 | last_token = result.match.last_token 100 | for token in context.parsed_file.ast_tokens.token_range( 101 | first_token, last_token, include_extra=True): 102 | if token.type == tokenize.COMMENT: 103 | return True 104 | return False 105 | -------------------------------------------------------------------------------- /refex/python/matchers/test_ast_matchers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.python.matchers.ast_matchers.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import unittest 21 | 22 | from absl.testing import absltest 23 | from absl.testing import parameterized 24 | 25 | from refex import match 26 | from refex.python import matcher 27 | from refex.python.matchers import ast_matchers 28 | from refex.python.matchers import base_matchers 29 | 30 | 31 | def expression(e): 32 | parsed = matcher.parse_ast(e, '') 33 | return parsed, parsed.tree.body[0].value 34 | 35 | 36 | class RawAstTest(absltest.TestCase): 37 | 38 | def test_type_only(self): 39 | parsed, e = expression('~a') 40 | self.assertEqual( 41 | ast_matchers.UnaryOp().match(matcher.MatchContext(parsed), e), 42 | matcher.MatchInfo( 43 | matcher.LexicalASTMatch(e, parsed.text, e.first_token, 44 | e.last_token))) 45 | 46 | def test_explicit_anything(self): 47 | parsed, e = expression('~a') 48 | self.assertEqual( 49 | ast_matchers.UnaryOp( 50 | op=base_matchers.Anything(), 51 | operand=base_matchers.Anything()).match( 52 | matcher.MatchContext(parsed), e), 53 | matcher.MatchInfo( 54 | matcher.LexicalASTMatch(e, parsed.text, e.first_token, 55 | e.last_token))) 56 | 57 | def test_fully_specified_matcher(self): 58 | parsed, e = expression('~a') 59 | self.assertEqual( 60 | ast_matchers.UnaryOp( 61 | op=ast_matchers.Invert(), 62 | operand=ast_matchers.Name(ctx=ast_matchers.Load())).match( 63 | matcher.MatchContext(parsed), e), 64 | matcher.MatchInfo( 65 | matcher.LexicalASTMatch(e, parsed.text, e.first_token, 66 | e.last_token))) 67 | 68 | def test_type_mismatch(self): 69 | parsed, e = expression('a + b') 70 | self.assertIsNone(ast_matchers.UnaryOp().match( 71 | matcher.MatchContext(parsed), e)) 72 | 73 | def test_submatcher_fail(self): 74 | parsed, e = expression('~a') 75 | self.assertIsNone( 76 | ast_matchers.UnaryOp( 77 | op=base_matchers.Unless(base_matchers.Anything())).match( 78 | matcher.MatchContext(parsed), e)) 79 | 80 | def test_ancestor(self): 81 | """The matcher won't traverse into child nodes.""" 82 | parsed = matcher.parse_ast('~a', '') 83 | self.assertIsNone( 84 | ast_matchers.UnaryOp( 85 | op=base_matchers.Unless(base_matchers.Anything())).match( 86 | matcher.MatchContext(parsed), parsed.tree.body[0])) 87 | 88 | def test_non_lexical_node(self): 89 | """The matcher doesn't return lexical data for non-lexical AST nodes.""" 90 | parsed, binop = expression('a + b') 91 | add = binop.op 92 | self.assertEqual( 93 | ast_matchers.Add().match(matcher.MatchContext(parsed), add), 94 | matcher.MatchInfo(match.ObjectMatch(add))) 95 | 96 | def test_positional_arguments(self): 97 | """Positional arguments are reserved for later use. 98 | 99 | Clang AST matchers use them as an implicit forAll, for example. This seems 100 | useful. But the default used by attrs is to define all the fields as 101 | positional arguments as well, and this is borderline useless -- nobody is 102 | going to remember what the order of the fields is. So it is forbidden, to 103 | ensure nobody relies on it. People might otherwise be tempted by e.g. Num, 104 | which has only one parameter. (Num(3) is readable, but still banned.) 105 | """ 106 | with self.assertRaises(TypeError): 107 | ast_matchers.Num(3) # n=3 is fine though. 108 | 109 | with self.assertRaises(TypeError): 110 | ast_matchers.Constant(3) # value=3 is fine though. 111 | 112 | 113 | class ConstantTest(parameterized.TestCase): 114 | """In Python 3.8, the AST hierarchy for constants was changed dramatically. 115 | 116 | To preserve compatibility with <3.8, we implement compatibility shims that 117 | reflect the old API. They're also potentially just plain handy. 118 | """ 119 | 120 | @parameterized.parameters( 121 | ast_matchers.Num(n=0), ast_matchers.Num(n=0.0), ast_matchers.Num(n=0j), 122 | ast_matchers.Num()) 123 | def test_num(self, num_matcher): 124 | for s in '0', '0.0', '0j': 125 | with self.subTest(s=s): 126 | parsed = matcher.parse_ast(s, '') 127 | self.assertIsNotNone( 128 | num_matcher.match( 129 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 130 | 131 | @parameterized.parameters('"string"', 'b"bytes"', 'True', 'None') 132 | def test_num_non_number(self, non_number): 133 | parsed = matcher.parse_ast(non_number, '') 134 | self.assertIsNone(ast_matchers.Num().match( 135 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 136 | 137 | @parameterized.parameters(({'s': b''},), ({},)) 138 | def test_bytes(self, kwargs): 139 | bytes_matcher = ast_matchers.Bytes(**kwargs) # hack for py2 140 | parsed = matcher.parse_ast('b""', '') 141 | self.assertIsNotNone( 142 | bytes_matcher.match( 143 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 144 | 145 | def test_bytes_non_bytes(self): 146 | parsed = matcher.parse_ast('"string"', '') 147 | self.assertIsNone(ast_matchers.Bytes().match( 148 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 149 | 150 | @parameterized.parameters(ast_matchers.Str(s=''), ast_matchers.Str()) 151 | def test_string(self, str_matcher): 152 | parsed = matcher.parse_ast('""', '') 153 | self.assertIsNotNone( 154 | str_matcher.match( 155 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 156 | 157 | def test_string_non_string(self): 158 | parsed = matcher.parse_ast('2', '') 159 | self.assertIsNone(ast_matchers.Str().match( 160 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 161 | 162 | def test_ellipsis(self): 163 | parsed = matcher.parse_ast('...', '') 164 | self.assertIsNotNone(ast_matchers.Ellipsis().match( 165 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 166 | 167 | def test_ellipsis_non_ellipsis(self): 168 | parsed = matcher.parse_ast('1', '') 169 | self.assertIsNone(ast_matchers.Ellipsis().match( 170 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 171 | 172 | @parameterized.parameters(True, False, None) 173 | def test_named_constant(self, constant): 174 | parsed = matcher.parse_ast(str(constant), '') 175 | for m in ast_matchers.NameConstant(), ast_matchers.NameConstant( 176 | value=constant): 177 | with self.subTest(matcher=m): 178 | self.assertIsNotNone( 179 | m.match(matcher.MatchContext(parsed), parsed.tree.body[0].value)) 180 | 181 | def test_named_constant_non_named_constant(self): 182 | parsed = matcher.parse_ast('1', '') 183 | self.assertIsNone(ast_matchers.NameConstant().match( 184 | matcher.MatchContext(parsed), parsed.tree.body[0].value)) 185 | 186 | 187 | if __name__ == '__main__': 188 | absltest.main() 189 | -------------------------------------------------------------------------------- /refex/python/matchers/test_extern_matchers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.matchers.extern_matchers.""" 15 | 16 | from absl.testing import absltest 17 | 18 | from refex import search 19 | from refex.python import matcher_test_util 20 | from refex.python.matchers import extern_matchers 21 | 22 | 23 | def _rewrite(m, code): 24 | return search.rewrite_string( 25 | search.PyMatcherRewritingSearcher.from_matcher(m, {}), code, 'example.py') 26 | 27 | 28 | class RewriteFileTest(matcher_test_util.MatcherTestCase): 29 | 30 | def test_rewrite_fail(self): 31 | 32 | class Fail(extern_matchers.RewriteFile): 33 | 34 | def rewrite(self, context, candidate): 35 | return None 36 | 37 | self.assertEqual(_rewrite(Fail('metavar'), 'old'), 'old') 38 | 39 | def test_rewrite_succeed(self): 40 | 41 | class Succeed(extern_matchers.RewriteFile): 42 | 43 | def rewrite(self, context, candidate): 44 | return 'new' 45 | 46 | self.assertEqual(_rewrite(Succeed('metavar'), 'old'), 'new') 47 | 48 | 49 | class ExternalCommandTest(matcher_test_util.MatcherTestCase): 50 | 51 | def test_replace_noop(self): 52 | code = '1\n2\n' 53 | self.assertEqual( 54 | _rewrite(extern_matchers.ExternalCommand('cat', ['cat']), code), code) 55 | 56 | def test_replace(self): 57 | code = '1\n2\n' 58 | self.assertEqual( 59 | _rewrite( 60 | extern_matchers.ExternalCommand('echo', ['echo', 'hello']), code), 61 | 'hello\n') 62 | 63 | def test_shell_true(self): 64 | code = '1\n2\n' 65 | self.assertEqual( 66 | _rewrite( 67 | extern_matchers.ExternalCommand('echo', 'echo hello', shell=True), 68 | code), 'hello\n') 69 | 70 | 71 | if __name__ == '__main__': 72 | absltest.main() 73 | -------------------------------------------------------------------------------- /refex/python/matchers/test_lexical_matchers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.python.matchers.lexical_matchers.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | from absl.testing import absltest 21 | 22 | from refex.python import matcher_test_util 23 | from refex.python.matchers import ast_matchers 24 | from refex.python.matchers import lexical_matchers 25 | from refex.python.matchers import syntax_matchers 26 | 27 | 28 | class NoCommentsTest(matcher_test_util.MatcherTestCase): 29 | _comments_source = '(a # comment\n + b)' 30 | _nocomments_source = '(a + b)' 31 | _including_comments_matcher = syntax_matchers.StmtPattern('a + b') 32 | _requiring_comments_matcher = lexical_matchers.HasComments( 33 | _including_comments_matcher) 34 | _banning_comments_matcher = lexical_matchers.NoComments( 35 | _including_comments_matcher) 36 | 37 | def test_outside_comment_irrelevant(self): 38 | for prefix in ['', '# earlier comment\n']: 39 | for suffix in ['', ' # trailing comment']: 40 | source_code = prefix + self._nocomments_source + suffix 41 | for m in [ 42 | self._including_comments_matcher, self._requiring_comments_matcher, 43 | self._banning_comments_matcher 44 | ]: 45 | with self.subTest(source_code=source_code, matcher=m): 46 | self.assertEqual( 47 | self.get_all_match_strings(m, source_code), 48 | self.get_all_match_strings(m, self._nocomments_source)) 49 | 50 | def test_interior_comments(self): 51 | for m in [ 52 | self._including_comments_matcher, self._requiring_comments_matcher 53 | ]: 54 | with self.subTest(matcher=m): 55 | self.assertEqual( 56 | self.get_all_match_strings(m, self._comments_source), 57 | [self._comments_source]) 58 | for m in [self._banning_comments_matcher]: 59 | with self.subTest(matcher=m): 60 | self.assertEqual( 61 | self.get_all_match_strings(m, self._comments_source), []) 62 | 63 | def test_no_interior_comments(self): 64 | for m in [self._requiring_comments_matcher]: 65 | with self.subTest(matcher=m): 66 | self.assertEqual( 67 | self.get_all_match_strings(m, self._nocomments_source), []) 68 | for m in [self._including_comments_matcher, self._banning_comments_matcher]: 69 | with self.subTest(matcher=m): 70 | self.assertEqual( 71 | self.get_all_match_strings(m, self._nocomments_source), 72 | [self._nocomments_source]) 73 | 74 | def test_incorrect_match_type(self): 75 | nonlexical_matcher = ast_matchers.Add() 76 | for m in [ 77 | lexical_matchers.NoComments(nonlexical_matcher), 78 | lexical_matchers.HasComments(nonlexical_matcher) 79 | ]: 80 | with self.subTest(matcher=m): 81 | with self.assertRaises(TypeError): 82 | self.get_all_match_strings(m, 'a + b') 83 | 84 | 85 | if __name__ == '__main__': 86 | absltest.main() 87 | -------------------------------------------------------------------------------- /refex/python/python_pattern.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Parsing of Python patterns containing metavariables. 15 | 16 | For example: "a = $b" is a pattern for an assignment statement, where the target 17 | is a wildcard named "b". 18 | """ 19 | 20 | import io 21 | import re 22 | import token 23 | import tokenize 24 | 25 | 26 | _VARIABLE_REGEX = re.compile(r'\A[a-zA-Z_][a-zA-Z0-9_]*\Z') 27 | 28 | 29 | def token_pattern(pattern): 30 | """Tokenizes a source pattern containing metavariables like "$foo". 31 | 32 | Args: 33 | pattern: A Python source pattern. 34 | 35 | Returns: 36 | (tokenized, metavar_indices). 37 | tokenized: 38 | A list of source tokens, omitting the metavariable marker ($). 39 | metavar_indices: 40 | A set of token indexes. tokenized[i] is a metavariable token if and only 41 | if i is in metavar_indices. 42 | 43 | Raises: 44 | SyntaxError: The pattern can't be parsed. 45 | """ 46 | # Work around Python 3.6.7's newline requirement. See b/118359498. 47 | if pattern.endswith('\n'): 48 | added_newline = False 49 | else: 50 | added_newline = True 51 | pattern += '\n' 52 | 53 | try: 54 | tokens = list(tokenize.generate_tokens(io.StringIO(pattern).readline)) 55 | except tokenize.TokenError as e: 56 | raise SyntaxError("Couldn't tokenize %r: %s" % (pattern, e)) from e 57 | 58 | retokenized = [] 59 | metavar_indices = set() 60 | 61 | tokens_it = iter(tokens) 62 | for tok in tokens_it: 63 | if tok.string != '$': 64 | # Just a note: in the presence of errors, even whitespace gets added as 65 | # error tokens, so we're including that here on purpose. 66 | retokenized.append(tok) 67 | else: 68 | assert tok.type in (token.ERRORTOKEN, token.OP) 69 | try: 70 | variable_token = next(tokens_it) 71 | except StopIteration: 72 | # This should never happen, because we get an ENDMARKER token. 73 | # But who knows, the token stream may change in the future. 74 | raise SyntaxError('Expected variable after $, got EOF') 75 | variable = variable_token.string 76 | if not _VARIABLE_REGEX.match(variable): 77 | raise SyntaxError( 78 | "Expected variable after $, but next token (%r) didn't match %s" % 79 | (variable, _VARIABLE_REGEX.pattern)) 80 | 81 | start_row, start_col = variable_token.start 82 | # untokenize() uses the gap between the end_col of the last token and the 83 | # start_col of this token to decide how many spaces to put -- there is no 84 | # "space token". As a result, if we do nothing, the place where the "$" 85 | # was will become a space. This is usually fine, but causes phantom 86 | # indents and syntax errors if the $ was the first character on the line. 87 | # e.g. it could not even parse the simple expression "$foo" 88 | # To avoid this, we must remove 1 from start_col to make up for it. 89 | if tok.start[1] != start_col - 1: 90 | # newlines get a NL token, so we only need to worry about columns. 91 | raise SyntaxError('No spaces allowed between $ and variable name: %r' % 92 | pattern) 93 | metavar_indices.add(len(retokenized)) 94 | retokenized.append(variable_token._replace( 95 | start=(start_row, start_col - 1))) 96 | 97 | # Undo damage required to work around Python 3.6.7's newline requirement 98 | # See b/118359498 for details. 99 | if added_newline and len(retokenized) >= 2 and retokenized[-2][1] == '\n': 100 | del retokenized[-2] 101 | return retokenized, metavar_indices 102 | -------------------------------------------------------------------------------- /refex/python/semiliteral_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Evaluates simple literal data and limited function calls. 15 | 16 | This provides an Eval() function that's similar in concept to ast.literal_eval. 17 | Like ast.literal_eval, it can parse its input of literal data such as string, 18 | numbers, lists, etc. Unlike ast.literal_eval, Eval also permits the use of 19 | function calls, as long as the callable is present in the provided dict. 20 | 21 | This is intended to replace uses of the standard library's eval() in places 22 | where it is too powerful for the intended use case. 23 | """ 24 | # NOTE: this file is a vendored copy of semiliteral_eval from Google's internal 25 | # source code. 26 | # TODO: open-source semiliteral_eval properly, and delete this copy. 27 | import ast 28 | 29 | 30 | def _HasStarArgs(node): 31 | """Returns True if the callable node has *args or **kwargs.""" 32 | try: 33 | # Python 2. 34 | return node.starargs or node.kwargs 35 | except AttributeError: 36 | # Python 3. 37 | return (any(isinstance(arg, ast.Starred) for arg in node.args) or 38 | any(kw.arg is None for kw in node.keywords)) 39 | 40 | 41 | def Eval(s, callables=None, constants=None): 42 | """Evaluates Python strings with literals and provided callables/constants. 43 | 44 | Like ast.literal_eval, this parses its input of literal data for strings, 45 | bytes, numbers, lists, tuples, dictionaries, the constant names True, False, 46 | and None. It also supports set literals. 47 | 48 | Most importantly, this supports a dict of safe callables. A callable is 49 | restricted to be a dotted name in s, and only present in callable position. 50 | Its value must be bound in the 'callables' dictionary. 51 | 52 | Args: 53 | s: a string 54 | callables: an optional dictionary mapping a dotted name to a function. For 55 | example, the dictionary {'set': set} will allow the evaluator to call the 56 | 'set' function where it occurs in s. If you use this, we recommend you 57 | explicitly pass it as a keyword argument for readability and to avoid 58 | confusion with 'constants'. If not provided, defaults to {}. 59 | constants: an optional dictionary mapping names to constant values. If you 60 | use this, we recommend you explicitly pass it as a keyword argument for 61 | readability and to avoid confusion with 'callables'. If not provided, 62 | defaults to `{}` 63 | 64 | Returns: 65 | The evaluation of s. 66 | 67 | Raises: 68 | SyntaxError: Occurs if s does not look like valid Python literal syntax or 69 | if it refers to an unknown constant or callable. 70 | """ 71 | if callables is None: 72 | callables = {} 73 | if constants is None: 74 | constants = {} 75 | assert isinstance(callables, dict) 76 | assert isinstance(constants, dict) 77 | 78 | node = ast.parse(s, mode='eval') 79 | if isinstance(node, ast.Expression): 80 | node = node.body 81 | 82 | def _Convert(node): 83 | """Convert the literal data in the node.""" 84 | if hasattr(ast, 'Constant'): 85 | if isinstance(node, ast.Constant): 86 | return node.value 87 | else: 88 | if isinstance( 89 | node, (ast.Str, ast.Bytes if hasattr(ast, 'Bytes') else ()) 90 | ): 91 | return node.s 92 | if isinstance(node, ast.Num): 93 | return node.n 94 | if hasattr(ast, 'NameConstant') and isinstance(node, ast.NameConstant): 95 | # True/False/None on Python 3 < 3.12 96 | return node.value 97 | if isinstance(node, ast.UnaryOp): 98 | if isinstance(node.op, ast.USub) and isinstance(node.operand, ast.Num): 99 | return 0 - _Convert(node.operand) 100 | if isinstance(node, ast.Tuple): 101 | return tuple([_Convert(elt) for elt in node.elts]) 102 | if isinstance(node, ast.List): 103 | return [_Convert(elt) for elt in node.elts] 104 | if isinstance(node, ast.Dict): 105 | return {_Convert(k): _Convert(v) for k, v in zip(node.keys, node.values)} 106 | if isinstance(node, ast.Set): 107 | return {_Convert(elt) for elt in node.elts} 108 | # The following case supports calls to callable functions, supporting 109 | # positional and named arguments, but not *args or **kwargs: 110 | if isinstance(node, ast.Call) and not _HasStarArgs(node): 111 | callable_name = _GetDottedName(node.func) 112 | if callable_name is None: 113 | raise SyntaxError('malformed string: %r' % s) 114 | if callable_name not in callables: 115 | raise SyntaxError('unknown callable: %r' % callable_name) 116 | return callables[callable_name]( 117 | *[_Convert(arg) for arg in node.args], 118 | **{kw.arg: _Convert(kw.value) 119 | for kw in node.keywords}) 120 | # Try and see if it's a dotted-name constant. 121 | name = _GetDottedName(node) 122 | if name is not None: 123 | if name in constants: 124 | return constants[name] 125 | raise SyntaxError('unknown constant: %s' % name) 126 | 127 | raise SyntaxError('malformed string: %r' % s) 128 | 129 | def _GetDottedName(node): 130 | """Get the dotted name in the node.""" 131 | if isinstance(node, ast.Name): 132 | return node.id 133 | if isinstance(node, ast.Attribute): 134 | lhs = _GetDottedName(node.value) 135 | return lhs + '.' + node.attr 136 | return None 137 | 138 | return _Convert(node) 139 | -------------------------------------------------------------------------------- /refex/python/test_error_strings.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.python.error_strings.""" 15 | 16 | 17 | import ast 18 | import textwrap 19 | 20 | from absl.testing import absltest 21 | 22 | from refex.python import error_strings 23 | 24 | 25 | def _user_syntax_error_string(source_code): 26 | try: 27 | ast.parse(source_code) 28 | except SyntaxError as e: 29 | return error_strings.user_syntax_error(e, source_code) 30 | else: 31 | raise AssertionError("Didn't fail to parse: %s" % source_code) 32 | 33 | 34 | class UserSyntaxErrorTest(absltest.TestCase): 35 | 36 | def test_long(self): 37 | self.assertEqual( 38 | _user_syntax_error_string('xyz\na b'), 39 | textwrap.dedent("""\ 40 | Failed to parse Python-like source code (invalid syntax). 41 | 42 | Source: 43 | xyz 44 | a b 45 | 46 | Location: 47 | a b 48 | ^""")) 49 | 50 | def test_short(self): 51 | self.assertEqual( 52 | _user_syntax_error_string('a b'), 53 | textwrap.dedent("""\ 54 | Failed to parse Python-like source code (invalid syntax). 55 | a b 56 | ^""")) 57 | 58 | def test_synthetic_error_long(self): 59 | """User-synthesized SyntaxErrors still give nice output.""" 60 | self.assertEqual( 61 | error_strings.user_syntax_error(SyntaxError('message'), 'a\nb'), 62 | textwrap.dedent("""\ 63 | Failed to parse Python-like source code (message). 64 | a 65 | b""")) 66 | 67 | def test_synthetic_error_short(self): 68 | self.assertEqual( 69 | error_strings.user_syntax_error(SyntaxError('message'), 'a b'), 70 | textwrap.dedent("""\ 71 | Failed to parse Python-like source code (message). 72 | a b""")) 73 | 74 | def test_synthetic_error_no_msg(self): 75 | self.assertEqual( 76 | error_strings.user_syntax_error(SyntaxError(''), 'a b'), 77 | textwrap.dedent("""\ 78 | Failed to parse Python-like source code (). 79 | a b""")) 80 | 81 | 82 | if __name__ == '__main__': 83 | absltest.main() 84 | -------------------------------------------------------------------------------- /refex/python/test_evaluate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.py.evaluate.""" 15 | 16 | 17 | from absl.testing import absltest 18 | 19 | from refex.python import evaluate 20 | from refex.python.matchers import ast_matchers 21 | from refex.python.matchers import base_matchers 22 | from refex.python.matchers import syntax_matchers 23 | 24 | 25 | class EvaluateTest(absltest.TestCase): 26 | 27 | def test_base_matchers(self): 28 | for expr in ['base_matchers.Anything()', 'Anything()']: 29 | with self.subTest(expr=expr): 30 | self.assertEqual( 31 | evaluate.compile_matcher(expr), base_matchers.Anything()) 32 | 33 | def test_ast_matchers(self): 34 | for expr in ['ast_matchers.Name()', 'Name()']: 35 | with self.subTest(expr=expr): 36 | self.assertEqual(evaluate.compile_matcher(expr), ast_matchers.Name()) 37 | 38 | def test_syntax_matchers(self): 39 | for expr in ["syntax_matchers.ExprPattern('$bar')", "ExprPattern('$bar')"]: 40 | with self.subTest(expr=expr): 41 | self.assertEqual( 42 | evaluate.compile_matcher(expr), syntax_matchers.ExprPattern('$bar')) 43 | 44 | def test_whitespace(self): 45 | """Whitespace should be ignored to let people pretty-print their inputs.""" 46 | self.assertEqual( 47 | evaluate.compile_matcher(""" 48 | _ 49 | """), base_matchers.Anything()) 50 | 51 | 52 | if __name__ == '__main__': 53 | absltest.main() 54 | -------------------------------------------------------------------------------- /refex/python/test_python_pattern.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.python.python_pattern.""" 15 | 16 | 17 | import tokenize 18 | 19 | from absl.testing import absltest 20 | from absl.testing import parameterized 21 | 22 | from refex.python import python_pattern 23 | 24 | 25 | class PythonPatternTest(parameterized.TestCase): 26 | 27 | @parameterized.parameters('', 'x', 'x y') 28 | def test_simple_nonpattern(self, pattern): 29 | tokenized, _ = python_pattern.token_pattern(pattern) 30 | self.assertEqual(tokenize.untokenize(tokenized), pattern) 31 | 32 | @parameterized.parameters('$x', 'foo + $x', 'import $x', '$x "$y"', '$x = 0') 33 | def test_simple_pattern(self, pattern): 34 | tokenized, [metavar_i] = python_pattern.token_pattern(pattern) 35 | # token text is 'x' -- that's the only variable in the pattern. 36 | self.assertEqual(tokenized[metavar_i][1], 'x') 37 | # it round trips to the same string except $x -> x 38 | self.assertEqual(tokenize.untokenize(tokenized), pattern.replace('$x', 'x')) 39 | 40 | @parameterized.parameters('$1', '$', '$\n', '$[', '$""', '$ x', '$\nx') 41 | def test_syntax_error(self, pattern): 42 | with self.assertRaises(SyntaxError): 43 | python_pattern.token_pattern(pattern) 44 | 45 | 46 | if __name__ == '__main__': 47 | absltest.main() 48 | -------------------------------------------------------------------------------- /refex/python/test_semiliteral_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from absl.testing import absltest 16 | 17 | from refex.python import semiliteral_eval 18 | 19 | # Create alias for easier tests. 20 | Eval = semiliteral_eval.Eval # pylint: disable=invalid-name 21 | 22 | 23 | class SemiliteralEvalTest(absltest.TestCase): 24 | 25 | def testString(self): 26 | self.assertEqual(Eval('"hello world"'), 'hello world') 27 | self.assertEqual(Eval('\'hello world\''), 'hello world') 28 | 29 | def testBytes(self): 30 | self.assertEqual(Eval('b"hello world"'), b'hello world') 31 | self.assertEqual(Eval('b\'hello world\''), b'hello world') 32 | 33 | def testNumber(self): 34 | self.assertEqual(Eval('42'), 42) 35 | self.assertEqual(Eval('-42'), -42) 36 | 37 | def testTuple(self): 38 | self.assertEqual(Eval('(3, "four")'), (3, 'four')) 39 | self.assertEqual(Eval('()'), ()) 40 | 41 | def testList(self): 42 | self.assertEqual(Eval('[1, 2, 3]'), [1, 2, 3]) 43 | self.assertEqual(Eval('[[]]'), [[]]) 44 | 45 | def testDict(self): 46 | self.assertEqual(Eval('{"x":16}'), {'x': 16}) 47 | self.assertEqual(Eval('{"x":{"y": "z"}}'), {'x': {'y': 'z'}}) 48 | self.assertEqual(Eval('{}'), {}) 49 | 50 | def testSet(self): 51 | self.assertEqual(Eval('{3, 4, 5}'), {3, 4, 5}) 52 | self.assertEqual(Eval('{"unity"}'), {'unity'}) 53 | 54 | def testConstant(self): 55 | self.assertEqual(Eval('True'), True) 56 | self.assertEqual(Eval('False'), False) 57 | self.assertEqual(Eval('None'), None) 58 | 59 | self.assertRaises(SyntaxError, lambda: Eval('true')) 60 | self.assertEqual(Eval('true', constants={'true': True}), True) 61 | 62 | def testCallable(self): 63 | self.assertEqual( 64 | Eval( 65 | '[1, "two", set([3])]', callables={'set': set}), 66 | [1, 'two', set([3])]) 67 | 68 | self.assertEqual( 69 | Eval( 70 | 'cons(42, (cons(43, None)))', 71 | callables={'cons': lambda x, y: [x, y]}), [42, [43, None]]) 72 | 73 | self.assertEqual( 74 | Eval( 75 | 'cons(y=42, x=(cons(y=43, x=None)))', 76 | callables={'cons': lambda x, y: [x, y]}), [[None, 43], 42]) 77 | 78 | def testDottedNamesInCallable(self): 79 | # Dotted names are allowed in callable position, but must be explicitly 80 | # listed as a fully-qualified name in the callables dictionary. This is to 81 | # curtail arbitrary attribute lookup. 82 | self.assertEqual( 83 | Eval( 84 | 'a.B(42)', callables={'a.B': 'result is {}'.format}), 85 | 'result is 42') 86 | 87 | def testDottedNamesInConstant(self): 88 | # Dotted names are allowed outside of callable position, but must be 89 | # explicitly listed as a fully-qualified name in the constants dictionary, 90 | # to curtail arbitrary attribute lookup. 91 | self.assertEqual(Eval('foo.BAR', constants={'foo.BAR': 42}), 42) 92 | 93 | def testUnknownCallables(self): 94 | self.assertRaises(SyntaxError, lambda: Eval('[1, "two", set([3])]')) 95 | self.assertRaises(SyntaxError, lambda: Eval('[1, "two", True()]')) 96 | 97 | def testCallablesOnlyInCallablePosition(self): 98 | self.assertRaises(SyntaxError, lambda: Eval('set', callables={'set': set})) 99 | 100 | def testLambdaIsSyntaxError(self): 101 | # https://mail.python.org/pipermail/tutor/2004-December/033828.html 102 | infinite_loop = '(lambda l: l(l)) (lambda l: l(l))' 103 | self.assertRaises(SyntaxError, lambda: Eval(infinite_loop)) 104 | 105 | 106 | if __name__ == '__main__': 107 | absltest.main() 108 | -------------------------------------------------------------------------------- /refex/python/test_syntactic_template.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.python.syntactic_template.""" 15 | 16 | 17 | from absl.testing import absltest 18 | from absl.testing import parameterized 19 | 20 | from refex import formatting 21 | from refex import match 22 | from refex.python import matcher 23 | from refex.python import syntactic_template 24 | from refex.python.matchers import ast_matchers 25 | from refex.python.matchers import base_matchers 26 | 27 | 28 | class LexicalTemplateTest(parameterized.TestCase): 29 | 30 | @parameterized.parameters('', 'y', '""', '#') 31 | def test_substitute(self, replacement): 32 | """Tests substitutions, as either the template or a substituted-in variable.""" 33 | for template in ['$x', replacement]: 34 | with self.subTest(template=template): 35 | replaced = syntactic_template._LexicalTemplate(template).substitute( 36 | {'x': replacement}) 37 | self.assertIsInstance(replaced, str) 38 | self.assertEqual(replaced, replacement) 39 | 40 | @parameterized.parameters('$ $ $', '\t', ' ') 41 | def test_substitute_nontemplate(self, replacement): 42 | """Tests substitution which don't work as templates.""" 43 | replaced = syntactic_template._LexicalTemplate('$x').substitute( 44 | {'x': replacement}) 45 | self.assertIsInstance(replaced, str) 46 | self.assertEqual(replaced, replacement) 47 | 48 | def test_missing_parameter(self): 49 | with self.assertRaises(KeyError): 50 | syntactic_template._LexicalTemplate('$x').substitute({}) 51 | 52 | def test_extra_parameter(self): 53 | self.assertEqual( 54 | syntactic_template._LexicalTemplate('$x').substitute({ 55 | 'x': 'v1', 56 | 'y': 'v2' 57 | }), 'v1') 58 | 59 | @parameterized.parameters('"$y"', '# y') 60 | def test_substitute_nonprogram(self, template): 61 | """Substitution doesn't affect the contents of strings or comments.""" 62 | self.assertNotIn( 63 | 'BAD', 64 | syntactic_template._LexicalTemplate(template).substitute({ 65 | 'y': 'BAD', 66 | })) 67 | 68 | def test_x_eq_x(self): 69 | self.assertEqual( 70 | syntactic_template._LexicalTemplate('$x = $x').substitute({'x': '(a)'}), 71 | '(a) = (a)', str(syntactic_template._LexicalTemplate('$x = $x'))) 72 | 73 | 74 | class PythonTemplateTest(parameterized.TestCase): 75 | 76 | @parameterized.parameters('f("$x")', 'f("$current_expr")') 77 | def test_nonpython_dollars_source(self, src): 78 | parsed = matcher.parse_ast(src) 79 | m = base_matchers.Bind('bound', ast_matchers.Call()) 80 | [matchinfo] = matcher.find_iter(m, parsed) 81 | self.assertEqual( 82 | src, 83 | syntactic_template.PythonExprTemplate('$bound').substitute_match( 84 | parsed, matchinfo.match, {'bound': matchinfo.match})) 85 | 86 | def test_nonpython_dollars_dest(self): 87 | src = 'f' 88 | parsed = matcher.parse_ast(src) 89 | m = base_matchers.Bind('bound', ast_matchers.Name()) 90 | [matchinfo] = matcher.find_iter(m, parsed) 91 | self.assertEqual( 92 | 'f("$x")', 93 | syntactic_template.PythonExprTemplate('$bound("$x")').substitute_match( 94 | parsed, matchinfo.match, {'bound': matchinfo.match})) 95 | 96 | @parameterized.parameters(('x', set()), ('$a + $b', {'a', 'b'})) 97 | def test_variables(self, template, expected_variables): 98 | self.assertEqual( 99 | syntactic_template.PythonExprTemplate(template).variables, 100 | expected_variables) 101 | 102 | def test_empty_expr(self): 103 | with self.assertRaises(ValueError): 104 | syntactic_template.PythonExprTemplate('') 105 | 106 | def test_empty_stmt(self): 107 | with self.assertRaises(ValueError): 108 | syntactic_template.PythonStmtTemplate('') 109 | 110 | @parameterized.parameters('', 'a; b') 111 | def test_nonsingular_py_ok(self, template): 112 | """Tests non-singular PythonTemplate in a context where it's acceptable. 113 | 114 | If it is not being placed into a context where it's expected to parse as 115 | an expression, then '' and even 'a; b' are fine. 116 | 117 | Args: 118 | template: the template for this test. 119 | """ 120 | parsed = matcher.parse_ast('x') 121 | m = base_matchers.Bind('bound', ast_matchers.Name()) 122 | [matchinfo] = matcher.find_iter(m, parsed) 123 | self.assertEqual( 124 | template, 125 | syntactic_template.PythonTemplate(template).substitute_match( 126 | parsed, matchinfo.match, {'bound': matchinfo.match})) 127 | 128 | @parameterized.parameters( 129 | syntactic_template.PythonTemplate(''), 130 | syntactic_template.PythonTemplate('a; b'), 131 | syntactic_template.PythonTemplate('pass'), 132 | syntactic_template.PythonStmtTemplate('pass')) 133 | def test_nonexpr_in_expr_context(self, template): 134 | parsed = matcher.parse_ast('[x]') 135 | m = base_matchers.Bind('bound', ast_matchers.Name()) 136 | [matchinfo] = matcher.find_iter(m, parsed) 137 | with self.assertRaises(formatting.RewriteError): 138 | template.substitute_match(parsed, matchinfo.match, 139 | {'bound': matchinfo.match}) 140 | 141 | 142 | class OpaqueReplacementTest(absltest.TestCase): 143 | """Tests automatic safeties even when the replacement value is a StringMatch. 144 | 145 | This cannot verify that the replacement parses back as itself, but can still 146 | verify that the surrounding structure is unchanged. 147 | """ 148 | 149 | def test_autoparen_inner(self): 150 | parsed = matcher.parse_ast('x') 151 | m = base_matchers.Bind('x', ast_matchers.Name()) 152 | [matchinfo] = matcher.find_iter(m, parsed) 153 | template = syntactic_template.PythonTemplate('[$x]') 154 | self.assertEqual( 155 | template.substitute_match(parsed, matchinfo.match, 156 | {'x': match.StringMatch('x, y')}), 157 | '[(x, y)]', 158 | ) 159 | 160 | def test_autoparen_outer(self): 161 | parsed = matcher.parse_ast('x * 2') 162 | m = base_matchers.Bind('x', ast_matchers.Name()) 163 | [matchinfo] = matcher.find_iter(m, parsed) 164 | template = syntactic_template.PythonTemplate('$x') 165 | self.assertEqual( 166 | template.substitute_match(parsed, matchinfo.match, 167 | {'x': match.StringMatch('x + y')}), 168 | '(x + y)', 169 | ) 170 | 171 | def test_invalid_syntax(self): 172 | parsed = matcher.parse_ast('x') 173 | m = base_matchers.Bind('x', ast_matchers.Name()) 174 | [matchinfo] = matcher.find_iter(m, parsed) 175 | template = syntactic_template.PythonTemplate('$x') 176 | with self.assertRaises(formatting.RewriteError): 177 | template.substitute_match(parsed, matchinfo.match, 178 | {'x': match.StringMatch('x y')}), 179 | 180 | 181 | class PythonStmtTemplateTest(parameterized.TestCase): 182 | 183 | @parameterized.parameters( 184 | '$x = $x', 185 | 'a, $x = $x', 186 | '(a, $x) = $x', 187 | '[a, $x] = $x', 188 | '$x.foo = $x', 189 | ) 190 | def test_assignment(self, template): 191 | template = syntactic_template.PythonStmtTemplate(template) 192 | # Test with different values of `ctx` for the variable being substituted. 193 | for variable_source in 'a = 1', 'a': 194 | with self.subTest(variable_souce=variable_source): 195 | [matchinfo] = matcher.find_iter( 196 | base_matchers.Bind('x', ast_matchers.Name()), 197 | matcher.parse_ast(variable_source)) 198 | substituted = template.substitute_match(None, None, 199 | {'x': matchinfo.match}) 200 | self.assertEqual(substituted, template.template.replace('$x', 'a')) 201 | 202 | if __name__ == '__main__': 203 | absltest.main() 204 | -------------------------------------------------------------------------------- /refex/refex_doctest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # python2 python3 16 | """Run doctests for all of refex's libraries.""" 17 | 18 | 19 | # isort: split 20 | # We put doctest after absltest so that it picks up the unittest monkeypatch. 21 | # Otherwise doctest tests aren't runnable at all with Bazel. 22 | 23 | import doctest 24 | import sys 25 | 26 | from absl.testing import absltest 27 | 28 | import refex.python.matcher_test_util # so that it's found by _submodules: pylint: disable=unused-import 29 | import refex.search 30 | 31 | 32 | def _submodules(package_module): 33 | """Gets submodules in a package. 34 | 35 | Args: 36 | package_module: The package itself. 37 | 38 | Yields: 39 | module objects for all modules in the package, including the root. 40 | """ 41 | package_prefix = package_module.__name__ + '.' 42 | yield package_module 43 | for name, mod in sys.modules.items(): 44 | if name.startswith(package_prefix): 45 | yield mod 46 | 47 | 48 | _REFEX_SUBMODULES = frozenset(_submodules(refex)) 49 | 50 | 51 | class SubmodulesTest(absltest.TestCase): 52 | 53 | def test_submodules(self): 54 | """_submodules should find some submodules.""" 55 | self.assertIn(refex, _REFEX_SUBMODULES) 56 | self.assertIn(refex.search, _REFEX_SUBMODULES) 57 | 58 | 59 | def load_tests(loader, tests, ignore): 60 | del loader, ignore # unused 61 | suite = absltest.unittest.TestSuite(tests) 62 | for module in _REFEX_SUBMODULES: 63 | if getattr(module, 'DOCTEST_RUN', True): 64 | suite.addTest( 65 | doctest.DocTestSuite( 66 | module, 67 | test_finder=doctest.DocTestFinder(exclude_empty=False), 68 | optionflags=(doctest.ELLIPSIS | doctest.DONT_ACCEPT_TRUE_FOR_1))) 69 | return suite 70 | 71 | 72 | if __name__ == '__main__': 73 | absltest.main() 74 | -------------------------------------------------------------------------------- /refex/rxerr_debug.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Pretty-print rxerr_*.json files.""" 16 | 17 | import json 18 | import shlex 19 | import sys 20 | import tempfile 21 | 22 | import pygments 23 | from pygments import formatters 24 | from pygments import lexers 25 | 26 | 27 | def main(argv): 28 | if len(argv) != 2: 29 | sys.exit('Expected exactly 1 argument, got: %s' % (len(argv) - 1)) 30 | debug_file = argv[-1] 31 | with open(debug_file) as f: 32 | debug_info = json.load(f) 33 | 34 | rxerr_argv = debug_info.get('argv') 35 | if rxerr_argv: 36 | print('Command:', ' '.join(shlex.quote(arg) for arg in rxerr_argv), '\n') 37 | is_first = False 38 | for f, failure in debug_info.get('failures', {}).items(): 39 | if not is_first: 40 | print('\n') 41 | is_first = False 42 | 43 | print('File:', f) 44 | try: 45 | source = failure['content'] 46 | except KeyError: 47 | pass 48 | else: 49 | with tempfile.NamedTemporaryFile( 50 | mode='w', encoding='utf-8', suffix='.py', delete=False) as out_f: 51 | out_f.write(source) 52 | print('Content:', out_f.name) 53 | try: 54 | tb = failure['traceback'] 55 | except KeyError: 56 | pass 57 | else: 58 | lexer = lexers.PythonTracebackLexer() # pytype: disable=module-attr 59 | formatter = formatters.Terminal256Formatter() # pytype: disable=module-attr 60 | print(pygments.highlight(tb, lexer, formatter)) 61 | 62 | 63 | if __name__ == '__main__': 64 | main(sys.argv) 65 | -------------------------------------------------------------------------------- /refex/test_binary.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # python3 16 | """A simple test of the refex binary.""" 17 | 18 | import subprocess 19 | 20 | from absl.testing import absltest 21 | 22 | 23 | class RefexBinaryTest(absltest.TestCase): 24 | 25 | def test_refex(self): 26 | f = self.create_tempfile(content='a') 27 | subprocess.check_call( 28 | ['refex', '--mode=re', 'a', '--sub=b', '-i', f.full_path]) 29 | self.assertEqual(f.read_text(), 'b') 30 | 31 | 32 | if __name__ == '__main__': 33 | absltest.main() 34 | -------------------------------------------------------------------------------- /refex/test_example_binary.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # python3 16 | """A simple test of the example binary.""" 17 | 18 | import subprocess 19 | import sys 20 | 21 | from absl.testing import absltest 22 | 23 | _EXECUTABLE = [sys.executable, 'examples/example_binary.py'] 24 | 25 | 26 | class RefexBinaryTest(absltest.TestCase): 27 | 28 | def test_binary(self): 29 | f = self.create_tempfile(content='hello') 30 | subprocess.check_call(_EXECUTABLE + [f.full_path]) 31 | self.assertEqual(f.read_text(), 'world') 32 | 33 | 34 | if __name__ == '__main__': 35 | absltest.main() 36 | -------------------------------------------------------------------------------- /refex/test_formatting.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.formatting.""" 15 | 16 | 17 | import collections 18 | 19 | from absl.testing import absltest 20 | from absl.testing import parameterized 21 | import colorama 22 | 23 | from refex import formatting 24 | from refex import match 25 | from refex import parsed_file 26 | from refex import substitution 27 | 28 | 29 | class LineExpandedSpanTest(absltest.TestCase): 30 | 31 | def test_none(self): 32 | self.assertEqual((0, 12), 33 | formatting.line_expanded_span('012\n456\n890\n', None, 34 | None)) 35 | 36 | def test_negative(self): 37 | self.assertEqual((8, 11), 38 | formatting.line_expanded_span('012\n456\n890\n', -2, -1)) 39 | 40 | def test_empty_span(self): 41 | self.assertEqual((4, 7), 42 | formatting.line_expanded_span('012\n456\n890\n', 5, 4)) 43 | self.assertEqual((4, 7), 44 | formatting.line_expanded_span('012\n456\n890\n', 5, 5)) 45 | 46 | def test_line_subset_(self): 47 | self.assertEqual((4, 7), 48 | formatting.line_expanded_span('012\n456\n890\n', 5, 6)) 49 | 50 | def test_string_start(self): 51 | self.assertEqual((0, 3), 52 | formatting.line_expanded_span('012\n456\n890\n', 1, 2)) 53 | 54 | def test_string_end(self): 55 | self.assertEqual((8, 11), 56 | formatting.line_expanded_span('012\n456\n890\n', 9, 10)) 57 | 58 | def test_line_start(self): 59 | self.assertEqual((4, 7), 60 | formatting.line_expanded_span('012\n456\n890\n', 4, 5)) 61 | 62 | def test_line_start_2(self): 63 | self.assertEqual((4, 7), 64 | formatting.line_expanded_span('012\n456\n890\n', 4, 4)) 65 | 66 | def test_line_end(self): 67 | self.assertEqual((4, 11), 68 | formatting.line_expanded_span('012\n456\n890\n', 7, 8)) 69 | 70 | def test_noop(self): 71 | self.assertEqual((4, 7), 72 | formatting.line_expanded_span('012\n456\n890\n', 4, 7)) 73 | 74 | def test_line_overlap_span(self): 75 | self.assertEqual((0, 11), 76 | formatting.line_expanded_span('012\n456\n890\n', 3, 9)) 77 | 78 | 79 | class RendererTest(absltest.TestCase): 80 | 81 | def test_empty_style(self): 82 | """Tests that the empty string is unstyled. 83 | 84 | We want to preserve colors for the _non_ empty spans. 85 | """ 86 | sub = substitution.Substitution( 87 | matched_spans={'primary': (0, 0)}, 88 | primary_label='primary', 89 | ) 90 | self.assertEqual( 91 | formatting.Renderer(match_format='{match}').render('abc', sub, {}), 92 | (False, '\n')) 93 | 94 | def test_saves_style(self): 95 | """Tests that the same style is reused for the same label.""" 96 | renderer = formatting.Renderer(match_format='{match}') 97 | sub_x = substitution.Substitution( 98 | matched_spans={'x': (0, 3)}, 99 | primary_label='x', 100 | ) 101 | is_diff, out_1 = renderer.render('abc', sub_x, {}) 102 | self.assertFalse(is_diff) 103 | is_diff, out_2 = renderer.render('abc', sub_x, {}) 104 | self.assertFalse(is_diff) 105 | 106 | # The same labels always get the same styling: 107 | self.assertEqual(out_1, out_2) 108 | # But a different label gets a new styling: 109 | sub_y = substitution.Substitution( 110 | matched_spans={'y': (0, 3)}, 111 | primary_label='y', 112 | ) 113 | is_diff, out_3 = renderer.render('abc', sub_y, {}) 114 | self.assertFalse(is_diff) 115 | self.assertNotEqual(out_1, out_3) 116 | 117 | def test_loops_styles(self): 118 | """When we run out of styles we reuse the old ones. 119 | 120 | (Instead of, e.g., StopIteration.) 121 | """ 122 | renderer = formatting.Renderer(match_format='{match}') 123 | 124 | def next_out(label): 125 | sub = substitution.Substitution( 126 | matched_spans={label: (0, 3)}, 127 | primary_label=label, 128 | ) 129 | is_diff, out = renderer.render('abc', sub, {}) 130 | self.assertFalse(is_diff) 131 | return out 132 | 133 | first = next_out('x') 134 | for label in range(10): 135 | last = next_out(label) 136 | if first == last: 137 | break 138 | else: 139 | self.fail('Never repeated: {!r} (last: {!r}'.format(first, last)) 140 | 141 | def test_nonsolo_primary_style(self): 142 | # exploit a weird edge case for testing: if the other label has zero-width, 143 | # it is not styled, but this still affects how the primary label is treated. 144 | sub = substitution.Substitution( 145 | matched_spans={ 146 | 'primary': (0, 3), 147 | 'other': (3, 3) 148 | }, 149 | primary_label='primary', 150 | ) 151 | self.assertEqual( 152 | formatting.Renderer(match_format='{match}').render('abc', sub, {}), 153 | (False, '{colorama.Style.BRIGHT}abc{colorama.Style.RESET_ALL}\n'.format( 154 | colorama=colorama))) 155 | 156 | def test_diff(self): 157 | """Tests a basic diff rendering.""" 158 | sub = substitution.Substitution( 159 | matched_spans={'primary': (0, 3)}, 160 | replacements={'primary': u'new'}, 161 | primary_label='primary', 162 | ) 163 | 164 | renderer = formatting.Renderer(match_format='{match}', color=False) 165 | is_diff, out = renderer.render('old', sub, {}) 166 | self.assertTrue(is_diff) 167 | self.assertEqual(out, '-old\n+new\n') 168 | 169 | 170 | class ShTemplateTest(parameterized.TestCase): 171 | 172 | @parameterized.parameters(('', set()), ('$a $b', {'a', 'b'})) 173 | def test_variables(self, template, expected_variables): 174 | self.assertEqual( 175 | formatting.ShTemplate(template).variables, expected_variables) 176 | 177 | 178 | class RegexTemplateTest(parameterized.TestCase): 179 | 180 | def test_empty(self): 181 | self.assertEqual( 182 | formatting.RegexTemplate('').substitute_match( 183 | parsed_file.ParsedFile('', path='path', pragmas=()), 184 | match.SpanMatch('', (0, 0)), {}), '') 185 | 186 | def test_extra(self): 187 | self.assertEqual( 188 | formatting.RegexTemplate('').substitute_match( 189 | parsed_file.ParsedFile('b', path='path', pragmas=()), 190 | match.SpanMatch('', (0, 0)), {'a': match.SpanMatch('b', (0, 1))}), 191 | '') 192 | 193 | def test_missing(self): 194 | for template in [r'\1', r'\g']: 195 | with self.subTest(template=template): 196 | with self.assertRaises(KeyError): 197 | formatting.RegexTemplate(r'\1').substitute_match( 198 | parsed_file.ParsedFile('', path='path', pragmas=()), 199 | match.SpanMatch('', (0, 0)), {}) 200 | 201 | def test_present_numeric(self): 202 | self.assertEqual( 203 | formatting.RegexTemplate(r'\1').substitute_match( 204 | parsed_file.ParsedFile('a', path='path', pragmas=()), 205 | match.SpanMatch('', (0, 0)), {1: match.SpanMatch('a', (0, 1))}), 206 | 'a') 207 | 208 | def test_present_numeric_by_name(self): 209 | self.assertEqual( 210 | formatting.RegexTemplate(r'\g<1>').substitute_match( 211 | parsed_file.ParsedFile('a', path='path', pragmas=()), 212 | match.SpanMatch('', (0, 0)), {1: match.SpanMatch('a', (0, 1))}), 213 | 'a') 214 | 215 | def test_present_named(self): 216 | self.assertEqual( 217 | formatting.RegexTemplate(r'\g').substitute_match( 218 | parsed_file.ParsedFile('a', path='path', pragmas=()), 219 | match.SpanMatch('', (0, 0)), {'x': match.SpanMatch('a', (0, 1))}), 220 | 'a') 221 | 222 | @parameterized.parameters(('', set()), (r'\1 \3', {1, 3}), 223 | (r'\g \g<4> \7', {'foo', 4, 7})) 224 | def test_variables(self, template, expected_variables): 225 | self.assertEqual( 226 | formatting.RegexTemplate(template).variables, expected_variables) 227 | 228 | 229 | class TemplateRewriterTest(absltest.TestCase): 230 | 231 | def test_empty(self): 232 | self.assertEqual( 233 | formatting.rewrite_templates( 234 | parsed_file.ParsedFile('abc', path='path', pragmas=()), {}, {}), {}) 235 | 236 | def test_named_template(self): 237 | self.assertEqual( 238 | formatting.rewrite_templates( 239 | parsed_file.ParsedFile('abc', path='path', pragmas=()), 240 | collections.OrderedDict([('foo', match.SpanMatch('b', (1, 2)))]), 241 | {'foo': formatting.RegexTemplate(r'x\gx')}), {'foo': 'xbx'}) 242 | 243 | def test_missing_template(self): 244 | self.assertEqual( 245 | formatting.rewrite_templates( 246 | parsed_file.ParsedFile('abc', path='path', pragmas=()), 247 | collections.OrderedDict([('foo', match.SpanMatch('', (-1, -1))), 248 | ('bar', match.SpanMatch('a', (0, 1)))]), 249 | { 250 | # swap foo and bar 251 | 'foo': formatting.RegexTemplate(r'bar=\g'), 252 | 'bar': formatting.RegexTemplate(r'foo=\g'), 253 | }), 254 | # foo is never matched, bar is replaced with foo=, 255 | # which is treated as ''. 256 | {'bar': 'foo='}) 257 | 258 | def test_labels_empty(self): 259 | self.assertEqual(formatting.template_variables({}), set()) 260 | 261 | def test_labels_nonempty(self): 262 | self.assertEqual( 263 | formatting.template_variables( 264 | {'key': formatting.RegexTemplate(r'\g')}), 265 | {'key', 'template_variable'}) 266 | 267 | def test_string_match(self): 268 | self.assertEqual( 269 | formatting.rewrite_templates( 270 | parsed_file.ParsedFile('abc', path='path', pragmas=()), 271 | collections.OrderedDict([('foo', match.SpanMatch('abc', (0, 3))), 272 | ('bar', match.StringMatch('xyz'))]), 273 | {'foo': formatting.ShTemplate(r'$bar')}), {'foo': 'xyz'}) 274 | 275 | 276 | class ConcatenateReplacementsTest(parameterized.TestCase, absltest.TestCase): 277 | 278 | def test_null_concatenation(self): 279 | self.assertEqual( 280 | formatting.concatenate_replacements('xyz', []), 281 | ('', 0, 0), 282 | ) 283 | 284 | @parameterized.parameters( 285 | (('', 0, 0),), 286 | (('', 0, 3),), 287 | (('abc', 0, 3),), 288 | (('b', 1, 2),), 289 | ) 290 | def test_noop(self, replacement): 291 | self.assertEqual( 292 | formatting.concatenate_replacements('xyz', [replacement]), 293 | replacement, 294 | ) 295 | 296 | def test_adjacent(self): 297 | self.assertEqual( 298 | formatting.concatenate_replacements('xyz', [('b', 1, 2), ('c', 2, 3)]), 299 | ('bc', 1, 3), 300 | ) 301 | 302 | def test_gap(self): 303 | self.assertEqual( 304 | formatting.concatenate_replacements('xyz', [('a', 0, 1), ('c', 2, 3)]), 305 | ('ayc', 0, 3), 306 | ) 307 | 308 | def test_gap_weirdsizes(self): 309 | self.assertEqual( 310 | formatting.concatenate_replacements('xyz', [('abc', 0, 0), ('', 2, 3)]), 311 | ('abcxy', 0, 3), 312 | ) 313 | 314 | # Failure tests 315 | 316 | def test_bad_swapped_slice(self): 317 | with self.assertRaises(ValueError): 318 | formatting.concatenate_replacements('xyz', [('a', 1, 0)]) 319 | 320 | def test_bad_overlapping_spans(self): 321 | string = '01234' 322 | fixed_start = 1 323 | fixed_end = len(string) - 1 324 | for start in (fixed_start - 1, fixed_start, fixed_start + 1): 325 | for end in (fixed_end - 1, fixed_end, fixed_end + 1): 326 | # fixed and dynamic are two overlapping spans. 327 | fixed = ('fixed', fixed_start, fixed_end) 328 | dynamic = ('dynamic', start, end) 329 | with self.subTest(start=start, end=end, fixed='first'): 330 | with self.assertRaises(ValueError): 331 | formatting.concatenate_replacements(string, [fixed, dynamic]) 332 | with self.subTest(start=start, end=end, fixed='second'): 333 | with self.assertRaises(ValueError): 334 | formatting.concatenate_replacements(string, [dynamic, fixed]) 335 | 336 | 337 | class ApplySubstitutionsTest(absltest.TestCase): 338 | 339 | def test_0_matches(self): 340 | self.assertEqual( 341 | formatting.apply_substitutions('abc', []), 342 | 'abc', 343 | ) 344 | 345 | def test_1_match(self): 346 | sub = substitution.Substitution( 347 | matched_spans={'x': (1, 2)}, 348 | replacements={'x': u'x'}, 349 | primary_label='x', 350 | ) 351 | self.assertEqual( 352 | formatting.apply_substitutions('abc', [sub]), 353 | 'axc', 354 | ) 355 | 356 | def test_2_matches(self): 357 | sub1 = substitution.Substitution( 358 | matched_spans={'x': (0, 1)}, 359 | replacements={'x': u'x'}, 360 | primary_label='x', 361 | ) 362 | sub2 = substitution.Substitution( 363 | matched_spans={'x': (2, 3)}, 364 | replacements={'x': u'x'}, 365 | primary_label='x', 366 | ) 367 | self.assertEqual( 368 | formatting.apply_substitutions('abc', [sub1, sub2]), 369 | 'xbx', 370 | ) 371 | 372 | def test_noreplacements(self): 373 | sub = substitution.Substitution( 374 | matched_spans={'x': (1, 2)}, 375 | primary_label='x', 376 | ) 377 | self.assertEqual( 378 | formatting.apply_substitutions('abc', [sub]), 379 | 'abc', 380 | ) 381 | 382 | 383 | if __name__ == '__main__': 384 | absltest.main() 385 | -------------------------------------------------------------------------------- /refex/test_parsed_file.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.parsed_file.""" 15 | 16 | 17 | from absl.testing import absltest 18 | from absl.testing import parameterized 19 | 20 | from refex import parsed_file 21 | 22 | 23 | class PragmaTest(parameterized.TestCase): 24 | 25 | @parameterized.parameters( 26 | u'stuff before the tag: t:a=b,c=d', 27 | u't:a=b,c=d', 28 | u' t:a=b,c=d', 29 | u't :a=b,c=d', 30 | u't: a=b,c=d', 31 | u't:a =b,c=d', 32 | u't:a= b,c=d', 33 | u't:a=b ,c=d', 34 | u't:a=b, c=d', 35 | u't:a=b,c =d', 36 | u't:a=b,c= d', 37 | u't:a=b,c=d ', 38 | u't:a=b,c=d,', 39 | u't:a=b,c=d ,', 40 | u't:a=b,c=d, ', 41 | ) 42 | def test_from_text(self, text): 43 | pragma = parsed_file.Pragma.from_text(text, 0, 100) 44 | self.assertIsNotNone(pragma) 45 | self.assertEqual(pragma.tag, u't') 46 | self.assertEqual(pragma.data, {u'a': u'b', u'c': u'd'}) 47 | 48 | def test_from_text_long(self): 49 | """Tests multi-character words in from_text, using a realistic example.""" 50 | pragma = parsed_file.Pragma.from_text( 51 | u'foo bar baz: pylint: disable=broad-except', 0, 100) 52 | self.assertIsNotNone(pragma) 53 | self.assertEqual(pragma.tag, u'pylint') 54 | self.assertEqual(pragma.data, {u'disable': u'broad-except'}) 55 | 56 | def test_from_text_dotted(self): 57 | pragma = parsed_file.Pragma.from_text( 58 | u'refex: disable=pylint.broad-except', 0, 100) 59 | self.assertIsNotNone(pragma) 60 | self.assertEqual(pragma.tag, u'refex') 61 | self.assertEqual(pragma.data, {u'disable': u'pylint.broad-except'}) 62 | 63 | @parameterized.parameters( 64 | u't a=b', 65 | u':a=b', 66 | u't:', 67 | u't:a a=b', 68 | u't:a=b b', 69 | u't:a=b=b', 70 | u't:ab', 71 | u't:a=', 72 | u't:=b', 73 | ) 74 | def test_from_text_fails(self, text): 75 | self.assertIsNone(parsed_file.Pragma.from_text(text, 0, 100)) 76 | 77 | 78 | if __name__ == '__main__': 79 | absltest.main() 80 | -------------------------------------------------------------------------------- /refex/test_rxerr_debug.py: -------------------------------------------------------------------------------- 1 | """Tests for refex.rxerr_debug.""" 2 | 3 | import contextlib 4 | import io 5 | import json 6 | import shlex 7 | 8 | from absl.testing import absltest 9 | 10 | from refex import rxerr_debug 11 | 12 | 13 | class RxerrDebugTest(absltest.TestCase): 14 | 15 | def test_argv(self): 16 | """Tests that argv is output in a copy-pasteable way (best as possible).""" 17 | argv = ['refex', """complex\n"arg'ument"""] 18 | path = self.create_tempfile(content=json.dumps({'argv': argv})).full_path 19 | stdout = io.StringIO() 20 | with contextlib.redirect_stdout(stdout): 21 | rxerr_debug.main(['rxerr_debug', path]) 22 | 23 | # not hardcoding the string because there's many different ways to do it, 24 | # and shlex.quote has bad-ish formatting that may improve in future. 25 | # For example, on Python 3.8, I get: 26 | # >>> import shlex; print(shlex.join(['a', 'b" c' "'"])) 27 | # a 'b" c'"'"'' 28 | # (the trailing '' is superfluous.) 29 | # Instead, we can just run shlex.split() over it as a quick safety check. 30 | self.assertEqual(shlex.split(stdout.getvalue()), ['Command:'] + argv) 31 | 32 | def test_traceback(self): 33 | """Tests that the traceback shows up, ish.""" 34 | tb = ('Traceback (most recent call last):\n' 35 | ' File "", line 1, in \n' 36 | 'SomeError: description\n') 37 | path = self.create_tempfile( 38 | content=json.dumps({'failures': { 39 | 'path': { 40 | 'traceback': tb 41 | } 42 | }})).full_path 43 | stdout = io.StringIO() 44 | with contextlib.redirect_stdout(stdout): 45 | rxerr_debug.main(['rxerr_debug', path]) 46 | stdout = stdout.getvalue() 47 | self.assertIn('SomeError', stdout) 48 | self.assertIn('description', stdout) 49 | 50 | 51 | if __name__ == '__main__': 52 | absltest.main() 53 | -------------------------------------------------------------------------------- /refex/test_search.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.search.""" 15 | 16 | 17 | import re 18 | 19 | from absl.testing import absltest 20 | from absl.testing import parameterized 21 | 22 | from refex import formatting 23 | from refex import parsed_file 24 | from refex import search 25 | from refex.fix import fixer 26 | from refex.python import matcher 27 | from refex.python import syntactic_template 28 | from refex.python.matchers import syntax_matchers 29 | 30 | 31 | class ExcludedRangesTest(parameterized.TestCase): 32 | """Tests range exclusion pragmas, using Python for convenience.""" 33 | 34 | @parameterized.parameters( 35 | '\nhello # pylint: disable=foo', 36 | '\nhello # refex: disable=pylint.foo', 37 | ) 38 | def test_correct_offset(self, source): 39 | parsed_file = matcher.parse_ast(source) 40 | # the covered range is everything but the first byte (a newline) 41 | self.assertEqual( 42 | search._pragma_excluded_ranges(parsed_file), 43 | {'pylint.foo': [(1, len(source))]}) 44 | 45 | def test_bad_tag(self): 46 | parsed_file = matcher.parse_ast(' # foo: disable=bar') 47 | self.assertEqual(search._pragma_excluded_ranges(parsed_file), {}) 48 | 49 | 50 | class TrimRemovedStatementsTest(parameterized.TestCase): 51 | 52 | @parameterized.parameters( 53 | # Module emptying. 54 | ('0xbad', ''), 55 | ('0xbad; 0xbad;', ''), 56 | ('0xbad\n0xbad;', ''), 57 | # Suite emptying. 58 | ('if 1: 0xbad', 'if 1: pass'), 59 | ('if 1: 0xbad;', 'if 1: pass;'), 60 | ('if 1:\n 0xbad', 'if 1:\n pass'), 61 | ('if 1:\n 0xbad\n 0xbad', 'if 1:\n pass'), 62 | ('if 1:\n 0xbad; 0xbad;', 'if 1:\n pass'), 63 | # Module prefix collapse. 64 | ('0xbad;\n2', '2'), 65 | ('0xbad; 2;', '2;'), 66 | ('0xbad;\n2', '2'), 67 | ('0xbad;\n2;', '2;'), 68 | # NOTE: Replacements ending a suite will strip off the semicolon. 69 | ('2; 0xbad', '2'), 70 | ('2; 0xbad;', '2'), 71 | # Suite prefix collapse. 72 | ('if 1: 0xbad; 2', 'if 1: 2'), 73 | ('if 1:\n 0xbad; 2', 'if 1:\n 2'), 74 | ('if 1:\n 0xbad\n 2', 'if 1:\n 2'), 75 | ('if 1:\n 0xbad\n 2', 'if 1:\n 2'), 76 | # Suite multi-statement collapse. 77 | ('if 1: 2; 0xbad; 3;', 'if 1: 2; 3;'), 78 | ('if 1:\n 2; 0xbad\n 3', 'if 1:\n 2; 3'), 79 | ('if 1:\n 0xbad; 2\n 3', 'if 1:\n 2\n 3'), 80 | ('if 1:\n 2; 0xbad; 3', 'if 1:\n 2; 3'), 81 | ('if 1: 0xbad; 0xbad; 2', 'if 1: 2'), 82 | ('if 1:\n 0xbad; 2\n 0xbad;', 'if 1:\n 2'), 83 | # NOTE: Adjacent replacements ending a suite cause excess whitespace. 84 | ('if 1:\n 2\n 0xbad\n 0xbad', 'if 1:\n 2\n '), 85 | ('if 1: 2; 0xbad; 0xbad; 0xbad', 'if 1: 2; '), 86 | ('if 1: 0xbad; 2; 0xbad; 0xbad', 'if 1: 2; '), 87 | # Adjacent comment behavior. 88 | ('0xbad #trailing', ' #trailing'), 89 | ('#block\n0xbad', '#block\n'), 90 | ('#block\n0xbad\n2', '#block\n2'), 91 | ('2; #trailing\n0xbad\n3', '2; #trailing\n3'), 92 | # NOTE: Replacements ending a suite will strip off preceding comments. 93 | ('2 #trailing\n0xbad', '2'), 94 | ('2\n #block\n0xbad', '2'), 95 | # Other suite types. 96 | ('if 1: pass\nelse: 0xbad', 'if 1: pass\nelse: pass'), 97 | ('for _ in []: 0xbad', 'for _ in []: pass'), 98 | ('while 1: 0xbad', 'while 1: pass'), 99 | ('with 1: 0xbad', 'with 1: pass'), 100 | ) 101 | def test_single_statement(self, before, after): 102 | searcher = search.PyStmtRewritingSearcher.from_pattern( 103 | '0xbad', 104 | {search.ROOT_LABEL: syntactic_template.PythonTemplate('')}) 105 | substitutions = list(search.find_iter(searcher, before, 'a.py')) 106 | self.assertEqual(after, 107 | formatting.apply_substitutions(before, substitutions)) 108 | 109 | 110 | class RewriteStringTest(absltest.TestCase): 111 | 112 | def test_replace(self): 113 | fix = fixer.SimplePythonFixer( 114 | matcher=syntax_matchers.ExprPattern('$obj.attr'), 115 | replacement=syntactic_template.PythonTemplate(u'$obj.other'), 116 | ) 117 | searcher = fixer.CombiningPythonFixer([fix]) 118 | 119 | source = 'my_obj.attr + other_obj.attr' 120 | self.assertEqual('my_obj.other + other_obj.other', 121 | search.rewrite_string(searcher, source, 'example.py')) 122 | 123 | 124 | def _sub_string(s, sub): 125 | start, end = sub.primary_span 126 | return s[start:end] 127 | 128 | 129 | def _sub_strings(s, subs): 130 | return [_sub_string(s, sub) for sub in subs] 131 | 132 | 133 | class CombinedSearcherTest(parameterized.TestCase): 134 | 135 | @parameterized.parameters( 136 | search.RegexSearcher.from_pattern('x', {}), 137 | search.PyExprRewritingSearcher.from_pattern('x', {}), 138 | ) 139 | def test_compatible_searchers(self, x_searcher): 140 | src = 'x, y' 141 | searcher = search.CombinedSearcher([ 142 | x_searcher, 143 | search.RegexSearcher.from_pattern('y', {}), 144 | ]) 145 | 146 | self.assertEqual( 147 | _sub_strings(src, search.find_iter(searcher, src, '')), 148 | ['x', 'y'], 149 | ) 150 | 151 | def test_incompatible_searchers(self): 152 | 153 | class IncompatibleParsedFile(parsed_file.ParsedFile): 154 | pass 155 | 156 | class IncompatibleSearcher(search.RegexSearcher): 157 | 158 | def parse(self, data, filename): 159 | return IncompatibleParsedFile(data, filename) 160 | 161 | def test_approximate_regex(self): 162 | searcher = search.CombinedSearcher([ 163 | search.RegexSearcher.from_pattern('x', {}), 164 | search.RegexSearcher.from_pattern('y', {}), 165 | ]) 166 | 167 | self.assertEqual(searcher.approximate_regex(), '(?:x)|(?:y)') 168 | # doesn't crash 169 | re.compile(searcher.approximate_regex()) 170 | 171 | def test_null_approximate_regex(self): 172 | searcher = search.CombinedSearcher([ 173 | search.PyExprRewritingSearcher.from_pattern('x', {}), 174 | search.RegexSearcher.from_pattern('y', {}), 175 | ]) 176 | 177 | self.assertIsNone(searcher.approximate_regex()) 178 | 179 | 180 | if __name__ == '__main__': 181 | absltest.main() 182 | -------------------------------------------------------------------------------- /refex/test_substitution.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Tests for refex.substitution.""" 15 | 16 | 17 | from absl.testing import absltest 18 | from absl.testing import parameterized 19 | 20 | from refex import substitution 21 | 22 | 23 | def _substitution_with_span(start, end, **kwargs): 24 | kwargs.setdefault('message', '') 25 | kwargs.setdefault('url', '') 26 | return substitution.Substitution( 27 | matched_spans={'label': (start, end)}, 28 | primary_label='label', 29 | replacements={'label': u''}, 30 | **kwargs) 31 | 32 | 33 | class SubstitutionTest(parameterized.TestCase): 34 | 35 | def test_validate_primary_label(self): 36 | with self.assertRaises(ValueError): 37 | substitution.Substitution( 38 | message='', 39 | matched_spans={'label': (0, 0)}, 40 | primary_label='label_dne', 41 | replacements={'label': u''}, 42 | url='', 43 | ) 44 | 45 | def test_validate_span_replacements(self): 46 | with self.assertRaises(ValueError): 47 | substitution.Substitution( 48 | message='', 49 | matched_spans={'label': (0, 0)}, 50 | primary_label='label', 51 | replacements={'label_dne': u''}, 52 | url='', 53 | ) 54 | 55 | def test_validate_span_not_in_replacements(self): 56 | """It is OK to only replace a subset of matched spans.""" 57 | # does not raise: 58 | substitution.Substitution( 59 | message='', 60 | matched_spans={'label': (0, 0)}, 61 | primary_label='label', 62 | replacements={}, 63 | url='', 64 | ) 65 | 66 | def test_validate_replacement_not_in_spans(self): 67 | with self.assertRaises(ValueError): 68 | substitution.Substitution( 69 | message='', 70 | matched_spans={'label': (0, 0)}, 71 | primary_label='label', 72 | replacements={ 73 | 'label': u'', 74 | 'label2': u'' 75 | }, 76 | url='', 77 | ) 78 | 79 | def test_validate_no_replacements(self): 80 | substitution.Substitution( 81 | primary_label='label', 82 | matched_spans={'label': (0, 0)}, 83 | replacements=None, 84 | ) 85 | 86 | @parameterized.parameters('.foo', 'foo.', '.foo.', 'foo ', 'foo bar', 87 | 'foo..bar', '-foo') 88 | def test_validate_category_failure(self, category): 89 | with self.assertRaises(ValueError): 90 | substitution.Substitution( 91 | primary_label='label', 92 | matched_spans={'label': (0, 0)}, 93 | category=category, 94 | ) 95 | 96 | @parameterized.parameters('foo', 'foo.bar', 'foo-', '_foo', 'foo.bar') 97 | def test_validate_category_success(self, category): 98 | substitution.Substitution( 99 | primary_label='label', 100 | matched_spans={'label': (0, 0)}, 101 | category=category, 102 | ) 103 | 104 | def test_relative_identical(self): 105 | self.assertEqual( 106 | _substitution_with_span(10, 20).relative_to_span(10, 20), 107 | _substitution_with_span(0, 10)) 108 | 109 | def test_relative_subset(self): 110 | self.assertEqual( 111 | _substitution_with_span(10, 20).relative_to_span(5, 25), 112 | _substitution_with_span(5, 15)) 113 | 114 | def test_out_of_bounds(self): 115 | for out_of_bounds_span in [(0, 10), (0, 15), (15, 30), (20, 30), (12, 18)]: 116 | with self.subTest(relative_to=out_of_bounds_span): 117 | self.assertIsNone( 118 | _substitution_with_span(10, 119 | 20).relative_to_span(*out_of_bounds_span)) 120 | 121 | def test_all_categories(self): 122 | self.assertEqual( 123 | list( 124 | _substitution_with_span(0, 1, 125 | category='foo.bar.baz').all_categories()), 126 | [None, 'foo', 'foo.bar', 'foo.bar.baz']) 127 | 128 | def test_all_categories_none(self): 129 | 130 | self.assertEqual( 131 | list(_substitution_with_span(0, 1, category=None).all_categories()), 132 | [None]) 133 | 134 | 135 | class SuppressTest(parameterized.TestCase): 136 | 137 | @parameterized.parameters((0, 1), (2, 3), (5, 6)) 138 | def test_nointersect(self, start, end): 139 | sub = _substitution_with_span(start, end) 140 | self.assertEqual( 141 | list(substitution.suppress_exclude_bytes([sub], {None: [(1, 2)]})), 142 | [sub]) 143 | 144 | @parameterized.parameters((0, 2), (1, 2), (2, 3), (5, 6), (5, 7), (0, 7)) 145 | def test_intersect(self, start, end): 146 | sub = _substitution_with_span(start, end) 147 | self.assertEqual( 148 | list(substitution.suppress_exclude_bytes([sub], {None: [(1, 6)]})), []) 149 | 150 | @parameterized.parameters('foo.bar', 'foo.bar.baz') 151 | def test_category_match(self, category): 152 | sub = _substitution_with_span(0, 2, category=category) 153 | self.assertEqual( 154 | list(substitution.suppress_exclude_bytes([sub], {'foo.bar': [(0, 2)]})), 155 | []) 156 | 157 | @parameterized.parameters('foo', 'foo.not_bar', 'not_foo') 158 | def test_category_nomatch(self, category): 159 | sub = _substitution_with_span(0, 2, category=category) 160 | self.assertEqual( 161 | list(substitution.suppress_exclude_bytes([sub], {'foo.bar': [(0, 2)]})), 162 | [sub]) 163 | 164 | 165 | class LabeledSpanTest(absltest.TestCase): 166 | 167 | def test_empty_range(self): 168 | self.assertEqual( 169 | list( 170 | substitution.labeled_spans( 171 | substitution.Substitution( 172 | matched_spans={'a': (0, 0)}, primary_label='a'))), 173 | [substitution.LabeledSpan(labels={'a'}, span=(0, 0))]) 174 | 175 | def test_empty_range_next(self): 176 | self.assertEqual( 177 | list( 178 | substitution.labeled_spans( 179 | substitution.Substitution( 180 | matched_spans={ 181 | 'a': (0, 0), 182 | 'b': (1, 1) 183 | }, primary_label='a'))), [ 184 | substitution.LabeledSpan(labels={'a'}, span=(0, 0)), 185 | substitution.LabeledSpan(labels=set(), span=(0, 1)), 186 | substitution.LabeledSpan(labels={'b'}, span=(1, 1)) 187 | ]) 188 | 189 | def test_adjacent(self): 190 | self.assertEqual( 191 | list( 192 | substitution.labeled_spans( 193 | substitution.Substitution( 194 | matched_spans={ 195 | 'a': (0, 10), 196 | 'b': (10, 20) 197 | }, 198 | primary_label='a'))), 199 | [ 200 | substitution.LabeledSpan(labels={'a'}, span=(0, 10)), 201 | substitution.LabeledSpan(labels={'a', 'b'}, span=(10, 10)), 202 | substitution.LabeledSpan(labels={'b'}, span=(10, 20)) 203 | ]) 204 | 205 | def test_gap(self): 206 | self.assertEqual( 207 | list( 208 | substitution.labeled_spans( 209 | substitution.Substitution( 210 | matched_spans={ 211 | 'a': (0, 10), 212 | 'b': (20, 30) 213 | }, 214 | primary_label='a'))), [ 215 | substitution.LabeledSpan(labels={'a'}, span=(0, 10)), 216 | substitution.LabeledSpan(labels=set(), span=(10, 20)), 217 | substitution.LabeledSpan(labels={'b'}, span=(20, 30)) 218 | ]) 219 | 220 | def test_overlap(self): 221 | self.assertEqual( 222 | list( 223 | substitution.labeled_spans( 224 | substitution.Substitution( 225 | matched_spans={ 226 | 'a': (0, 10), 227 | 'b': (5, 15) 228 | }, 229 | primary_label='a'))), 230 | [ 231 | substitution.LabeledSpan(labels={'a'}, span=(0, 5)), 232 | substitution.LabeledSpan(labels={'a', 'b'}, span=(5, 10)), 233 | substitution.LabeledSpan(labels={'b'}, span=(10, 15)) 234 | ]) 235 | 236 | def test_total_overlap(self): 237 | self.assertEqual( 238 | list( 239 | substitution.labeled_spans( 240 | substitution.Substitution( 241 | matched_spans={ 242 | 'a': (0, 10), 243 | 'b': (0, 10) 244 | }, 245 | primary_label='a'))), 246 | [substitution.LabeledSpan(labels={'a', 'b'}, span=(0, 10))]) 247 | 248 | def test_total_overlap_start(self): 249 | self.assertEqual( 250 | list( 251 | substitution.labeled_spans( 252 | substitution.Substitution( 253 | matched_spans={ 254 | 'a': (0, 10), 255 | 'b': (0, 15) 256 | }, 257 | primary_label='a'))), 258 | [ 259 | substitution.LabeledSpan(labels={'a', 'b'}, span=(0, 10)), 260 | substitution.LabeledSpan(labels={'b'}, span=(10, 15)) 261 | ]) 262 | 263 | def test_total_overlap_end(self): 264 | self.assertEqual( 265 | list( 266 | substitution.labeled_spans( 267 | substitution.Substitution( 268 | matched_spans={ 269 | 'a': (0, 10), 270 | 'b': (5, 10) 271 | }, 272 | primary_label='a'))), 273 | [ 274 | substitution.LabeledSpan(labels={'a'}, span=(0, 5)), 275 | substitution.LabeledSpan(labels={'a', 'b'}, span=(5, 10)) 276 | ]) 277 | 278 | def test_swapped_order_empty(self): 279 | """Test what 'shouldn't happen'.""" 280 | with self.assertRaises(AssertionError): 281 | list( 282 | substitution.labeled_spans( 283 | substitution.Substitution( 284 | matched_spans={'a': (10, 5)}, primary_label='a'))) 285 | 286 | 287 | if __name__ == '__main__': 288 | absltest.main() 289 | --------------------------------------------------------------------------------