├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── Justfile ├── LICENSE ├── README.md ├── pyproject.toml ├── symbex ├── __init__.py ├── __main__.py ├── cli.py └── lib.py └── tests ├── example_symbols.py ├── replace_tests.yaml ├── test_filters.py ├── test_imports.py ├── test_output.py ├── test_replace.py ├── test_symbex.py └── test_symbols.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | permissions: 8 | contents: read 9 | id-token: write 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | cache: pip 24 | cache-dependency-path: pyproject.toml 25 | - name: Install dependencies 26 | run: | 27 | pip install -e '.[test]' 28 | - name: Run tests 29 | run: | 30 | python -m pytest 31 | deploy: 32 | runs-on: ubuntu-latest 33 | needs: [test] 34 | environment: release 35 | permissions: 36 | id-token: write 37 | steps: 38 | - uses: actions/checkout@v4 39 | - name: Set up Python 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: "3.13" 43 | cache: pip 44 | cache-dependency-path: pyproject.toml 45 | - name: Install dependencies 46 | run: | 47 | pip install setuptools wheel build 48 | - name: Build 49 | run: | 50 | python -m build 51 | - name: Publish 52 | uses: pypa/gh-action-pypi-publish@release/v1 53 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | cache: pip 21 | cache-dependency-path: pyproject.toml 22 | - name: Install dependencies 23 | run: | 24 | pip install -e '.[test]' 25 | - name: Run tests 26 | run: | 27 | python -m pytest 28 | - name: Lint with ruff 29 | run: | 30 | ruff check symbex 31 | - name: Check if cog needs to be run 32 | run: | 33 | cog README.md 34 | cog --check README.md 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info 9 | .DS_Store 10 | -------------------------------------------------------------------------------- /Justfile: -------------------------------------------------------------------------------- 1 | # Run tests and linters 2 | @default: test lint 3 | 4 | # Run pytest with supplied options 5 | @test *options: 6 | pipenv run pytest {{options}} 7 | 8 | # Run linters 9 | @lint: 10 | pipenv run black . --check 11 | pipenv run cog --check README.md 12 | 13 | # Rebuild docs with cog 14 | @cog: 15 | pipenv run cog -r README.md 16 | 17 | # Apply Black 18 | @black: 19 | pipenv run black . 20 | 21 | # Auto-format and fix things 22 | @fix: cog black 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Symbex 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/symbex.svg)](https://pypi.org/project/symbex/) 4 | [![Changelog](https://img.shields.io/github/v/release/simonw/symbex?include_prereleases&label=changelog)](https://github.com/simonw/symbex/releases) 5 | [![Tests](https://github.com/simonw/symbex/workflows/Test/badge.svg)](https://github.com/simonw/symbex/actions?query=workflow%3ATest) 6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/symbex/blob/master/LICENSE) 7 | 8 | Find the Python code for specified symbols 9 | 10 | Read [Symbex: search Python code for functions and classes, then pipe them into a LLM](https://simonwillison.net/2023/Jun/18/symbex/) for background on this project. 11 | 12 | ## Installation 13 | 14 | Install this tool using `pip`: 15 | ```bash 16 | pip install symbex 17 | ``` 18 | Or using Homebrew: 19 | ```bash 20 | brew install simonw/llm/symbex 21 | ``` 22 | ## Usage 23 | 24 | `symbex` can search for names of functions and classes that occur at the top level of a Python file. 25 | 26 | To search every `.py` file in your current directory and all subdirectories, run like this: 27 | 28 | ```bash 29 | symbex my_function 30 | ``` 31 | You can search for more than one symbol at a time: 32 | ```bash 33 | symbex my_function MyClass 34 | ``` 35 | Wildcards are supported - to search for every `test_` function run this (note the single quotes to avoid the shell interpreting the `*` as a wildcard): 36 | ```bash 37 | symbex 'test_*' 38 | ``` 39 | To search for methods within classes, use `class.method` notation: 40 | ```bash 41 | symbex Entry.get_absolute_url 42 | ``` 43 | Wildcards are supported here as well: 44 | ```bash 45 | symbex 'Entry.*' 46 | symbex '*.get_absolute_url' 47 | symbex '*.get_*' 48 | ``` 49 | Or to view every method of every class: 50 | ```bash 51 | symbex '*.*' 52 | ``` 53 | To search within a specific file, pass that file using the `-f` option. You can pass this more than once to search multiple files. 54 | 55 | ```bash 56 | symbex MyClass -f my_file.py 57 | ``` 58 | To search within a specific directory and all of its subdirectories, use the `-d/--directory` option: 59 | ```bash 60 | symbex Database -d ~/projects/datasette 61 | ``` 62 | If you know that you want to inspect one or more modules that can be imported by Python, you can use the `-m/--module name` option. This example shows the signatures for every symbol available in the `asyncio` package: 63 | ```bash 64 | symbex -m asyncio -s --imports 65 | ``` 66 | You can search the directory containing the Python standard library using `--stdlib`. This can be useful for quickly looking up the source code for specific Python library functions: 67 | ```bash 68 | symbex --stdlib -in to_thread 69 | ``` 70 | `-in` is explained below. If you provide `--stdlib` without any `-d` or `-f` options then `--silent` will be turned on automatically, since the standard library otherwise produces a number of different warnings. 71 | 72 | The output starts like this: 73 | ```python 74 | # from asyncio.threads import to_thread 75 | async def to_thread(func, /, *args, **kwargs): 76 | """Asynchronously run function *func* in a separate thread. 77 | # ... 78 | ``` 79 | You can exclude files in specified directories using the `-x/--exclude` option: 80 | ```bash 81 | symbex Database -d ~/projects/datasette -x ~/projects/datasette/tests 82 | ``` 83 | If `symbex` encounters any Python code that it cannot parse, it will print a warning message and continue searching: 84 | ``` 85 | # Syntax error in path/badcode.py: expected ':' (, line 1) 86 | ``` 87 | Pass `--silent` to suppress these warnings: 88 | ```bash 89 | symbex MyClass --silent 90 | ``` 91 | ### Filters 92 | 93 | In addition to searching for symbols, you can apply filters to the results. 94 | 95 | The following filters are available: 96 | 97 | - `--function` - only functions 98 | - `--class` - only classes 99 | - `--async` - only `async def` functions 100 | - `--unasync` - only non-async functions 101 | - `--documented` - functions/classes that have a docstring 102 | - `--undocumented` - functions/classes that do not have a docstring 103 | - `--public` - functions/classes that are public - don't have a `_name` prefix (or are `__*__` methods) 104 | - `--private` - functions/classes that are private - have a `_name` prefix and are not `__*__` 105 | - `--dunder` - functions matching `__*__` - this should usually be used with `*.*` to find all dunder methods 106 | - `--typed` - functions that have at least one type annotation 107 | - `--untyped` - functions that have no type annotations 108 | - `--partially-typed` - functions that have some type annotations but not all 109 | - `--fully-typed` - functions that have type annotations for every argument and the return value 110 | - `--no-init` - Exclude `__init__(self)` methods. This is useful when combined with `--fully-typed '*.*'` to avoid returning `__init__(self)` methods that would otherwise be classified as fully typed, since `__init__` doesn't need argument or return type annotations. 111 | 112 | For example, to see the signatures of every `async def` function in your project that doesn't have any type annotations: 113 | 114 | ```bash 115 | symbex -s --async --untyped 116 | ``` 117 | 118 | For class methods instead of functions, you can combine filters with a symbol search argument of `*.*`. 119 | 120 | This example shows the full source code of every class method in the Python standard library that has type annotations for all of the arguments and the return value: 121 | 122 | ```bash 123 | symbex --fully-typed --no-init '*.*' --stdlib 124 | ``` 125 | 126 | To find all public functions and methods that lack documentation, just showing the signature of each one: 127 | 128 | ```bash 129 | symbex '*' '*.*' --public --undocumented --signatures 130 | ``` 131 | 132 | ### Example output 133 | 134 | In a fresh checkout of [Datasette](https://github.com/simonw/datasette) I ran this command: 135 | 136 | ```bash 137 | symbex MessagesDebugView get_long_description 138 | ``` 139 | Here's the output of the command: 140 | ```python 141 | # File: setup.py Line: 5 142 | def get_long_description(): 143 | with open( 144 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md"), 145 | encoding="utf8", 146 | ) as fp: 147 | return fp.read() 148 | 149 | # File: datasette/views/special.py Line: 60 150 | class PatternPortfolioView(View): 151 | async def get(self, request, datasette): 152 | await datasette.ensure_permissions(request.actor, ["view-instance"]) 153 | return Response.html( 154 | await datasette.render_template( 155 | "patterns.html", 156 | request=request, 157 | view_name="patterns", 158 | ) 159 | ) 160 | ``` 161 | ### Just the signatures 162 | 163 | The `-s/--signatures` option will list just the signatures of the functions and classes, for example: 164 | ```bash 165 | symbex -s -f symbex/lib.py 166 | ``` 167 | 185 | ```python 186 | # File: symbex/lib.py Line: 107 187 | def function_definition(function_node: AST): 188 | 189 | # File: symbex/lib.py Line: 13 190 | def find_symbol_nodes(code: str, filename: str, symbols: Iterable[str]) -> List[Tuple[(AST, Optional[str])]]: 191 | 192 | # File: symbex/lib.py Line: 175 193 | def class_definition(class_def): 194 | 195 | # File: symbex/lib.py Line: 209 196 | def annotation_definition(annotation: AST) -> str: 197 | 198 | # File: symbex/lib.py Line: 227 199 | def read_file(path): 200 | 201 | # File: symbex/lib.py Line: 253 202 | class TypeSummary: 203 | 204 | # File: symbex/lib.py Line: 258 205 | def type_summary(node: AST) -> Optional[TypeSummary]: 206 | 207 | # File: symbex/lib.py Line: 304 208 | def quoted_string(s): 209 | 210 | # File: symbex/lib.py Line: 315 211 | def import_line_for_function(function_name: str, filepath: str, possible_root_dirs: List[str]) -> str: 212 | 213 | # File: symbex/lib.py Line: 37 214 | def code_for_node(code: str, node: AST, class_name: str, signatures: bool, docstrings: bool) -> Tuple[(str, int)]: 215 | 216 | # File: symbex/lib.py Line: 71 217 | def add_docstring(definition: str, node: AST, docstrings: bool, is_method: bool) -> str: 218 | 219 | # File: symbex/lib.py Line: 82 220 | def match(name: str, symbols: Iterable[str]) -> bool: 221 | ``` 222 | 223 | This can be combined with other options, or you can run `symbex -s` to see every symbol in the current directory and its subdirectories. 224 | 225 | To include estimated import paths, such as `# from symbex.lib import match`, use `--imports`. These will be calculated relative to the directory you specified, or you can pass one or more `--sys-path` options to request that imports are calculated relative to those directories as if they were on `sys.path`: 226 | 227 | ```bash 228 | ~/dev/symbex/symbex match --imports -s --sys-path ~/dev/symbex 229 | ``` 230 | Example output: 231 | 239 | ```python 240 | # File: symbex/lib.py Line: 82 241 | # from symbex.lib import match 242 | def match(name: str, symbols: Iterable[str]) -> bool: 243 | ``` 244 | 245 | To suppress the `# File: ...` comments, use `--no-file` or `-n`. 246 | 247 | So to both show import paths and suppress File comments, use `-in` as a shortcut: 248 | ```bash 249 | symbex -in match 250 | ``` 251 | Output: 252 | 260 | ```python 261 | # from symbex.lib import match 262 | def match(name: str, symbols: Iterable[str]) -> bool: 263 | ``` 264 | 265 | 266 | To include docstrings in those signatures, use `--docstrings`: 267 | ```bash 268 | symbex match --docstrings -f symbex/lib.py 269 | ``` 270 | Example output: 271 | 277 | ```python 278 | # File: symbex/lib.py Line: 82 279 | def match(name: str, symbols: Iterable[str]) -> bool: 280 | "Returns True if name matches any of the symbols, resolving wildcards" 281 | ``` 282 | 283 | 284 | ## Counting symbols 285 | 286 | If you just want to count the number of functions and classes that match your filters, use the `--count` option. Here's how to count your classes: 287 | 288 | ```bash 289 | symbex --class --count 290 | ``` 291 | Or to count every async test function: 292 | ```bash 293 | symbex --async 'test_*' --count 294 | ``` 295 | ## Structured output 296 | 297 | LLM defaults to outputting plain text (actually valid Python code, thanks to the way it uses comments). 298 | 299 | You can request output in CSV, TSV, JSON or newline-delimited JSON instead, using the following options: 300 | 301 | - `--json`: a JSON array, `[{"id": "...", "code": "..."}]` 302 | - `--nl`: newline-delimited JSON, `{"id": "...", "code": "..."}` per line 303 | - `--csv`: CSV with `id,code` as the heading row 304 | - `--tsv`: TSV with `id\tcode` as the heading row 305 | 306 | In each case the ID will be the path to the file containing the symbol, followed by a colon, followed by the line number of the symbol, for example: 307 | 308 | ```json 309 | { 310 | "id": "symbex/lib.py:82", 311 | "code": "def match(name: str, symbols: Iterable[str]) -> bool:" 312 | } 313 | ``` 314 | If you pass `-i/--imports` the ID will be the import line instead: 315 | ```json 316 | { 317 | "id": "from symbex.lib import match", 318 | "code": "def match(name: str, symbols: Iterable[str]) -> bool:" 319 | } 320 | ``` 321 | Pass `--id-prefix 'something:'` to add the specified prefix to the start of each ID. 322 | 323 | This example will generate a CSV file of all of your test functions, using the import style of IDs and a prefix of `test:`: 324 | 325 | ```bash 326 | symbex 'test_*' \ 327 | --function \ 328 | --imports \ 329 | --csv > tests.csv 330 | ``` 331 | 332 | ## Using with LLM 333 | 334 | This tool is primarily designed to be used with [LLM](https://llm.datasette.io/), a CLI tool for working with Large Language Models. 335 | 336 | `symbex` makes it easy to grab a specific class or function and pass it to the `llm` command. 337 | 338 | For example, I ran this in the Datasette repository root: 339 | 340 | ```bash 341 | symbex Response | llm --system 'Explain this code, succinctly' 342 | ``` 343 | And got back this: 344 | 345 | > This code defines a custom `Response` class with methods for returning HTTP responses. It includes methods for setting cookies, returning HTML, text, and JSON responses, and redirecting to a different URL. The `asgi_send` method sends the response to the client using the ASGI (Asynchronous Server Gateway Interface) protocol. 346 | 347 | The structured output feature is designed to be used with [LLM embeddings](https://llm.datasette.io/en/stable/embeddings/index.html). You can generate embeddings for every symbol in your codebase using [llm embed-multi](https://llm.datasette.io/en/stable/embeddings/cli.html#llm-embed-multi) like this: 348 | 349 | ```bash 350 | symbex '*' '*:*' --nl | \ 351 | llm embed-multi symbols - \ 352 | --format nl --database embeddings.db --store 353 | ``` 354 | This creates a database in `embeddings.db` containing all of your symbols along with embedding vectors. 355 | 356 | You can then search your code like this: 357 | ```bash 358 | llm similar symbols -d embeddings.db -c 'test csv' | jq 359 | ``` 360 | 361 | ## Replacing a matched symbol 362 | 363 | The `--replace` option can be used to replace a single matched symbol with content piped in to standard input. 364 | 365 | Given a file called `my_code.py` with the following content: 366 | ```python 367 | def first_function(): 368 | # This will be ignored 369 | pass 370 | 371 | def second_function(): 372 | # This will be replaced 373 | pass 374 | ``` 375 | Run the following: 376 | ```bash 377 | echo "def second_function(a, b): 378 | # This is a replacement implementation 379 | return a + b + 3 380 | " | symbex second_function --replace 381 | ``` 382 | The result will be an updated-in-place `my_code.py` containing the following: 383 | 384 | ```python 385 | def first_function(): 386 | # This will be ignored 387 | pass 388 | 389 | def second_function(a, b): 390 | # This is a replacement implementation 391 | return a + b + 3 392 | ``` 393 | This feature should be used with care! I recommend only using this feature against code that is already checked into Git, so you can review changes it makes using `git diff` and revert them using `git checkout my_code.py`. 394 | 395 | ## Replacing a matched symbol by running a command 396 | 397 | The `--rexec COMMAND` option can be used to replace a single matched symbol by running a command and using its output. 398 | 399 | The command will be run with the matched symbol's definition piped to its standard input. The output of that command will be used as the replacement text. 400 | 401 | Here's an example that uses `sed` to add a `# ` to the beginning of each matching line, effectively commenting out the matched function: 402 | 403 | ```bash 404 | symbex first_function --rexec "sed 's/^/# /'" 405 | ``` 406 | This modified the first function in place to look like this: 407 | ```python 408 | # def first_function(): 409 | # # This will be ignored 410 | # pass 411 | ``` 412 | A much more exciting example uses LLM. This example will use the `gpt-3.5-turbo` model to add type hints and generate a docstring: 413 | 414 | ```bash 415 | symbex second_function \ 416 | --rexec "llm --system 'add type hints and a docstring'" 417 | ``` 418 | I ran this against this code: 419 | ```python 420 | def first_function(): 421 | # This will be ignored 422 | pass 423 | 424 | def second_function(a, b): 425 | return a + b + 3 426 | ``` 427 | And the second function was updated in place to look like this: 428 | ```python 429 | def second_function(a: int, b: int) -> int: 430 | """ 431 | Returns the sum of two integers (a and b) plus 3. 432 | 433 | Parameters: 434 | a (int): The first integer. 435 | b (int): The second integer. 436 | 437 | Returns: 438 | int: The sum of a and b plus 3. 439 | """ 440 | return a + b + 3 441 | ``` 442 | ## Using in CI 443 | 444 | The `--check` option causes `symbex` to return a non-zero exit code if any matches are found for your query. 445 | 446 | You can use this in CI to guard against things like public functions being added without documentation: 447 | 448 | ```bash 449 | symbex --function --public --undocumented --check 450 | ``` 451 | This will fail silently but set a `1` exit code if there are any undocumented functions. 452 | 453 | Using this as a step in a CI tool such as GitHub Actions should result in a test failure. 454 | 455 | Run this to see the exit code from the last command: 456 | ```bash 457 | echo $? 458 | ``` 459 | 460 | `--check` will not output anything by default. Add `--count` to output a count of matching symbols, or `-s/--signatures` to output the signatures of the matching symbols, for example: 461 | ```bash 462 | symbex --function --public --undocumented --check --count 463 | ``` 464 | 465 | ## Similar tools 466 | 467 | - [pyastgrep](https://github.com/spookylukey/pyastgrep) by Luke Plant offers advanced capabilities for viewing and searching through Python ASTs using XPath. 468 | - [cq](https://github.com/fullstackio/cq) is a tool thet lets you "extract code snippets using CSS-like selectors", built using [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) and primarily targetting JavaScript and TypeScript. 469 | 470 | ## symbex --help 471 | 472 | 479 | ``` 480 | Usage: symbex [OPTIONS] [SYMBOLS]... 481 | 482 | Find symbols in Python code and print the code for them. 483 | 484 | Example usage: 485 | 486 | # Search current directory and subdirectories 487 | symbex my_function MyClass 488 | 489 | # Search using a wildcard 490 | symbex 'test_*' 491 | 492 | # Find a specific class method 493 | symbex 'MyClass.my_method' 494 | 495 | # Find class methods using wildcards 496 | symbex '*View.handle_*' 497 | 498 | # Search a specific file 499 | symbex MyClass -f my_file.py 500 | 501 | # Search within a specific directory and its subdirectories 502 | symbex Database -d ~/projects/datasette 503 | 504 | # View signatures for all symbols in current directory and subdirectories 505 | symbex -s 506 | 507 | # View signatures for all test functions 508 | symbex 'test_*' -s 509 | 510 | # View signatures for all async functions with type definitions 511 | symbex --async --typed -s 512 | 513 | # Count the number of --async functions in the project 514 | symbex --async --count 515 | 516 | # Replace my_function with a new implementation: 517 | echo "def my_function(a, b): 518 | # This is a replacement implementation 519 | return a + b + 3 520 | " | symbex my_function --replace 521 | 522 | # Replace my_function with the output of a command: 523 | symbex first_function --rexec "sed 's/^/# /'" 524 | # This uses sed to comment out the function body 525 | 526 | Options: 527 | --version Show the version and exit. 528 | -f, --file FILE Files to search 529 | -d, --directory DIRECTORY Directories to search 530 | --stdlib Search the Python standard library 531 | -x, --exclude DIRECTORY Directories to exclude 532 | -s, --signatures Show just function and class signatures 533 | -n, --no-file Don't include the # File: comments in the output 534 | -i, --imports Show 'from x import y' lines for imported symbols 535 | -m, --module TEXT Modules to search within 536 | --sys-path TEXT Calculate imports relative to these on sys.path 537 | --docs, --docstrings Show function and class signatures plus docstrings 538 | --count Show count of matching symbols 539 | --silent Silently ignore Python files with parse errors 540 | --function Filter functions 541 | --async Filter async functions 542 | --unasync Filter non-async functions 543 | --class Filter classes 544 | --documented Filter functions with docstrings 545 | --undocumented Filter functions without docstrings 546 | --public Filter for symbols without a _ prefix 547 | --private Filter for symbols with a _ prefix 548 | --dunder Filter for symbols matching __*__ 549 | --typed Filter functions with type annotations 550 | --untyped Filter functions without type annotations 551 | --partially-typed Filter functions with partial type annotations 552 | --fully-typed Filter functions with full type annotations 553 | --no-init Filter to exclude any __init__ methods 554 | --check Exit with non-zero code if any matches found 555 | --replace Replace matching symbol with text from stdin 556 | --rexec TEXT Replace with the result of piping to this tool 557 | --csv Output as CSV 558 | --tsv Output as TSV 559 | --json Output as JSON 560 | --nl Output as newline-delimited JSON 561 | --id-prefix TEXT Prefix to use for symbol IDs 562 | --help Show this message and exit. 563 | 564 | ``` 565 | 566 | 567 | ## Development 568 | 569 | To contribute to this tool, first checkout the code. Then create a new virtual environment: 570 | ```bash 571 | cd symbex 572 | python -m venv venv 573 | source venv/bin/activate 574 | ``` 575 | Now install the dependencies and test dependencies: 576 | ```bash 577 | pip install -e '.[test]' 578 | ``` 579 | To run the tests: 580 | ```bash 581 | pytest 582 | ``` 583 | ### just 584 | 585 | You can also install [just](https://github.com/casey/just) and use it to run the tests and linters like this: 586 | 587 | ```bash 588 | just 589 | ``` 590 | Or to list commands: 591 | ```bash 592 | just -l 593 | ``` 594 | ``` 595 | Available recipes: 596 | black # Apply Black 597 | cog # Rebuild docs with cog 598 | default # Run tests and linters 599 | lint # Run linters 600 | test *options # Run pytest with supplied options 601 | ``` 602 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "symbex" 3 | version = "2.0" 4 | description = "Find the Python code for specified symbols" 5 | readme = "README.md" 6 | authors = [{name = "Simon Willison"}] 7 | license = "Apache-2.0" 8 | requires-python = ">=3.8" 9 | dependencies = [ 10 | "click" 11 | ] 12 | 13 | [project.urls] 14 | Homepage = "https://github.com/simonw/symbex" 15 | Issues = "https://github.com/simonw/symbex/issues" 16 | CI = "https://github.com/simonw/symbex/actions" 17 | Changelog = "https://github.com/simonw/symbex/releases" 18 | 19 | [project.scripts] 20 | symbex = "symbex.cli:cli" 21 | 22 | [project.optional-dependencies] 23 | test = ["pytest", "pytest-icdiff", "cogapp", "PyYAML", "ruff"] 24 | 25 | [build-system] 26 | requires = ["setuptools"] 27 | build-backend = "setuptools.build_meta" 28 | 29 | [tool.setuptools.packages.find] 30 | where = ["."] 31 | include = ["symbex"] 32 | 33 | -------------------------------------------------------------------------------- /symbex/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simonw/symbex/81c4b9c042f271f0d8b9a4d50095876cd54e05a2/symbex/__init__.py -------------------------------------------------------------------------------- /symbex/__main__.py: -------------------------------------------------------------------------------- 1 | from .cli import cli 2 | 3 | if __name__ == "__main__": 4 | cli() 5 | -------------------------------------------------------------------------------- /symbex/cli.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import click 3 | import csv 4 | import dataclasses 5 | import importlib 6 | import inspect 7 | import json 8 | import pathlib 9 | import site 10 | import subprocess 11 | import sys 12 | from typing import TextIO, Iterable, Literal, Tuple 13 | 14 | from .lib import ( 15 | code_for_node, 16 | find_symbol_nodes, 17 | import_line_for_function, 18 | read_file, 19 | type_summary, 20 | ) 21 | 22 | 23 | @dataclasses.dataclass 24 | class Output: 25 | symbol_id: str 26 | output_identifier_line: str 27 | output_import_line: str 28 | snippet: str 29 | 30 | 31 | @click.command() 32 | @click.version_option() 33 | @click.argument("symbols", nargs=-1) 34 | @click.option( 35 | "files", 36 | "-f", 37 | "--file", 38 | type=click.Path(file_okay=True, dir_okay=False), 39 | multiple=True, 40 | help="Files to search", 41 | ) 42 | @click.option( 43 | "directories", 44 | "-d", 45 | "--directory", 46 | type=click.Path(file_okay=False, dir_okay=True, resolve_path=True), 47 | multiple=True, 48 | help="Directories to search", 49 | ) 50 | @click.option("--stdlib", is_flag=True, help="Search the Python standard library") 51 | @click.option( 52 | "excludes", 53 | "-x", 54 | "--exclude", 55 | type=click.Path(file_okay=False, dir_okay=True, resolve_path=True), 56 | multiple=True, 57 | help="Directories to exclude", 58 | ) 59 | @click.option( 60 | "-s", 61 | "--signatures", 62 | is_flag=True, 63 | help="Show just function and class signatures", 64 | ) 65 | @click.option( 66 | "-n", 67 | "--no-file", 68 | is_flag=True, 69 | help="Don't include the # File: comments in the output", 70 | ) 71 | @click.option( 72 | "-i", 73 | "--imports", 74 | is_flag=True, 75 | help="Show 'from x import y' lines for imported symbols", 76 | ) 77 | @click.option( 78 | "modules", "-m", "--module", multiple=True, help="Modules to search within" 79 | ) 80 | @click.option( 81 | "sys_paths", 82 | "--sys-path", 83 | multiple=True, 84 | help="Calculate imports relative to these on sys.path", 85 | ) 86 | @click.option( 87 | "--docs", 88 | "--docstrings", 89 | is_flag=True, 90 | help="Show function and class signatures plus docstrings", 91 | ) 92 | @click.option( 93 | "--count", 94 | is_flag=True, 95 | help="Show count of matching symbols", 96 | ) 97 | @click.option( 98 | "--silent", 99 | is_flag=True, 100 | help="Silently ignore Python files with parse errors", 101 | ) 102 | @click.option( 103 | "--function", 104 | is_flag=True, 105 | help="Filter functions", 106 | ) 107 | @click.option( 108 | "async_", 109 | "--async", 110 | is_flag=True, 111 | help="Filter async functions", 112 | ) 113 | @click.option( 114 | "unasync", 115 | "--unasync", 116 | is_flag=True, 117 | help="Filter non-async functions", 118 | ) 119 | @click.option( 120 | "class_", 121 | "--class", 122 | is_flag=True, 123 | help="Filter classes", 124 | ) 125 | @click.option( 126 | "--documented", 127 | is_flag=True, 128 | help="Filter functions with docstrings", 129 | ) 130 | @click.option( 131 | "--undocumented", 132 | is_flag=True, 133 | help="Filter functions without docstrings", 134 | ) 135 | @click.option( 136 | "--public", 137 | is_flag=True, 138 | help="Filter for symbols without a _ prefix", 139 | ) 140 | @click.option( 141 | "--private", 142 | is_flag=True, 143 | help="Filter for symbols with a _ prefix", 144 | ) 145 | @click.option( 146 | "--dunder", 147 | is_flag=True, 148 | help="Filter for symbols matching __*__", 149 | ) 150 | @click.option( 151 | "--typed", 152 | is_flag=True, 153 | help="Filter functions with type annotations", 154 | ) 155 | @click.option( 156 | "--untyped", 157 | is_flag=True, 158 | help="Filter functions without type annotations", 159 | ) 160 | @click.option( 161 | "--partially-typed", 162 | is_flag=True, 163 | help="Filter functions with partial type annotations", 164 | ) 165 | @click.option( 166 | "--fully-typed", 167 | is_flag=True, 168 | help="Filter functions with full type annotations", 169 | ) 170 | @click.option( 171 | "--no-init", 172 | is_flag=True, 173 | help="Filter to exclude any __init__ methods", 174 | ) 175 | @click.option( 176 | "--check", is_flag=True, help="Exit with non-zero code if any matches found" 177 | ) 178 | @click.option( 179 | "--replace", 180 | is_flag=True, 181 | help="Replace matching symbol with text from stdin", 182 | ) 183 | @click.option("--rexec", help="Replace with the result of piping to this tool") 184 | # Output options 185 | @click.option("csv_", "--csv", is_flag=True, help="Output as CSV") 186 | @click.option("--tsv", is_flag=True, help="Output as TSV") 187 | @click.option("json_", "--json", is_flag=True, help="Output as JSON") 188 | @click.option("--nl", is_flag=True, help="Output as newline-delimited JSON") 189 | @click.option("--id-prefix", help="Prefix to use for symbol IDs") 190 | def cli( 191 | symbols, 192 | files, 193 | directories, 194 | stdlib, 195 | excludes, 196 | signatures, 197 | no_file, 198 | imports, 199 | modules, 200 | sys_paths, 201 | docs, 202 | count, 203 | silent, 204 | function, 205 | async_, 206 | unasync, 207 | class_, 208 | documented, 209 | undocumented, 210 | public, 211 | private, 212 | dunder, 213 | typed, 214 | untyped, 215 | partially_typed, 216 | fully_typed, 217 | no_init, 218 | check, 219 | replace, 220 | rexec, 221 | csv_, 222 | tsv, 223 | json_, 224 | nl, 225 | id_prefix, 226 | ): 227 | """ 228 | Find symbols in Python code and print the code for them. 229 | 230 | Example usage: 231 | 232 | \b 233 | # Search current directory and subdirectories 234 | symbex my_function MyClass 235 | 236 | \b 237 | # Search using a wildcard 238 | symbex 'test_*' 239 | 240 | \b 241 | # Find a specific class method 242 | symbex 'MyClass.my_method' 243 | 244 | \b 245 | # Find class methods using wildcards 246 | symbex '*View.handle_*' 247 | 248 | \b 249 | # Search a specific file 250 | symbex MyClass -f my_file.py 251 | 252 | \b 253 | # Search within a specific directory and its subdirectories 254 | symbex Database -d ~/projects/datasette 255 | 256 | \b 257 | # View signatures for all symbols in current directory and subdirectories 258 | symbex -s 259 | 260 | \b 261 | # View signatures for all test functions 262 | symbex 'test_*' -s 263 | 264 | \b 265 | # View signatures for all async functions with type definitions 266 | symbex --async --typed -s 267 | 268 | \b 269 | # Count the number of --async functions in the project 270 | symbex --async --count 271 | 272 | \b 273 | # Replace my_function with a new implementation: 274 | echo "def my_function(a, b): 275 | # This is a replacement implementation 276 | return a + b + 3 277 | " | symbex my_function --replace 278 | 279 | \b 280 | # Replace my_function with the output of a command: 281 | symbex first_function --rexec "sed 's/^/# /'" 282 | # This uses sed to comment out the function body 283 | """ 284 | # Only one of --json, --csv, --tsv, --nl 285 | output_formats = [csv_, tsv, json_, nl] 286 | if sum(output_formats) > 1: 287 | raise click.ClickException("Only one of --csv, --tsv, --json, --nl can be used") 288 | if id_prefix and not sum(output_formats): 289 | raise click.ClickException( 290 | "--id-prefix can only be used with --csv, --tsv, --json or --nl" 291 | ) 292 | if id_prefix is None: 293 | id_prefix = "" 294 | 295 | if modules: 296 | module_dirs = [] 297 | module_files = [] 298 | for module in modules: 299 | try: 300 | mod = importlib.import_module(module) 301 | mod_path = pathlib.Path(inspect.getfile(mod)) 302 | if mod_path.stem == "__init__": 303 | module_dirs.append(mod_path.parent) 304 | else: 305 | module_files.append(mod_path) 306 | except ModuleNotFoundError: 307 | raise click.ClickException("Module not found: {}".format(module)) 308 | directories = [*directories, *module_dirs] 309 | files = [*files, *module_files] 310 | if module_dirs or module_files: 311 | if not symbols: 312 | symbols = ["*"] 313 | site_packages_dirs = site.getsitepackages() 314 | stdlib_dir = pathlib.Path(pathlib.__file__).parent 315 | sys_paths = [*site_packages_dirs, str(stdlib_dir), *sys_paths] 316 | 317 | if no_init: 318 | fully_typed = True 319 | if stdlib and not directories and not files: 320 | silent = True 321 | if stdlib: 322 | stdlib_folder = pathlib.Path(pathlib.__file__).parent.resolve() 323 | directories = [*directories, *[stdlib_folder]] 324 | if str(stdlib_folder) not in sys_paths: 325 | sys_paths = [*[str(stdlib_folder)], *sys_paths] 326 | if count or docs: 327 | signatures = True 328 | if imports and not symbols: 329 | signatures = True 330 | # Show --help if no filter options are provided: 331 | if not any( 332 | [ 333 | symbols, 334 | signatures, 335 | async_, 336 | unasync, 337 | function, 338 | class_, 339 | documented, 340 | undocumented, 341 | public, 342 | private, 343 | dunder, 344 | typed, 345 | untyped, 346 | partially_typed, 347 | fully_typed, 348 | no_init, 349 | modules, 350 | ] 351 | ): 352 | ctx = click.get_current_context() 353 | click.echo(ctx.get_help()) 354 | ctx.exit() 355 | 356 | if rexec: 357 | replace = True 358 | no_file = True 359 | 360 | if replace and signatures: 361 | raise click.ClickException("--replace cannot be used with --signatures") 362 | if replace: 363 | no_file = True 364 | # Default to '*' if --signatures or filters are provided without symbols 365 | if ( 366 | any( 367 | [ 368 | signatures, 369 | async_, 370 | unasync, 371 | function, 372 | class_, 373 | documented, 374 | undocumented, 375 | public, 376 | private, 377 | dunder, 378 | typed, 379 | untyped, 380 | partially_typed, 381 | fully_typed, 382 | no_init, 383 | modules, 384 | ] 385 | ) 386 | and not symbols 387 | ): 388 | symbols = ["*"] 389 | if not files and not directories: 390 | directories = ["."] 391 | 392 | excludes = [pathlib.Path(exclude) for exclude in excludes] 393 | 394 | def iterate_files(): 395 | yield from (pathlib.Path(f) for f in files) 396 | for directory in directories: 397 | for path in pathlib.Path(directory).rglob("*.py"): 398 | # Skip if path is inside any of 'excludes' 399 | if any(path.resolve().is_relative_to(exclude) for exclude in excludes): 400 | continue 401 | if path.is_file(): 402 | yield path 403 | 404 | # If any --filters were supplied, handle them: 405 | if any( 406 | [ 407 | async_, 408 | unasync, 409 | function, 410 | class_, 411 | documented, 412 | undocumented, 413 | public, 414 | private, 415 | dunder, 416 | typed, 417 | untyped, 418 | partially_typed, 419 | fully_typed, 420 | no_init, 421 | ] 422 | ): 423 | # Return just nodes matching filters 424 | def filter(node: ast.AST) -> bool: 425 | # Filters must ALL match 426 | if async_ and not isinstance(node, ast.AsyncFunctionDef): 427 | return False 428 | if function and not isinstance( 429 | node, (ast.FunctionDef, ast.AsyncFunctionDef) 430 | ): 431 | return False 432 | if unasync and not isinstance(node, ast.FunctionDef): 433 | return False 434 | if class_ and not isinstance(node, ast.ClassDef): 435 | return False 436 | if documented and not ast.get_docstring(node): 437 | return False 438 | if undocumented and ast.get_docstring(node): 439 | return False 440 | if public and node.name.startswith("_") and not is_dunder(node.name): 441 | return False 442 | if private and (is_dunder(node.name) or not node.name.startswith("_")): 443 | return False 444 | if dunder and not is_dunder(node.name): 445 | return False 446 | summary = type_summary(node) 447 | # if no summary, type filters all fail 448 | if not summary and ( 449 | typed or untyped or partially_typed or fully_typed or no_init 450 | ): 451 | return False 452 | # Apply type filters 453 | if typed and not summary.partially: 454 | return False 455 | if untyped and summary.partially: 456 | return False 457 | if partially_typed and not (summary.partially and not summary.fully): 458 | return False 459 | if no_init and node.name == "__init__": 460 | return False 461 | if fully_typed and not summary.fully: 462 | return False 463 | return True 464 | 465 | else: 466 | # All nodes are allowed 467 | def filter(node: ast.AST) -> bool: 468 | return True 469 | 470 | pwd = pathlib.Path(".").resolve() 471 | num_matches = 0 472 | replace_matches = [] 473 | 474 | def stuff_to_output(): 475 | nonlocal num_matches 476 | for file in iterate_files(): 477 | try: 478 | code = read_file(file) 479 | except UnicodeDecodeError as ex: 480 | if not silent: 481 | click.secho( 482 | f"# Unicode error in {file}: {ex}", err=True, fg="yellow" 483 | ) 484 | continue 485 | try: 486 | nodes = find_symbol_nodes(code, str(file), symbols) 487 | except SyntaxError as ex: 488 | if not silent: 489 | click.secho( 490 | f"# Syntax error in {file}: {ex}", err=True, fg="yellow" 491 | ) 492 | continue 493 | for node, class_name in nodes: 494 | if not filter(node): 495 | continue 496 | if count or check: 497 | num_matches += 1 498 | if count or not signatures: 499 | continue 500 | # If file is within pwd, print relative path 501 | if pwd in file.resolve().parents: 502 | path = file.resolve().relative_to(pwd) 503 | else: 504 | # else print absolute path 505 | path = file.resolve() 506 | snippet, line_no = code_for_node( 507 | code, node, class_name, signatures, docs 508 | ) 509 | if replace: 510 | replace_matches.append((file.resolve(), snippet, line_no)) 511 | continue 512 | 513 | output_identifier_line = None 514 | output_import_line = None 515 | symbol_id = None 516 | 517 | if not no_file: 518 | bits = ["# File:", path] 519 | if class_name: 520 | bits.extend(["Class:", class_name]) 521 | bits.extend(["Line:", line_no]) 522 | symbol_id = "{}:{}".format(path, line_no) 523 | output_identifier_line = " ".join(str(bit) for bit in bits) 524 | if imports: 525 | import_line = import_line_for_function( 526 | node.name, path, sys_paths or directories 527 | ) 528 | # If it's a class then output '# from x import Class' instead 529 | if class_name: 530 | import_line = ( 531 | import_line.split(" import ")[0] + " import " + class_name 532 | ) 533 | symbol_id = import_line 534 | output_import_line = "# " + import_line 535 | 536 | yield Output( 537 | symbol_id, output_identifier_line, output_import_line, snippet 538 | ) 539 | 540 | if sum(output_formats) == 0: 541 | seen_imports = set() 542 | for item in stuff_to_output(): 543 | if item.output_identifier_line: 544 | click.echo(item.output_identifier_line) 545 | if item.output_import_line and item.output_import_line not in seen_imports: 546 | click.echo(item.output_import_line) 547 | seen_imports.add(item.output_import_line) 548 | click.echo(item.snippet) 549 | click.echo() 550 | else: 551 | # Do the fancy output formats thing 552 | to_output( 553 | sys.stdout, 554 | ((id_prefix + item.symbol_id, item.snippet) for item in stuff_to_output()), 555 | format="csv" if csv_ else "tsv" if tsv else "json" if json_ else "nl", 556 | ) 557 | return 558 | 559 | if count: 560 | click.echo(num_matches) 561 | 562 | if check and num_matches > 0: 563 | sys.exit(1) 564 | 565 | if replace: 566 | # Only works if we got a single match 567 | if len(replace_matches) != 1: 568 | raise click.ClickException( 569 | "--replace only works with a single match, got {}".format( 570 | len(replace_matches) 571 | ) 572 | ) 573 | filepath, to_replace = replace_matches[0][:2] 574 | if rexec: 575 | # Run to_replace through that command 576 | p = subprocess.Popen( 577 | rexec, 578 | stdin=subprocess.PIPE, 579 | stdout=subprocess.PIPE, 580 | stderr=subprocess.PIPE, 581 | shell=True, 582 | ) 583 | stdout, stderr = p.communicate(input=to_replace.encode()) 584 | if p.returncode != 0: 585 | raise click.ClickException( 586 | f"Command '{rexec}' failed with exit code {p.returncode}" 587 | f", stderr: {stderr.decode()}" 588 | ) 589 | 590 | replacement = stdout.decode() 591 | else: 592 | if sys.stdin.isatty(): 593 | raise click.ClickException( 594 | "--replace only works with text piped to it on stdin" 595 | ) 596 | new_lines = sys.stdin.readlines() 597 | # Check if any lines were read 598 | if len(new_lines) == 0: 599 | raise click.ClickException("No input for --replace found on stdin") 600 | replacement = "".join(new_lines) 601 | old = filepath.read_text("utf-8") 602 | new = old.replace(to_replace, replacement) 603 | filepath.write_text(new, "utf-8") 604 | 605 | 606 | def is_dunder(name): 607 | return name.startswith("__") and name.endswith("__") 608 | 609 | 610 | def to_output( 611 | fp: TextIO, 612 | lines: Iterable[Tuple[str, str]], 613 | format: Literal["csv", "tsv", "json", "nl"] = "csv", 614 | ) -> None: 615 | if format == "nl": 616 | for id, content in lines: 617 | line = json.dumps({"id": id, "code": content}) 618 | fp.write(line + "\n") 619 | return 620 | 621 | elif format == "json": 622 | fp.write("[") 623 | first = True 624 | for id, content in lines: 625 | line = json.dumps({"id": id, "code": content}) 626 | if first: 627 | fp.write(line) 628 | first = False 629 | else: 630 | fp.write(",\n " + line) 631 | fp.write("]\n") 632 | return 633 | 634 | dialect = "excel" if format == "csv" else "excel-tab" 635 | writer = csv.writer(fp, dialect=dialect) 636 | 637 | # Write header 638 | writer.writerow(["id", "code"]) 639 | 640 | # Write content 641 | for id, content in lines: 642 | writer.writerow([id, content]) 643 | -------------------------------------------------------------------------------- /symbex/lib.py: -------------------------------------------------------------------------------- 1 | import fnmatch 2 | import ast 3 | from ast import literal_eval, parse, AST, AsyncFunctionDef, FunctionDef, ClassDef 4 | import codecs 5 | from dataclasses import dataclass 6 | from itertools import zip_longest 7 | from pathlib import Path 8 | import re 9 | import textwrap 10 | from typing import Iterable, List, Optional, Tuple 11 | 12 | 13 | def find_symbol_nodes( 14 | code: str, filename: str, symbols: Iterable[str] 15 | ) -> List[Tuple[AST, Optional[str]]]: 16 | "Returns ast Nodes matching symbols" 17 | # list of (AST, None-or-class-name) 18 | matches = [] 19 | module = parse(code) 20 | for node in module.body: 21 | if not isinstance(node, (ClassDef, FunctionDef, AsyncFunctionDef)): 22 | continue 23 | name = getattr(node, "name", None) 24 | if match(name, symbols): 25 | matches.append((node, None)) 26 | # If it's a class search its methods too 27 | if isinstance(node, ClassDef): 28 | for child in node.body: 29 | if isinstance(child, (FunctionDef, AsyncFunctionDef)): 30 | qualified_name = f"{name}.{child.name}" 31 | if match(qualified_name, symbols): 32 | matches.append((child, name)) 33 | 34 | return matches 35 | 36 | 37 | def code_for_node( 38 | code: str, node: AST, class_name: str, signatures: bool, docstrings: bool 39 | ) -> Tuple[str, int]: 40 | "Returns the code for a given node" 41 | lines = code.split("\n") 42 | start = None 43 | end = None 44 | if signatures: 45 | if isinstance(node, (FunctionDef, AsyncFunctionDef)): 46 | definition, lineno = function_definition(node), node.lineno 47 | if class_name: 48 | definition = " " + definition 49 | definition = add_docstring(definition, node, docstrings, bool(class_name)) 50 | return definition, lineno 51 | elif isinstance(node, ClassDef): 52 | definition, lineno = class_definition(node), node.lineno 53 | definition = add_docstring(definition, node, docstrings, bool(class_name)) 54 | return definition, lineno 55 | else: 56 | # Not a function or class, fall back on just the line 57 | start = node.lineno - 1 58 | end = node.lineno 59 | else: 60 | # If the node has decorator_list, include those too 61 | if getattr(node, "decorator_list", None): 62 | start = node.decorator_list[0].lineno - 1 63 | else: 64 | start = node.lineno - 1 65 | end = node.end_lineno 66 | output = "\n".join(lines[start:end]) 67 | # If it's in a class, indent it 4 spaces 68 | return output, start + 1 69 | 70 | 71 | def add_docstring(definition: str, node: AST, docstrings: bool, is_method: bool) -> str: 72 | if not docstrings: 73 | return definition 74 | docstring = ast.get_docstring(node) 75 | if not docstring: 76 | return definition 77 | docstring = quoted_string(docstring) 78 | wrapped = textwrap.indent(docstring, " " if is_method else " ") 79 | return f"{definition}\n{wrapped}" 80 | 81 | 82 | def match(name: str, symbols: Iterable[str]) -> bool: 83 | "Returns True if name matches any of the symbols, resolving wildcards" 84 | if name is None: 85 | return False 86 | for search in symbols: 87 | if "*" not in search: 88 | # Exact matches only 89 | if name == search: 90 | return True 91 | elif search.count(".") == 1: 92 | # wildcards are supported either side of the dot 93 | if "." in name: 94 | class_match, method_match = search.split(".") 95 | class_name, method_name = name.split(".") 96 | if fnmatch.fnmatch(class_name, class_match) and fnmatch.fnmatch( 97 | method_name, method_match 98 | ): 99 | return True 100 | else: 101 | if fnmatch.fnmatch(name, search) and "." not in name: 102 | return True 103 | 104 | return False 105 | 106 | 107 | def function_definition(function_node: AST): 108 | function_name = function_node.name 109 | 110 | all_args = [ 111 | *function_node.args.posonlyargs, 112 | *function_node.args.args, 113 | *function_node.args.kwonlyargs, 114 | ] 115 | 116 | # For position only args like "def foo(a, /, b, c)" 117 | # we can look at the length of args.posonlyargs to see 118 | # if any are set and, if so, at what index the `/` should go 119 | position_of_slash = len(function_node.args.posonlyargs) 120 | 121 | # For func_keyword_only_args(a, *, b, c) the length of 122 | # the kwonlyargs tells us how many spaces back from the 123 | # end the star should be displayed 124 | position_of_star = len(all_args) - len(function_node.args.kwonlyargs) 125 | 126 | # function_node.args.defaults may have defaults 127 | # corresponding to function_node.args.args - but 128 | # if defaults has 2 and args has 3 then those 129 | # defaults correspond to the last two args 130 | defaults = [None] * (len(all_args) - len(function_node.args.defaults)) 131 | for default in function_node.args.defaults: 132 | try: 133 | value = literal_eval(default) 134 | if isinstance(value, str): 135 | value = f'"{value}"' 136 | except ValueError: 137 | value = getattr(default, "id", "...") 138 | defaults.append(value) 139 | 140 | arguments = [] 141 | 142 | for i, (arg, default) in enumerate(zip_longest(all_args, defaults)): 143 | if position_of_slash and i == position_of_slash: 144 | arguments.append("/") 145 | if position_of_star and i == position_of_star: 146 | arguments.append("*") 147 | arg_str = arg.arg 148 | if arg.annotation: 149 | arg_str += f": {annotation_definition(arg.annotation)}" 150 | 151 | if default: 152 | arg_str = f"{arg_str}={default}" 153 | 154 | arguments.append(arg_str) 155 | 156 | if function_node.args.vararg: 157 | arguments.append(f"*{function_node.args.vararg.arg}") 158 | 159 | if function_node.args.kwarg: 160 | arguments.append(f"**{function_node.args.kwarg.arg}") 161 | 162 | arguments_str = ", ".join(arguments) 163 | 164 | return_annotation = "" 165 | if function_node.returns: 166 | return_annotation = f" -> {annotation_definition(function_node.returns)}" 167 | 168 | def_ = "def " 169 | if isinstance(function_node, AsyncFunctionDef): 170 | def_ = "async def " 171 | 172 | return f"{def_}{function_name}({arguments_str}){return_annotation}:" 173 | 174 | 175 | def class_definition(class_def): 176 | # Base classes 177 | base_classes = [] 178 | for base in class_def.bases: 179 | if getattr(base, "id", None): 180 | base_classes.append(base.id) 181 | base_classes_str = ", ".join(base_classes) 182 | 183 | # Keywords (including metaclass) 184 | keywords = {k.arg: getattr(k.value, "id", str(k.value)) for k in class_def.keywords} 185 | metaclass = keywords.pop("metaclass", None) 186 | keyword_str = ", ".join([f"{k}=..." for k in keywords]) 187 | 188 | if base_classes_str and keyword_str: 189 | signature = f"{base_classes_str}, {keyword_str}" 190 | elif base_classes_str: 191 | signature = base_classes_str 192 | elif keyword_str: 193 | signature = keyword_str 194 | else: 195 | signature = "" 196 | 197 | if metaclass: 198 | sep = ", " if signature else "" 199 | signature = f"{signature}{sep}metaclass={metaclass}" 200 | 201 | if signature: 202 | signature = f"({signature})" 203 | 204 | class_definition = f"class {class_def.name}{signature}:" 205 | 206 | return class_definition 207 | 208 | 209 | def annotation_definition(annotation: AST) -> str: 210 | if annotation is None: 211 | return "" 212 | elif isinstance(annotation, ast.Name): 213 | return annotation.id 214 | elif isinstance(annotation, ast.Subscript): 215 | value = annotation_definition(annotation.value) 216 | slice = annotation_definition(annotation.slice) 217 | return f"{value}[{slice}]" 218 | elif isinstance(annotation, ast.Index): 219 | return annotation_definition(annotation.value) 220 | elif isinstance(annotation, ast.Tuple): 221 | elements = ", ".join(annotation_definition(e) for e in annotation.elts) 222 | return f"({elements})" 223 | else: 224 | return "?" 225 | 226 | 227 | def read_file(path): 228 | encoding_pattern = r"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)" 229 | default_encoding = "utf-8" 230 | 231 | with open(path, "r", encoding=default_encoding, errors="ignore") as f: 232 | first_512_bytes = f.read(512) 233 | first_two_lines = "\n".join(first_512_bytes.split("\n")[:2]) 234 | 235 | match = re.search(encoding_pattern, first_two_lines, re.MULTILINE) 236 | if match: 237 | encoding = match.group(1) 238 | else: 239 | encoding = default_encoding 240 | 241 | try: 242 | with codecs.open(path, "r", encoding=encoding) as f: 243 | content = f.read() 244 | except LookupError: 245 | # If the detected encoding is not valid, try again with utf-8 246 | with codecs.open(path, "r", encoding=default_encoding) as f: 247 | content = f.read() 248 | 249 | return content 250 | 251 | 252 | @dataclass 253 | class TypeSummary: 254 | fully: bool 255 | partially: bool 256 | 257 | 258 | def type_summary(node: AST) -> Optional[TypeSummary]: 259 | if not isinstance(node, (FunctionDef, AsyncFunctionDef)): 260 | return None 261 | all_args = [ 262 | *node.args.posonlyargs, 263 | *node.args.args, 264 | *node.args.kwonlyargs, 265 | ] 266 | num_arguments = len(all_args) 267 | has_untyped_self = False 268 | typed_args = [] 269 | first = True 270 | for arg in all_args: 271 | # Special case if the first argument is self - note that we do not 272 | # check that we are a class method but ideally we would do that 273 | if first and arg.arg == "self": 274 | has_untyped_self = True 275 | continue 276 | if arg.annotation: 277 | typed_args.append(arg) 278 | first = False 279 | 280 | return_is_typed = bool(node.returns) 281 | 282 | partially = len(typed_args) > 0 or return_is_typed 283 | fully = False 284 | if len(typed_args) == num_arguments and return_is_typed: 285 | fully = True 286 | # Something is fully typed if either EVERY arg is typed 287 | # or all arguments except for the untyped self are typed 288 | if has_untyped_self and len(typed_args) == num_arguments - 1 and return_is_typed: 289 | fully = True 290 | # Another special case: __init__() doesn't need a return type 291 | if node.name == "__init__": 292 | if (has_untyped_self and len(typed_args) == num_arguments - 1) or len( 293 | typed_args 294 | ) == num_arguments: 295 | # Doesn't matter if we have a return type 296 | fully = True 297 | 298 | return TypeSummary( 299 | fully=fully, 300 | partially=partially, 301 | ) 302 | 303 | 304 | def quoted_string(s): 305 | if "\n" in s: 306 | # Escape triple double quotes 307 | s = s.replace('"""', '\\"\\"\\"') 308 | return f'"""{s}"""' 309 | else: 310 | # Escape double quotes 311 | s = s.replace('"', '\\"') 312 | return f'"{s}"' 313 | 314 | 315 | def import_line_for_function( 316 | function_name: str, filepath: str, possible_root_dirs: List[str] 317 | ) -> str: 318 | """ 319 | Returns eg 'from foo.bar import baz' if filepath is /Users/dev/foo/bar.py 320 | and function_name is baz and possible_root_dirs is a list that contains 321 | /Users/dev 322 | """ 323 | filepath = Path(filepath).resolve() 324 | filename_without_extension = filepath.stem 325 | 326 | # Check for matches in possible_root_dirs 327 | for root_dir in possible_root_dirs: 328 | root_dir = Path(root_dir).resolve() 329 | try: 330 | relative_path = filepath.relative_to(root_dir) 331 | # Convert path separators to dots and assemble import line 332 | import_path = ".".join( 333 | relative_path.parts[:-1] + (filename_without_extension,) 334 | ) 335 | return f"from {import_path} import {function_name}" 336 | except ValueError: 337 | # If ValueError is raised, the filepath is not under the root_dir 338 | continue 339 | 340 | # If none of the root_dirs matched return a relative import 341 | return f"from .{filename_without_extension} import {function_name}" 342 | -------------------------------------------------------------------------------- /tests/example_symbols.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | 4 | # Function with no arguments 5 | def func_no_args(): 6 | "This has a single line docstring" 7 | pass 8 | 9 | 10 | # Function with positional arguments 11 | def func_positional_args(a, b, c): 12 | """This has a 13 | multi-line docstring""" 14 | pass 15 | 16 | 17 | # Async function 18 | async def async_func(a, b, c): 19 | pass 20 | 21 | 22 | # Function with default arguments 23 | def func_default_args(a, b=2, c=3): 24 | pass 25 | 26 | 27 | # Function with arbitrary number of positional arguments 28 | def func_arbitrary_positional_args(*args): 29 | pass 30 | 31 | 32 | # Function with arbitrary number of keyword arguments 33 | def func_arbitrary_keyword_args(**kwargs): 34 | pass 35 | 36 | 37 | # Function with both arbitrary positional and keyword arguments 38 | def func_arbitrary_args(*args, **kwargs): 39 | pass 40 | 41 | 42 | # Function with positional-only arguments (Python 3.8 and above) 43 | def func_positional_only_args(a, /, b, c): 44 | pass 45 | 46 | 47 | # Function with keyword-only arguments 48 | def func_keyword_only_args(a, *, b, c): 49 | pass 50 | 51 | 52 | # Function with type annotations (Python 3.5 and above) 53 | def func_type_annotations(a: int, b: str) -> bool: 54 | pass 55 | 56 | 57 | # Class with no base classes 58 | class ClassNoBase: 59 | pass 60 | 61 | 62 | # Class with a single base class 63 | class ClassSingleBase(int): 64 | pass 65 | 66 | 67 | # Class with multiple base classes 68 | class ClassMultipleBase(int, str): 69 | pass 70 | 71 | 72 | # Class with a metaclass 73 | class ClassWithMeta(metaclass=type): 74 | pass 75 | 76 | 77 | # Class with methods 78 | class ClassWithMethods: 79 | def __init__(self, a): 80 | pass 81 | 82 | def method_types(self, b: int) -> bool: 83 | return True 84 | 85 | def method_positional_only_args(a, /, b, c): 86 | pass 87 | 88 | def method_keyword_only_args(a, *, b, c): 89 | pass 90 | 91 | async def async_method(a, b, c): 92 | pass 93 | 94 | 95 | # Borrowed from Jedi 96 | # https://github.com/simonw/symbex/issues/16 97 | def function_with_non_pep_0484_annotation( 98 | x: "I can put anything here", 99 | xx: "", 100 | yy: "\r\n\0;+*&^564835(---^&*34", 101 | y: 3 + 3, 102 | zz: float, 103 | ) -> int("42"): 104 | pass 105 | 106 | 107 | def complex_annotations( 108 | code: str, symbols: Iterable[str] 109 | ) -> List[Tuple[AST, Optional[str]]]: 110 | pass 111 | 112 | 113 | # For testing --typed/--untyped/etc 114 | 115 | 116 | def func_fully_typed(a: int, b: str) -> bool: 117 | pass 118 | 119 | 120 | async def async_func_fully_typed(a: int, b: str) -> bool: 121 | pass 122 | 123 | 124 | def func_partially_typed(a: int, b) -> bool: 125 | pass 126 | 127 | 128 | def func_partially_typed_no_typed_return(a: int, b: int): 129 | pass 130 | 131 | 132 | def func_partially_typed_only_typed_return(a, b) -> int: 133 | pass 134 | 135 | 136 | def func_typed_no_params() -> None: 137 | pass 138 | 139 | 140 | def _private() -> None: 141 | pass 142 | 143 | 144 | class ClassForTypedTests: 145 | def __init__(self, a: int): 146 | pass 147 | 148 | def method_fully_typed(self, a: int, b: str) -> bool: 149 | "Single line" 150 | pass 151 | 152 | def method_partially_typed(self, a: int, b) -> bool: 153 | """Multiple 154 | lines""" 155 | pass 156 | 157 | def method_untyped(self, a, b): 158 | pass 159 | 160 | def _private_method(self): 161 | pass 162 | 163 | 164 | class _PrivateClass: 165 | pass 166 | -------------------------------------------------------------------------------- /tests/replace_tests.yaml: -------------------------------------------------------------------------------- 1 | - original: | 2 | def one(): 3 | pass 4 | 5 | def two(): 6 | "Two" 7 | stdin: | 8 | def two(): pass 9 | args: ["two", "--replace"] 10 | expected: | 11 | def one(): 12 | pass 13 | 14 | def two(): pass 15 | 16 | - original: | 17 | import os 18 | 19 | @decorated 20 | def one(): 21 | "This has multiple lines and a decorator" 22 | return 1 + 2 23 | 24 | def two(): 25 | "Two" 26 | stdin: | 27 | def one(): 28 | # No decorator now, but multiple lines 29 | a = 1 + 2 30 | return a * 3 31 | args: ["one", "--replace"] 32 | expected: | 33 | import os 34 | 35 | def one(): 36 | # No decorator now, but multiple lines 37 | a = 1 + 2 38 | return a * 3 39 | 40 | 41 | def two(): 42 | "Two" 43 | -------------------------------------------------------------------------------- /tests/test_filters.py: -------------------------------------------------------------------------------- 1 | # Tests for "symbex --async / --class / --typed etc" 2 | import pathlib 3 | import pytest 4 | from click.testing import CliRunner 5 | 6 | from symbex.cli import cli 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "args,expected", 11 | ( 12 | ( 13 | ["--function"], 14 | [ 15 | "def func_no_args", 16 | "def func_positional_args", 17 | "async def async_func", 18 | "def func_default_args", 19 | "def func_arbitrary_positional_args", 20 | "def func_arbitrary_keyword_args", 21 | "def func_arbitrary_args", 22 | "def func_positional_only_args", 23 | "def func_keyword_only_args", 24 | "def func_type_annotations", 25 | "def function_with_non_pep_0484_annotation", 26 | "def complex_annotations", 27 | "def func_fully_typed", 28 | "async def async_func_fully_typed", 29 | "def func_partially_typed", 30 | "def func_partially_typed_no_typed_return", 31 | "def func_partially_typed_only_typed_return", 32 | "def func_typed_no_params", 33 | "def _private", 34 | ], 35 | ), 36 | ( 37 | ["--class"], 38 | [ 39 | "class ClassNoBase", 40 | "class ClassSingleBase", 41 | "class ClassMultipleBase", 42 | "class ClassWithMeta", 43 | "class ClassWithMethods", 44 | "class ClassForTypedTests", 45 | "class _PrivateClass", 46 | ], 47 | ), 48 | ( 49 | ["--async"], 50 | [ 51 | "async def async_func", 52 | "async def async_func_fully_typed", 53 | ], 54 | ), 55 | ( 56 | ["--unasync"], 57 | [ 58 | "def func_no_args", 59 | "def func_positional_args", 60 | "def func_default_args", 61 | "def func_arbitrary_positional_args", 62 | "def func_arbitrary_keyword_args", 63 | "def func_arbitrary_args", 64 | "def func_positional_only_args", 65 | "def func_keyword_only_args", 66 | "def func_type_annotations", 67 | "def function_with_non_pep_0484_annotation", 68 | "def complex_annotations", 69 | "def func_fully_typed", 70 | "def func_partially_typed", 71 | "def func_partially_typed_no_typed_return", 72 | "def func_partially_typed_only_typed_return", 73 | "def func_typed_no_params", 74 | "def _private", 75 | ], 76 | ), 77 | # This doesn't make sense, so should return [] 78 | ( 79 | ["--async", "--class"], 80 | [], 81 | ), 82 | # Various typing options 83 | ( 84 | ["--typed"], 85 | [ 86 | "def func_type_annotations", 87 | "def function_with_non_pep_0484_annotation", 88 | "def complex_annotations", 89 | "def func_fully_typed", 90 | "async def async_func_fully_typed", 91 | "def func_partially_typed", 92 | "def func_partially_typed_no_typed_return", 93 | "def func_partially_typed_only_typed_return", 94 | "def func_typed_no_params", 95 | "def _private", 96 | ], 97 | ), 98 | ( 99 | ["--typed", "--async"], 100 | [ 101 | "async def async_func_fully_typed", 102 | ], 103 | ), 104 | ( 105 | ["--untyped"], 106 | [ 107 | "def func_no_args", 108 | "def func_positional_args", 109 | "async def async_func", 110 | "def func_default_args", 111 | "def func_arbitrary_positional_args", 112 | "def func_arbitrary_keyword_args", 113 | "def func_arbitrary_args", 114 | "def func_positional_only_args", 115 | "def func_keyword_only_args", 116 | ], 117 | ), 118 | ( 119 | ["--partially-typed"], 120 | [ 121 | "def func_partially_typed", 122 | "def func_partially_typed_no_typed_return", 123 | "def func_partially_typed_only_typed_return", 124 | ], 125 | ), 126 | ( 127 | ["--fully-typed"], 128 | [ 129 | "def func_type_annotations", 130 | "def function_with_non_pep_0484_annotation", 131 | "def complex_annotations", 132 | "def func_fully_typed", 133 | "async def async_func_fully_typed", 134 | "def func_typed_no_params", 135 | "def _private", 136 | ], 137 | ), 138 | # Test against methods 139 | ( 140 | ["--typed", "*.*"], 141 | [ 142 | "def method_types", 143 | "def __init__", 144 | "def method_fully_typed", 145 | "def method_partially_typed", 146 | ], 147 | ), 148 | ( 149 | ["--untyped", "*.*"], 150 | [ 151 | "def __init__", 152 | "def method_positional_only_args", 153 | "def method_keyword_only_args", 154 | "async def async_method", 155 | "def method_untyped", 156 | "def _private_method", 157 | ], 158 | ), 159 | ( 160 | ["--fully-typed", "*.*"], 161 | ["def method_types", "def __init__", "def method_fully_typed"], 162 | ), 163 | ( 164 | ["--fully-typed", "--no-init", "*.*"], 165 | [ 166 | "def method_types", 167 | "def method_fully_typed", 168 | ], 169 | ), 170 | ( 171 | ["--partially-typed", "*.*"], 172 | ["def method_partially_typed"], 173 | ), 174 | # Documented and undocumented 175 | ( 176 | ["--documented"], 177 | [ 178 | "def func_no_args", 179 | "def func_positional_args", 180 | ], 181 | ), 182 | ( 183 | ["--undocumented", "func_arbitrary_*"], 184 | [ 185 | "def func_arbitrary_positional_args", 186 | "def func_arbitrary_keyword_args", 187 | "def func_arbitrary_args", 188 | ], 189 | ), 190 | ( 191 | ["--documented", "*.*"], 192 | [ 193 | "def method_fully_typed", 194 | "def method_partially_typed", 195 | ], 196 | ), 197 | ( 198 | ["--undocumented", "*.method_*"], 199 | [ 200 | "def method_types", 201 | "def method_positional_only_args", 202 | "def method_keyword_only_args", 203 | "def method_untyped", 204 | ], 205 | ), 206 | # Private and public and dunder 207 | ( 208 | ["--public"], 209 | [ 210 | "def func_no_args", 211 | "def func_positional_args", 212 | "async def async_func", 213 | "def func_default_args", 214 | "def func_arbitrary_positional_args", 215 | "def func_arbitrary_keyword_args", 216 | "def func_arbitrary_args", 217 | "def func_positional_only_args", 218 | "def func_keyword_only_args", 219 | "def func_type_annotations", 220 | "class ClassNoBase", 221 | "class ClassSingleBase", 222 | "class ClassMultipleBase", 223 | "class ClassWithMeta", 224 | "class ClassWithMethods", 225 | "def function_with_non_pep_0484_annotation", 226 | "def complex_annotations", 227 | "def func_fully_typed", 228 | "async def async_func_fully_typed", 229 | "def func_partially_typed", 230 | "def func_partially_typed_no_typed_return", 231 | "def func_partially_typed_only_typed_return", 232 | "def func_typed_no_params", 233 | "class ClassForTypedTests", 234 | ], 235 | ), 236 | ( 237 | ["--public", "*.*"], 238 | [ 239 | "def __init__", 240 | "def method_types", 241 | "def method_positional_only_args", 242 | "def method_keyword_only_args", 243 | "async def async_method", 244 | "def __init__", 245 | "def method_fully_typed", 246 | "def method_partially_typed", 247 | "def method_untyped", 248 | ], 249 | ), 250 | (["--private", "*.*"], ["def _private_method"]), 251 | ( 252 | ["--public", "--class"], 253 | [ 254 | "class ClassNoBase", 255 | "class ClassSingleBase", 256 | "class ClassMultipleBase", 257 | "class ClassWithMeta", 258 | "class ClassWithMethods", 259 | "class ClassForTypedTests", 260 | ], 261 | ), 262 | ( 263 | ["--private", "--class"], 264 | [ 265 | "class _PrivateClass", 266 | ], 267 | ), 268 | (["--private"], ["def _private", "class _PrivateClass"]), 269 | (["--dunder", "*.*"], ["def __init__", "def __init__"]), 270 | ), 271 | ) 272 | def test_filters(args, expected): 273 | runner = CliRunner() 274 | full_args = args + [ 275 | "-s", 276 | "-f", 277 | str(pathlib.Path(__file__).parent / "example_symbols.py"), 278 | ] 279 | result = runner.invoke( 280 | cli, 281 | full_args, 282 | catch_exceptions=False, 283 | ) 284 | assert result.exit_code == 0 285 | # Remove # File: lines and blank lines 286 | lines = [ 287 | line.strip() 288 | for line in result.stdout.splitlines() 289 | if line.strip() and not line.startswith("# File:") 290 | ] 291 | # We only match up to the opening "(" or ":" 292 | defs = [line.split("(")[0].split(":")[0] for line in lines] 293 | assert defs == expected 294 | 295 | # Test the --count option too 296 | expected_count = len(expected) 297 | result2 = runner.invoke( 298 | cli, 299 | full_args + ["--count"], 300 | catch_exceptions=False, 301 | ) 302 | assert result2.exit_code == 0 303 | assert result2.stdout.strip() == str(expected_count) 304 | 305 | # And the --check option 306 | result3 = runner.invoke( 307 | cli, 308 | full_args + ["--check"], 309 | catch_exceptions=False, 310 | ) 311 | if expected: 312 | assert result3.exit_code == 1 313 | else: 314 | assert result3.exit_code == 0 315 | -------------------------------------------------------------------------------- /tests/test_imports.py: -------------------------------------------------------------------------------- 1 | # Tests for "symbex --imports --sys-path ..." 2 | import pathlib 3 | import pytest 4 | from click.testing import CliRunner 5 | 6 | from symbex.cli import cli 7 | 8 | 9 | @pytest.fixture 10 | def imports_dir(tmpdir): 11 | for path, content in ( 12 | ("one/foo.py", "def foo1():\n pass"), 13 | ("one/bar.py", "def bar1():\n pass"), 14 | ("two/foo.py", "def foo2():\n pass"), 15 | ("two/bar.py", "def bar2():\n pass"), 16 | ("deep/nested/three/foo.py", "def foo3():\n pass"), 17 | ( 18 | "deep/nested/three/bar.py", 19 | ("class Bar3:\n" " def __init__(self):\n" " pass"), 20 | ), 21 | ): 22 | p = pathlib.Path(tmpdir / path) 23 | p.parent.mkdir(parents=True, exist_ok=True) 24 | p.write_text(content, "utf-8") 25 | return tmpdir 26 | 27 | 28 | @pytest.mark.parametrize( 29 | "args,sys_path,expected", 30 | ( 31 | (["foo1"], None, "from one.foo import foo1"), 32 | (["foo2"], None, "from two.foo import foo2"), 33 | (["foo1"], "one/", "from foo import foo1"), 34 | # This should force a relative import: 35 | (["foo2"], "one/", "from .foo import foo2"), 36 | # Various deep nested examples 37 | (["foo3"], None, "from deep.nested.three.foo import foo3"), 38 | (["Bar3"], None, "from deep.nested.three.bar import Bar3"), 39 | (["foo3"], "deep/nested", "from three.foo import foo3"), 40 | # Test display of methods 41 | (["Bar3.*"], "deep/nested", "from three.bar import Bar3"), 42 | ), 43 | ) 44 | def test_imports(args, sys_path, expected, imports_dir): 45 | runner = CliRunner() 46 | args = ["-in", "-d", str(imports_dir)] + args 47 | if sys_path: 48 | args.extend(("--sys-path", str(imports_dir / sys_path))) 49 | result = runner.invoke(cli, args, catch_exceptions=False) 50 | assert result.exit_code == 0 51 | import_line = [ 52 | line[2:] for line in result.stdout.split("\n") if line.startswith("# from") 53 | ][0] 54 | assert import_line == expected 55 | -------------------------------------------------------------------------------- /tests/test_output.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from click.testing import CliRunner 3 | from symbex.cli import cli 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "extra_args,expected,expected_error", 8 | ( 9 | (["--json"], '[{"id": "symbex.py:1", "code": "def blah():"}]\n', None), 10 | (["--csv"], "id,code\nsymbex.py:1,def blah():\n", None), 11 | (["--tsv"], "id\tcode\nsymbex.py:1\tdef blah():\n", None), 12 | (["--nl"], '{"id": "symbex.py:1", "code": "def blah():"}\n', None), 13 | # ID prefix 14 | ( 15 | ["--nl", "--id-prefix", "foo:"], 16 | '{"id": "foo:symbex.py:1", "code": "def blah():"}\n', 17 | None, 18 | ), 19 | # Error states 20 | ( 21 | ["--json", "--csv"], 22 | None, 23 | "Only one of --csv, --tsv, --json, --nl can be used", 24 | ), 25 | ( 26 | ["--id-prefix", "foo:"], 27 | None, 28 | "--id-prefix can only be used with --csv, --tsv, --json or --nl", 29 | ), 30 | ), 31 | ) 32 | def test_output(extra_args, expected, expected_error): 33 | runner = CliRunner() 34 | with runner.isolated_filesystem(): 35 | open("symbex.py", "w").write("def blah():\n pass\n") 36 | result = runner.invoke( 37 | cli, 38 | ["blah", "-s"] + extra_args, 39 | catch_exceptions=False, 40 | ) 41 | if expected_error: 42 | assert result.exit_code != 0 43 | assert expected_error in result.stdout 44 | else: 45 | assert result.exit_code == 0 46 | assert result.output == expected 47 | 48 | 49 | def test_output_class_with_methods(): 50 | runner = CliRunner() 51 | with runner.isolated_filesystem(): 52 | open("symbex.py", "w").write( 53 | "class Foo:\n" 54 | " def bar(self):\n" 55 | " pass\n" 56 | " def baz(self):\n" 57 | " pass\n" 58 | ) 59 | result = runner.invoke( 60 | cli, 61 | ["*", "*.*", "--docs", "--imports", "-n"], 62 | catch_exceptions=False, 63 | ) 64 | assert result.exit_code == 0 65 | assert result.output == ( 66 | "# from symbex import Foo\n" 67 | "class Foo:\n" 68 | "\n" 69 | " def bar(self):\n" 70 | "\n" 71 | " def baz(self):\n" 72 | "\n" 73 | ) 74 | -------------------------------------------------------------------------------- /tests/test_replace.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from symbex.cli import cli 3 | import pathlib 4 | import pytest 5 | import sys 6 | from unittest.mock import patch 7 | import yaml 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "test", yaml.safe_load(open(pathlib.Path(__file__).parent / "replace_tests.yaml")) 12 | ) 13 | def test_replace(test): 14 | original, stdin, args, expected = ( 15 | test["original"], 16 | test["stdin"], 17 | test["args"], 18 | test["expected"], 19 | ) 20 | runner = CliRunner() 21 | with runner.isolated_filesystem() as root: 22 | path = pathlib.Path(root) / "code.py" 23 | path.write_text(original, "utf-8") 24 | result = runner.invoke(cli, args, input=stdin, catch_exceptions=False) 25 | modified = path.read_text("utf-8") 26 | assert result.exit_code == 0 27 | assert modified.strip() == expected.strip() 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "files,args,error", 32 | ( 33 | ( 34 | {"foo.py": "def bar(): pass"}, 35 | ["baz"], 36 | "Error: --replace only works with a single match, got 0", 37 | ), 38 | ( 39 | {"foo.py": "def bar(): pass", "baz/foo.py": "def bar(): pass"}, 40 | ["bar"], 41 | "Error: --replace only works with a single match, got 2", 42 | ), 43 | ( 44 | {"foo.py": "def bar(): pass"}, 45 | ["bar", "-s"], 46 | "Error: --replace cannot be used with --signatures", 47 | ), 48 | ( 49 | {"foo.py": "def bar(): pass"}, 50 | ["bar"], 51 | "Error: No input for --replace found on stdin", 52 | ), 53 | ), 54 | ) 55 | def test_replace_errors(files, args, error): 56 | runner = CliRunner() 57 | with patch("symbex.cli.sys.stdin.isatty", return_value=True): 58 | with runner.isolated_filesystem() as root: 59 | root = pathlib.Path(root) 60 | for path, code in files.items(): 61 | (root / path).parent.mkdir(parents=True, exist_ok=True) 62 | (root / path).write_text(code, "utf-8") 63 | result = runner.invoke(cli, args + ["--replace"], catch_exceptions=False) 64 | assert result.exit_code == 1 65 | assert result.output.strip() == error 66 | 67 | 68 | INPUT_CODE = """ 69 | def foo(bar): 70 | return 1 + 2 + 3 71 | 72 | class Foo: 73 | def bar(self): 74 | return 1 + 2 + 3 75 | """ 76 | REXEC_EXPECTED = """ 77 | DEF FOO(BAR): 78 | RETURN 1 + 2 + 3 79 | 80 | 81 | class Foo: 82 | def bar(self): 83 | return 1 + 2 + 3 84 | """ 85 | 86 | TO_UPPER = """ 87 | import sys 88 | 89 | print(sys.stdin.read().upper()) 90 | """ 91 | 92 | 93 | def test_replace_rexec(): 94 | runner = CliRunner() 95 | with runner.isolated_filesystem() as root: 96 | path = pathlib.Path(root) / "code.py" 97 | path.write_text(INPUT_CODE, "utf-8") 98 | to_upper = pathlib.Path(root) / "to_upper.py" 99 | to_upper.write_text(TO_UPPER, "utf-8") 100 | long_command = " ".join( 101 | [ 102 | sys.executable, 103 | str(to_upper), 104 | ] 105 | ) 106 | args = ["foo", "--rexec", long_command] 107 | result = runner.invoke(cli, args, catch_exceptions=False) 108 | modified = path.read_text("utf-8") 109 | assert result.exit_code == 0 110 | assert modified.strip() == REXEC_EXPECTED.strip() 111 | 112 | 113 | def test_replace_rexec_error(): 114 | runner = CliRunner() 115 | with runner.isolated_filesystem() as root: 116 | path = pathlib.Path(root) / "code.py" 117 | path.write_text(INPUT_CODE, "utf-8") 118 | to_upper = pathlib.Path(root) / "to_upper.py" 119 | to_upper.write_text("exit(1)", "utf-8") 120 | long_command = " ".join( 121 | [ 122 | sys.executable, 123 | str(to_upper), 124 | ] 125 | ) 126 | args = ["foo", "--rexec", long_command] 127 | result = runner.invoke(cli, args, catch_exceptions=False) 128 | not_modified = path.read_text("utf-8") 129 | assert result.exit_code == 1 130 | assert not_modified.strip() == INPUT_CODE.strip() 131 | -------------------------------------------------------------------------------- /tests/test_symbex.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import textwrap 4 | from click.testing import CliRunner 5 | 6 | from symbex.cli import cli 7 | from symbex.lib import read_file, quoted_string 8 | 9 | 10 | def test_no_args_shows_help(): 11 | runner = CliRunner() 12 | result = runner.invoke(cli, catch_exceptions=False) 13 | assert result.exit_code == 0 14 | assert "Usage: cli [OPTIONS]" in result.stdout 15 | 16 | 17 | @pytest.fixture 18 | def directory_full_of_code(tmpdir): 19 | for path, content in ( 20 | ("foo.py", "def foo1():\n pass\n\n@decorated\ndef foo2():\n pass\n\n"), 21 | ("bar.py", "class BarClass:\n pass\n\n"), 22 | ("nested.py/x/baz.py", 'def baz(delimiter=", ", type=str):\n pass\n\n'), 23 | ("nested.py/error.py", "def baz_error()" + "bug:\n pass\n\n"), 24 | ( 25 | "methods.py", 26 | textwrap.dedent( 27 | """ 28 | class MyClass: 29 | def __init__(self, a): 30 | self.a = a 31 | 32 | def method1(self, a=1): 33 | pass 34 | """ 35 | ), 36 | ), 37 | ( 38 | "async.py", 39 | textwrap.dedent( 40 | """ 41 | async def async_func(a, b, c): 42 | pass 43 | 44 | class MyAsyncClass: 45 | async def async_method(a, b, c): 46 | pass 47 | """ 48 | ).strip(), 49 | ), 50 | ): 51 | p = pathlib.Path(tmpdir / path) 52 | p.parent.mkdir(parents=True, exist_ok=True) 53 | p.write_text(content, "utf-8") 54 | return tmpdir 55 | 56 | 57 | @pytest.mark.parametrize( 58 | "args,expected", 59 | ( 60 | (["foo1", "--silent"], "# File: foo.py Line: 1\ndef foo1():\n pass\n\n"), 61 | ( 62 | ["foo*", "--silent"], 63 | "# File: foo.py Line: 1\ndef foo1():\n pass\n\n# File: foo.py Line: 4\n@decorated\ndef foo2():\n pass\n\n", 64 | ), 65 | ( 66 | ["BarClass", "--silent"], 67 | "# File: bar.py Line: 1\nclass BarClass:\n pass\n\n", 68 | ), 69 | ( 70 | ["baz", "--silent"], 71 | '# File: nested.py/x/baz.py Line: 1\ndef baz(delimiter=", ", type=str):\n pass\n\n', 72 | ), 73 | ( 74 | ["async_func", "--silent"], 75 | "# File: async.py Line: 1\nasync def async_func(a, b, c):\n pass\n\n", 76 | ), 77 | # The -f option 78 | ( 79 | ["baz", "-f", "nested.py/x/baz.py", "--silent"], 80 | '# File: nested.py/x/baz.py Line: 1\ndef baz(delimiter=", ", type=str):\n pass\n\n', 81 | ), 82 | # The -d option 83 | ( 84 | ["baz", "-d", "nested.py", "--silent"], 85 | '# File: nested.py/x/baz.py Line: 1\ndef baz(delimiter=", ", type=str):\n pass\n\n', 86 | ), 87 | # The -d option with -x to exclude 88 | ( 89 | ["baz", "-d", "nested.py", "-x", "nested.py/x/", "--silent"], 90 | "", 91 | ), 92 | # -x to exclude top level directory 93 | (["baz", "-x", "nested.py", "--silent"], ""), 94 | # Classes 95 | ( 96 | ["MyClass", "--silent"], 97 | "# File: methods.py Line: 2\n" 98 | "class MyClass:\n" 99 | " def __init__(self, a):\n" 100 | " self.a = a\n" 101 | "\n" 102 | " def method1(self, a=1):\n" 103 | " pass\n" 104 | "\n", 105 | ), 106 | ( 107 | ["MyClass.__init__", "--silent"], 108 | "# File: methods.py Class: MyClass Line: 3\n" 109 | " def __init__(self, a):\n" 110 | " self.a = a\n" 111 | "\n", 112 | ), 113 | ( 114 | ["MyClass.*", "--silent"], 115 | "# File: methods.py Class: MyClass Line: 3\n" 116 | " def __init__(self, a):\n" 117 | " self.a = a\n" 118 | "\n" 119 | "# File: methods.py Class: MyClass Line: 6\n" 120 | " def method1(self, a=1):\n" 121 | " pass\n" 122 | "\n", 123 | ), 124 | ( 125 | ["*.method*", "--silent"], 126 | "# File: methods.py Class: MyClass Line: 6\n" 127 | " def method1(self, a=1):\n" 128 | " pass\n" 129 | "\n", 130 | ), 131 | ( 132 | ["*.async_method", "--silent"], 133 | ( 134 | "# File: async.py Class: MyAsyncClass Line: 5\n" 135 | " async def async_method(a, b, c):\n" 136 | " pass\n" 137 | "\n" 138 | ), 139 | ), 140 | ), 141 | ) 142 | def test_fixture(directory_full_of_code, monkeypatch, args, expected): 143 | runner = CliRunner() 144 | monkeypatch.chdir(directory_full_of_code) 145 | result = runner.invoke(cli, args, catch_exceptions=False) 146 | assert result.exit_code == 0 147 | assert result.stdout == expected 148 | 149 | 150 | @pytest.mark.parametrize( 151 | "args,expected", 152 | ( 153 | ( 154 | ["foo*", "--silent"], 155 | "# File: foo.py Line: 1\n" 156 | "def foo1():\n" 157 | "\n" 158 | "# File: foo.py Line: 5\n" 159 | "def foo2():", 160 | ), 161 | (["BarClass", "--silent"], "# File: bar.py Line: 1\n" "class BarClass:"), 162 | ( 163 | ["baz", "--silent"], 164 | ( 165 | "# File: nested.py/x/baz.py Line: 1\n" 166 | 'def baz(delimiter=", ", type=str):' 167 | ), 168 | ), 169 | # Tests for the --module option 170 | ( 171 | ["-m", "contextlib", "suppress", "--silent", "-sn"], 172 | ("class suppress(AbstractContextManager):"), 173 | ), 174 | ), 175 | ) 176 | def test_symbex_symbols(directory_full_of_code, monkeypatch, args, expected): 177 | runner = CliRunner() 178 | monkeypatch.chdir(directory_full_of_code) 179 | result = runner.invoke(cli, args + ["-s"], catch_exceptions=False) 180 | assert result.exit_code == 0 181 | # Here expected is just the first two lines 182 | assert result.stdout.strip() == expected 183 | 184 | 185 | def test_errors(directory_full_of_code, monkeypatch): 186 | # Test without --silent to see errors 187 | runner = CliRunner(mix_stderr=False) 188 | monkeypatch.chdir(directory_full_of_code) 189 | result = runner.invoke(cli, ["baz"], catch_exceptions=False) 190 | assert result.exit_code == 0 191 | expected = ( 192 | "# File: nested.py/x/baz.py Line: 1\n" 193 | 'def baz(delimiter=", ", type=str):\n' 194 | " pass\n\n" 195 | ) 196 | assert result.stdout == expected 197 | # This differs between different Python versions 198 | assert result.stderr.startswith("# Syntax error in nested.py/error.py:") 199 | 200 | 201 | def test_read_file_with_encoding(tmpdir): 202 | # https://github.com/simonw/symbex/issues/18#issuecomment-1597546242 203 | path = tmpdir / "encoded.py" 204 | path.write_binary( 205 | b"# coding: iso-8859-5\n# (Unlikely to be the default encoding for most testers.)\n" 206 | b"# \xb1\xb6\xff\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef <- Cyrillic characters\n" 207 | b'u = "\xae\xe2\xf0\xc4"\n' 208 | ) 209 | text = read_file(path) 210 | assert text == ( 211 | "# coding: iso-8859-5\n" 212 | "# (Unlikely to be the default encoding for most testers.)\n" 213 | "# БЖџрстуфхцчшщъыьэюя <- Cyrillic characters\n" 214 | 'u = "Ўт№Ф"\n' 215 | ) 216 | 217 | 218 | def test_quoted_string(): 219 | # Single line, no quotes 220 | assert quoted_string("Hello, World!") == '"Hello, World!"' 221 | 222 | # Single line, with quotes 223 | assert quoted_string('Hello, "World"!') == '"Hello, \\"World\\"!"' 224 | 225 | # Multiline, no quotes 226 | multiline_str = "Hello,\nWorld!" 227 | expected_result = '"""Hello,\nWorld!"""' 228 | assert quoted_string(multiline_str) == expected_result 229 | 230 | # Multiline, with triple quotes 231 | multiline_str = '''Hello, 232 | "World", 233 | Here are some triple quotes: """ ''' 234 | expected_multiline_result = ( 235 | '"""Hello,\n"World",\nHere are some triple quotes: \\"\\"\\" """' 236 | ) 237 | quoted_multiline_result = quoted_string(multiline_str) 238 | assert quoted_multiline_result == expected_multiline_result 239 | 240 | # Empty string 241 | assert quoted_string("") == '""' 242 | -------------------------------------------------------------------------------- /tests/test_symbols.py: -------------------------------------------------------------------------------- 1 | # Tests for "symbex -s", using content of example_code.py 2 | import pathlib 3 | import pytest 4 | import re 5 | from click.testing import CliRunner 6 | 7 | from symbex.cli import cli 8 | 9 | 10 | @pytest.fixture 11 | def symbols_text(): 12 | runner = CliRunner() 13 | args = ["-s", "-f", str(pathlib.Path(__file__).parent / "example_symbols.py")] 14 | result = runner.invoke(cli, args, catch_exceptions=False) 15 | assert result.exit_code == 0 16 | return result.stdout 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "name,expected", 21 | ( 22 | ("func_no_args", "def func_no_args()"), 23 | ("func_positional_args", "def func_positional_args(a, b, c)"), 24 | ("async_func", "async def async_func(a, b, c)"), 25 | ("func_default_args", "def func_default_args(a, b=2, c=3)"), 26 | ("func_arbitrary_positional_args", "def func_arbitrary_positional_args(*args)"), 27 | ("func_arbitrary_keyword_args", "def func_arbitrary_keyword_args(**kwargs)"), 28 | ("func_arbitrary_args", "def func_arbitrary_args(*args, **kwargs)"), 29 | ("func_positional_only_args", "def func_positional_only_args(a, /, b, c)"), 30 | ("func_keyword_only_args", "def func_keyword_only_args(a, *, b, c)"), 31 | ("func_type_annotations", "def func_type_annotations(a: int, b: str) -> bool"), 32 | ("ClassNoBase", "class ClassNoBase"), 33 | ("ClassSingleBase", "class ClassSingleBase(int)"), 34 | ("ClassMultipleBase", "class ClassMultipleBase(int, str)"), 35 | ("ClassWithMeta", "class ClassWithMeta(metaclass=type)"), 36 | ( 37 | "function_with_non_pep_0484_annotation", 38 | "def function_with_non_pep_0484_annotation(x: ?, xx: ?, yy: ?, y: ?, zz: float) -> ?", 39 | ), 40 | ( 41 | "complex_annotations", 42 | "complex_annotations(code: str, symbols: Iterable[str]) -> List[Tuple[(AST, Optional[str])]]", 43 | ), 44 | ), 45 | ) 46 | def test_symbols(name, expected, symbols_text): 47 | # For error reporting try and find the relevant bit 48 | likely_line = [ 49 | line 50 | for line in symbols_text.split("\n") 51 | if (f"{name}(" in line or line.startswith(f"class {name}")) 52 | ][0] 53 | assert expected in symbols_text, "\nexpected:\t{}\ngot:\t\t{}".format( 54 | expected, likely_line 55 | ) 56 | # Special case to ensure we don't get ClassNoBase() 57 | assert "ClassNoBase()" not in symbols_text 58 | 59 | 60 | def test_method_symbols(): 61 | runner = CliRunner() 62 | args = [ 63 | "*.async*", 64 | "-s", 65 | "-f", 66 | str(pathlib.Path(__file__).parent / "example_symbols.py"), 67 | ] 68 | result = runner.invoke(cli, args, catch_exceptions=False) 69 | assert result.exit_code == 0 70 | assert result.stdout == ( 71 | "# File: tests/example_symbols.py Class: ClassWithMethods Line: 91\n" 72 | " async def async_method(a, b, c):\n" 73 | "\n" 74 | ) 75 | 76 | 77 | def test_docstrings(): 78 | runner = CliRunner() 79 | args = [ 80 | "*.*", 81 | "*", 82 | "--documented", 83 | "--docstrings", 84 | "-f", 85 | str(pathlib.Path(__file__).parent / "example_symbols.py"), 86 | ] 87 | result = runner.invoke(cli, args, catch_exceptions=False) 88 | assert result.exit_code == 0 89 | expected = """ 90 | # File: tests/example_symbols.py Line: X 91 | def func_no_args(): 92 | "This has a single line docstring" 93 | 94 | # File: tests/example_symbols.py Line: X 95 | def func_positional_args(a, b, c): 96 | \"\"\"This has a 97 | multi-line docstring\"\"\" 98 | 99 | # File: tests/example_symbols.py Class: ClassForTypedTests Line: X 100 | def method_fully_typed(self, a: int, b: str) -> bool: 101 | "Single line" 102 | 103 | # File: tests/example_symbols.py Class: ClassForTypedTests Line: X 104 | def method_partially_typed(self, a: int, b) -> bool: 105 | \"\"\"Multiple 106 | lines\"\"\" 107 | """.strip() 108 | actual = result.stdout.strip() 109 | # Replace 'Line \d' with 'Line X' before comparison using re 110 | actual = re.sub(r"Line: \d+", "Line: X", actual) 111 | assert actual == expected 112 | 113 | 114 | @pytest.mark.parametrize("no_file", (False, True)) 115 | def test_imports(no_file): 116 | runner = CliRunner() 117 | args = [ 118 | "func_arbitrary*", 119 | "--imports", 120 | "-s", 121 | "-d", 122 | str(pathlib.Path(__file__).parent), 123 | ] + (["--no-file"] if no_file else []) 124 | result = runner.invoke(cli, args, catch_exceptions=False) 125 | assert result.exit_code == 0 126 | expected = """ 127 | # File: tests/example_symbols.py Line: 28 128 | # from example_symbols import func_arbitrary_positional_args 129 | def func_arbitrary_positional_args(*args): 130 | 131 | # File: tests/example_symbols.py Line: 33 132 | # from example_symbols import func_arbitrary_keyword_args 133 | def func_arbitrary_keyword_args(**kwargs): 134 | 135 | # File: tests/example_symbols.py Line: 38 136 | # from example_symbols import func_arbitrary_args 137 | def func_arbitrary_args(*args, **kwargs): 138 | """.strip() 139 | if no_file: 140 | lines = expected.split("\n") 141 | lines = [line for line in lines if not line.startswith("# File: ")] 142 | expected = "\n".join(lines) 143 | assert result.output.strip() == expected 144 | --------------------------------------------------------------------------------