├── .editorconfig
├── .flake8
├── .gitattributes
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── bug.yml
│   │   └── config.yml
│   └── workflows
│       └── ci.yml
├── .gitignore
├── LICENSE
├── README.md
├── __pypackages__
│   └── .gitignore
├── aaa_mingshe.pth
├── compile.html
├── docs
│   ├── custom.en.md
│   ├── custom.md
│   ├── img
│   │   └── use-this-template.png
│   ├── index.en.md
│   ├── index.md
│   ├── syntax
│   │   ├── conditional.en.md
│   │   ├── conditional.md
│   │   ├── nullish-coalescing.en.md
│   │   ├── nullish-coalescing.md
│   │   ├── optional-chaining.en.md
│   │   ├── optional-chaining.md
│   │   ├── partial.en.md
│   │   ├── partial.md
│   │   ├── pipeline.en.md
│   │   ├── pipeline.md
│   │   ├── unpack-mapping.en.md
│   │   └── unpack-mapping.md
│   ├── usage.en.md
│   └── usage.md
├── mingshe.gram
├── mingshe
│   ├── __init__.py
│   ├── __main__.py
│   ├── __version__.py
│   ├── _vendor
│   │   ├── README.md
│   │   └── pegen
│   │       ├── __init__.py
│   │       ├── __main__.py
│   │       ├── build.py
│   │       ├── first_sets.py
│   │       ├── grammar.py
│   │       ├── grammar_parser.py
│   │       ├── grammar_visualizer.py
│   │       ├── metagrammar.gram
│   │       ├── parser.py
│   │       ├── parser_generator.py
│   │       ├── python_generator.py
│   │       ├── sccutils.py
│   │       ├── templates
│   │       │   └── index.html
│   │       ├── tokenizer.py
│   │       ├── validator.py
│   │       └── web.py
│   ├── commands.py
│   ├── core.py
│   ├── importlib.py
│   ├── parser.py
│   └── utils.py
├── mkdocs.yml
├── pdm.lock
├── pyproject.toml
├── script
│   ├── check.py
│   ├── generate.py
│   ├── lint.py
│   └── upload.she
└── tests
    ├── __init__.py
    ├── for_test_importlib
    │   ├── a.she
    │   ├── p
    │   │   └── __init__.she
    │   └── t
    │       └── b.she
    ├── python_parser
    │   ├── __init__.py
    │   ├── conftest.py
    │   ├── data
    │   │   ├── advanced_decorators.py
    │   │   ├── assignment.py
    │   │   ├── async.py
    │   │   ├── call.py
    │   │   ├── classes.py
    │   │   ├── comprehensions.py
    │   │   ├── expressions.py
    │   │   ├── function_def.py
    │   │   ├── imports.py
    │   │   ├── lambdas.py
    │   │   ├── multi_statement_per_line.py
    │   │   ├── no_newline_at_end_of_file.py
    │   │   ├── no_newline_at_end_of_file_with_comment.py
    │   │   ├── pattern_matching.py
    │   │   ├── simple_decorators.py
    │   │   ├── statements.py
    │   │   ├── type_comment.py
    │   │   └── with_statement_multi_items.py
    │   ├── parser_cache
    │   │   ├── .gitignore
    │   │   └── README
    │   ├── test_ast_parsing.py
    │   ├── test_syntax_error_handling.py
    │   ├── test_unsupported_syntax.py
    │   └── utils.py
    ├── test_core.py
    ├── test_importlib.py
    └── test_version.py
/.editorconfig:
--------------------------------------------------------------------------------
1 | # EditorConfig is awesome: https://EditorConfig.org
2 |
3 | # top-most EditorConfig file
4 | root = true
5 |
6 | [*]
7 | end_of_line = crlf
8 | insert_final_newline = true
9 | charset = utf-8
10 |
11 | [*.py]
12 | indent_style = space
13 | indent_size = 4
14 |
15 | [*.{yml,yaml}]
16 | indent_style = space
17 | indent_size = 2
18 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = W503,E203,E501,E731
3 | exclude = mingshe/_vendor,mingshe/parser.py,tests/python_parser
4 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.yml:
--------------------------------------------------------------------------------
1 | name: Bug report
2 | description: Report a bug to help improve this project
3 | body:
4 | - type: checkboxes
5 | id: checks
6 | attributes:
7 | label: Checklist
8 | description: Please make sure you check all these items before submitting your bug report.
9 | options:
10 | - label: The bug is reproducible against the latest release or `master`.
11 | required: true
12 | - label: There are no similar issues or pull requests to fix it yet.
13 | required: true
14 | - type: textarea
15 | id: describe
16 | attributes:
17 | label: Describe the bug
18 | description: A clear and concise description of what the bug is.
19 | validations:
20 | required: true
21 | - type: textarea
22 | id: steps
23 | attributes:
24 | label: Steps to reproduce the bug
25 | description: |
26 | Provide a *minimal* example with steps to reproduce the bug locally.
27 | NOTE: try to keep any external dependencies *at an absolute minimum*.
28 | In other words, remove anything that doesn't make the bug go away.
29 | validations:
30 | required: false
31 | - type: textarea
32 | id: expected
33 | attributes:
34 | label: Expected behavior
35 | description: A clear and concise description of what you expected to happen.
36 | validations:
37 | required: false
38 | - type: textarea
39 | id: actual
40 | attributes:
41 | label: Actual behavior
42 | description: A clear and concise description of what actually happened.
43 | validations:
44 | required: false
45 | - type: textarea
46 | id: environment
47 | attributes:
48 | label: Environment
49 | description: Describe your environment.
50 | placeholder: OS / Python / mingshe version
51 | validations:
52 | required: true
53 | - type: textarea
54 | id: additional
55 | attributes:
56 | label: Additional context
57 | description: |
58 |         Any additional information that can help us understand the problem.
59 |         E.g. linked issues, or a description of what you were trying to achieve.
60 | validations:
61 | required: false
62 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | # Ref: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser
2 | blank_issues_enabled: true
3 | contact_links:
4 | - name: Question
5 | url: https://github.com/abersheeran/mingshe/discussions
6 | about: Ask a question
7 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI/CD
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | tags:
8 | - "*"
9 | paths:
10 | - ".github/workflows/ci.yml"
11 | - "**.py"
12 | - "pdm.lock"
13 | - "!mingshe/__version__.py"
14 | pull_request:
15 | branches:
16 | - master
17 | paths:
18 | - ".github/workflows/ci.yml"
19 | - "**.py"
20 | - "pdm.lock"
21 | - "!mingshe/__version__.py"
22 |
23 | jobs:
24 | tests:
25 | name: "Python ${{ matrix.python-version }} ${{ matrix.os }}"
26 | runs-on: "${{ matrix.os }}"
27 | strategy:
28 | matrix:
29 | python-version: [3.9, "3.10"]
30 | os: [windows-latest, ubuntu-latest, macos-latest]
31 |
32 | steps:
33 | - uses: actions/checkout@v2
34 | - uses: pdm-project/setup-pdm@v2.5
35 | name: Setup Python and PDM
36 | with:
37 | python-version: ${{ matrix.python-version }}
38 | architecture: x64
39 | version: 1.9.0
40 | enable-pep582: true # Enable PEP 582 package loading globally
41 | - name: Install dependencies
42 | run: |
43 | pdm install -v -dG dev -dG test --no-self
44 | - name: Format and type check
45 | run: |
46 | python script/check.py
47 | - name: Test with pytest
48 | run: |
49 | pdm run pytest tests -o log_cli=true -o log_cli_level=DEBUG
50 |
51 | publish:
52 | needs: tests
53 | if: startsWith(github.ref, 'refs/tags/')
54 |
55 | runs-on: "${{ matrix.os }}"
56 | strategy:
57 | matrix:
58 | python-version: [3.9]
59 | os: [ubuntu-latest]
60 |
61 | steps:
62 | - uses: actions/checkout@v2
63 |
64 | - uses: pdm-project/setup-pdm@v2.1
65 | name: Setup Python and PDM
66 | with:
67 | python-version: ${{ matrix.python-version }}
68 | architecture: x64
69 | version: 1.8.5
70 | enable-pep582: true # Enable PEP 582 package loading globally
71 |
72 | - name: Install dependencies
73 | run: |
74 | python -m pip install -U twine
75 |
76 | - name: Build
77 | run: |
78 | pdm build
79 |
80 | - name: Publish
81 | run: |
82 | twine upload --username __token__ --password ${{ secrets.PYPI_API_TOKEN }} --verbose --skip-existing dist/*
83 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 | .venv/
85 |
86 | # Spyder project settings
87 | .spyderproject
88 |
89 | # Rope project settings
90 | .ropeproject
91 | *.npy
92 | *.pkl
93 |
94 | # mypy
95 | .mypy_cache/
96 |
97 | # VSCode
98 | .vscode/
99 |
100 | # PyCharm
101 | .idea/
102 |
103 | # mkdocs
104 | site/
105 |
106 | # pdm
107 | .pdm.toml
108 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright 2019 AberSheeran
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MíngShé
2 |
3 | A better [Python](https://www.python.org/) superset language. It uses [Pegen](https://github.com/we-like-parsers/pegen) to compile the code.
4 |
5 | > “鲜山多金玉,无草木,鲜水出焉,而北流注于伊水。其中多鸣蛇,其状如蛇而四翼,其音如磬,见则其邑大旱”——《山海经》
6 |
7 | - [Documentation](https://mingshe.aber.sh/)
8 | - [English](https://mingshe.aber.sh/en/)
9 |
10 | ## Install
11 |
12 | Requires Python 3.9+.
13 |
14 | ```
15 | pip install mingshe
16 | ```
17 |
18 | ## Change log
19 |
20 | Read [releases](https://github.com/abersheeran/mingshe/releases) to see the change log.
21 |
--------------------------------------------------------------------------------
/__pypackages__/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/aaa_mingshe.pth:
--------------------------------------------------------------------------------
1 | import mingshe.importlib;mingshe.importlib.install_meta(".she")
2 |
--------------------------------------------------------------------------------
/compile.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abersheeran/mingshe/db1ac01705dc200cc5de3ed55ea5f9ff5ef8f57b/compile.html
--------------------------------------------------------------------------------
/docs/custom.en.md:
--------------------------------------------------------------------------------
1 | MíngShé is not only a superset of the Python language but also a template for creating programming languages that compile to Python.
2 |
3 | ## Create a Repository
4 |
5 | Open the [MíngShé GitHub repository](https://github.com/abersheeran/mingshe) in your browser and click the "Use this template" button.
6 |
7 | 
8 |
9 | ## Modify the Syntax and Generate the Parsing File
10 |
11 | MíngShé uses [Pegen](https://github.com/we-like-parsers/pegen) to parse files, and the grammar description lives in the `mingshe.gram` file in the project's root directory. After making your modifications, run `python script/generate.py` to generate a pure Python compiler for the syntax you described.
12 |
13 | ### Anatomy of the Syntax File
14 |
15 | The syntax file is divided into two main sections: the code that will be inserted at the beginning of the generated parser file, and the grammar rules.
16 |
17 | In the header code, you can encapsulate some compilation processes for convenience when writing grammar rules, such as `make_partial_function`, `make_nullish_coalescing`, `make_optional_chaining`, etc.
18 |
19 | ### Grammar Rules
20 |
21 | !!! tip "Left Recursion"
22 |
23 | Pegen can handle left-recursive grammars, so you don't need to manually eliminate left recursion while writing the grammar rules.
24 |
25 | #### `# comment`
26 |
27 | Python-style comments.
28 |
29 | #### `e1 e2`
30 |
31 | Matches `e1` and then `e2`.
32 |
33 | ```
34 | rule_name: first_rule second_rule
35 | ```
36 |
37 | #### `e1 | e2`
38 |
39 | Matches either `e1` or `e2`.
40 |
41 | For formatting purposes, the first alternative can also appear on the line after the rule name. In this case, the `|` must be used before the first alternative, as shown below:
42 |
43 | ```
44 | rule_name[return_type]:
45 | | first_alt
46 | | second_alt
47 | ```
48 |
49 | #### `( e )`
50 |
51 | Matches `e`.
52 |
53 | ```
54 | rule_name: (e)
55 | ```
56 |
57 | A slightly more complex and useful example: using grouping parentheses with repetition operators:
58 |
59 | ```
60 | rule_name: (e1 e2)*
61 | ```
62 |
63 | #### `[ e ]` or `e?`
64 |
65 | Optionally matches `e`.
66 |
67 | ```
68 | rule_name: [e]
69 | ```
70 |
71 | A more useful example: defining trailing commas as optional:
72 |
73 | ```
74 | rule_name: e (',' e)* [',']
75 | ```
76 |
77 | #### `e*`
78 |
79 | Matches `e` zero or more times.
80 |
81 | ```
82 | rule_name: (e1 e2)*
83 | ```
84 |
85 | #### `e+`
86 |
87 | Matches `e` one or more times.
88 |
89 | ```
90 | rule_name: (e1 e2)+
91 | ```
92 |
93 | #### `s.e+`
94 |
95 | Matches one or more occurrences of `e`, separated by `s`. The generated parse tree does not include the separators; it is otherwise identical to `(e (s e)*)`.
96 |
97 | ```
98 | rule_name: ','.e+
99 | ```
100 |
101 | #### `&e`
102 |
103 | Attempts to match `e` without consuming any input.
104 |
105 | #### `!e`
106 |
107 | Succeeds only if `e` cannot be matched next, without consuming any input.
108 |
109 | A Python syntax example: `primary` consists of an atom and is not allowed to be followed by `.` or `(` or `[`:
110 |
111 | ```
112 | primary: atom !'.' !'(' !'['
113 | ```
114 |
115 | #### `~`
116 |
117 | Commits to the current alternative: even if the rest of the alternative fails to parse, no other alternatives will be tried.
118 |
119 | In the following example, if a left parenthesis is successfully matched, no other alternatives will be considered, even if `some_rule` or `')'` fails to parse.
120 |
121 | ```
122 | rule_name: '(' ~ some_rule ')' | some_alt
123 | ```
124 |
125 | #### Assignments within Rules
126 |
127 | Within a rule, you can name partial matches for use in generating the AST.
128 |
129 | ```
130 | rule_name[return_type]: '(' a=some_other_rule ')' { a }
131 | ```
132 |
133 | #### Actions after Matching
134 |
135 | An action can be any valid Python expression, and its value will be used as the value of the corresponding node.
136 |
137 | ```
138 | rule_name[return_type]:
139 | | first_alt1 first_alt2 { first_action }
140 | | second_alt1 second_alt2 { second_action }
141 | ```
142 |
143 | You can return an AST object, e.g., `{ ast.Add() }`, or call a function, e.g., `{ self.make_partial_function() }`.
144 |
145 | !!! tip "LOCATIONS"
146 |
147 |     `LOCATIONS` is a special variable that expands to keyword arguments carrying the current location information.
148 |
149 | ## Final Step: The Renaming Magic
150 |
151 | Use a modern editor to search for `mingshe` throughout the project and replace it with the name you prefer. You now have a user-friendly superset language that can live as a Python module.
152 |
153 | ## How to Use Your Own Language
154 |
155 | Use it just like you would use MíngShé.
156 |
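157 | !!! example "Putting the pieces together"
158 |
159 |     A small, hypothetical rule that combines sequencing, in-rule assignments, and an action (the rule name, `expression`, and the produced AST are illustrative, not taken from the real `mingshe.gram`):
160 |
161 |     ```
162 |     pair[ast.Tuple]:
163 |         | '(' a=expression ',' b=expression ')' { ast.Tuple(elts=[a, b], ctx=ast.Load(), LOCATIONS) }
164 |     ```
165 |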
--------------------------------------------------------------------------------
/docs/custom.md:
--------------------------------------------------------------------------------
1 | 鸣蛇不仅仅是一个 Python 超集语言,也是编写一个能编译到 Python 的编程语言的模板。
2 |
3 | ## 创建仓库
4 |
5 | 使用电脑打开[鸣蛇的 GitHub 地址](https://github.com/abersheeran/mingshe),点击界面上的 `Use this template` 按钮。
6 |
7 | 
8 |
9 | ## 修改语法并生成解析文件
10 |
11 | 鸣蛇使用 [Pegen](https://github.com/we-like-parsers/pegen) 对文件进行解析,语法描述都在项目根目录下的 `mingshe.gram` 里。修改完成后执行 `python script/generate.py` 即可为你所描述的语法生成一个纯 Python 编译器。
12 |
13 | ### 语法文件剖析
14 |
15 | 整个语法文件分为两大块,一个是插入最终生成的解析器文件头部的代码,一个是语法规则。
16 |
17 | 在头部代码里,我们可以自行封装一些编译过程,例如 `make_partial_function`、`make_nullish_coalescing`、`make_optional_chaining` 等,这是为了后续编写语法规则时更加方便地使用这些过程。
18 |
19 | ### 语法规则
20 |
21 | !!! tip "左递归"
22 |
23 | Pegen 可以解析左递归语法,所以你在编写语法规则的时候不必自己在脑子里解决左递归。
24 |
25 | #### `# comment`
26 |
27 | Python 风格的注释。
28 |
29 | #### `e1 e2`
30 |
31 | 匹配 e1 然后匹配 e2。
32 |
33 | ```
34 | rule_name: first_rule second_rule
35 | ```
36 |
37 | #### `e1 | e2`
38 |
39 | 匹配 e1 或 e2。
40 |
41 | 出于格式化目的,第一个选项也可以出现在规则名称之后的行上。 在这种情况下,`|` 必须在第一个选项之前使用,如下所示:
42 |
43 | ```
44 | rule_name[return_type]:
45 | | first_alt
46 | | second_alt
47 | ```
48 |
49 | #### `( e )`
50 |
51 | 匹配 e。
52 |
53 | ```
54 | rule_name: (e)
55 | ```
56 |
57 | 一个稍微复杂和有用的示例:将分组运算符与重复运算符一起使用:
58 |
59 | ```
60 | rule_name: (e1 e2)*
61 | ```
62 |
63 | #### `[ e ]` 或 `e?`
64 |
65 | 非必需地匹配 e。
66 |
67 | ```
68 | rule_name: [e]
69 | ```
70 |
71 | 一个更有用的例子:定义尾随逗号是可选的:
72 |
73 | ```
74 | rule_name: e (',' e)* [',']
75 | ```
76 |
77 | #### `e*`
78 |
79 | 匹配零次或多次出现的 e。
80 |
81 | ```
82 | rule_name: (e1 e2)*
83 | ```
84 |
85 | #### `e+`
86 |
87 | 匹配一次或多次出现的 e。
88 |
89 | ```
90 | rule_name: (e1 e2)+
91 | ```
92 |
93 | #### `s.e+`
94 |
95 | 匹配一个或多个 e,以 s 分隔。生成的解析树不包含分隔符,其他方面与 `(e (s e)*)` 相同。
96 |
97 | ```
98 | rule_name: ','.e+
99 | ```
100 |
101 | #### `&e`
102 |
103 | 尝试匹配 e 且不消耗任何输入。
104 |
105 | #### `!e`
106 |
107 | 尝试匹配非 e 且不消耗任何输入。
108 |
109 | 取自 Python 语法的一个示例:`primary` 由一个原子组成,其后不允许跟随 `.` 或 `(` 或 `[`:
110 |
111 | ```
112 | primary: atom !'.' !'(' !'['
113 | ```
114 |
115 | #### `~`
116 |
117 | 提交到当前的匹配分支:即使该分支后续匹配失败,也不会再尝试其他分支。
118 |
119 | 在如下示例中,如果成功匹配左括号,则不会考虑其他替代方案,即使 `some_rule` 或 `')'` 无法解析。
120 |
121 | ```
122 | rule_name: '(' ~ some_rule ')' | some_alt
123 | ```
124 |
125 | #### 规则内赋值
126 |
127 | 在一个规则中,你可以对部分匹配进行命名,以便在生成 AST 时使用。
128 |
129 | ```
130 | rule_name[return_type]: '(' a=some_other_rule ')' { a }
131 | ```
132 |
133 | #### 匹配后执行动作
134 |
135 | 动作可以是任何有效的 Python 表达式,它的值会作为该节点的值。
136 |
137 | ```
138 | rule_name[return_type]:
139 | | first_alt1 first_alt2 { first_action }
140 | | second_alt1 second_alt2 { second_action }
141 | ```
142 |
143 | 你可以返回一个 ast 对象,例如 `{ ast.Add() }`;也可以调用一个函数,例如 `{ self.make_partial_function() }`。
144 |
145 | !!! tip "LOCATIONS"
146 |
147 | `LOCATIONS` 是一个特殊的变量,它等价于以关键词参数的形式传入一个字典,其中包含了当前的位置信息。
148 |
149 | ## 最后一步:改名大法
150 |
151 | 使用一个现代编辑器在整个项目里搜索 `mingshe`,修改成你喜欢的名称。一个简单易用的、可以作为 Python 模块存在的超集语言就做好了。
152 |
153 | ## 如何使用你自己的语言
154 |
155 | 就像使用鸣蛇一样使用它。
156 |
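157 | !!! example "综合示例"
158 |
159 |     一个假想的小规则,综合了顺序匹配、规则内赋值和动作(规则名、`expression` 以及生成的 AST 均为示意,并非真实的 `mingshe.gram` 片段):
160 |
161 |     ```
162 |     pair[ast.Tuple]:
163 |         | '(' a=expression ',' b=expression ')' { ast.Tuple(elts=[a, b], ctx=ast.Load(), LOCATIONS) }
164 |     ```
165 |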
--------------------------------------------------------------------------------
/docs/img/use-this-template.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abersheeran/mingshe/db1ac01705dc200cc5de3ed55ea5f9ff5ef8f57b/docs/img/use-this-template.png
--------------------------------------------------------------------------------
/docs/index.en.md:
--------------------------------------------------------------------------------
1 | MíngShé is a Python superset language. It uses the [Pegen](https://github.com/we-like-parsers/pegen) syntax parser to compile the code to Python AST for execution.
2 |
3 | In addition to the syntax supported by Python itself (currently synchronized to 3.10), MíngShé supports the following syntax:
4 |
5 | - Pipeline: `arg |> func`
6 | - Conditional: `condition ? true_branch : false_branch`
7 | - Partial: `square = pow(?, 2)`
8 | - Nullish coalescing: `obj ?? other`
9 | - Optional chaining: `obj?.attr`, `obj?[key]`, `obj?.func()`
10 | - Unpack mapping: `{ key } = one_dict`
11 |
12 | ## Editor plugins
13 |
14 | Visual Studio Code:
15 |
16 | - [vscode-mingshe](https://marketplace.visualstudio.com/items?itemName=frostming.vscode-mingshe)
17 |
18 | ## Other related websites
19 |
20 | - [Source Code](https://github.com/abersheeran/mingshe)
21 | - [User Forum](https://github.com/abersheeran/mingshe/discussions)
22 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | 鸣蛇是一个 Python 超集语言,它使用 [Pegen](https://github.com/we-like-parsers/pegen) 语法解析器,将代码编译到 Python AST 进行运行。
2 |
3 | 除去 Python 本身支持的语法外(目前同步到 3.10),鸣蛇支持如下语法:
4 |
5 | - 管道:`arg |> func`
6 | - 条件运算:`condition ? true_branch : false_branch`
7 | - 偏函数:`square = pow(?, 2)`
8 | - 空值合并:`obj ?? other`
9 | - 可选链:`obj?.attr`、`obj?[key]`、`obj?.func()`
10 | - 字典解构赋值:`{ key } = one_dict`
11 |
12 | ## 编辑器插件
13 |
14 | Visual Studio Code:
15 |
16 | - [vscode-mingshe](https://marketplace.visualstudio.com/items?itemName=frostming.vscode-mingshe)
17 |
18 | ## 其他相关网站
19 |
20 | - [源代码托管](https://github.com/abersheeran/mingshe)
21 | - [用户论坛](https://github.com/abersheeran/mingshe/discussions)
22 |
--------------------------------------------------------------------------------
/docs/syntax/conditional.en.md:
--------------------------------------------------------------------------------
1 | In [PEP 308](https://www.python.org/dev/peps/pep-0308/), Guido finally chose `if-else` as the syntax for conditional expressions in Python. MíngShé adds a ternary conditional operator that follows the conventions of traditional languages, so whether you prefer Guido's design or C's, you can always use either in MíngShé.
2 |
3 | ## Grammar
4 |
5 | ```
6 | a ? b : c
7 | ```
8 |
9 | ## Precedence
10 |
11 | `a ? b : c` has the same precedence as `b if a else c`.
12 |
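13 | For example (a small sketch; the variable and values are illustrative), both lines below print the same result:
14 |
15 | ```
16 | x = 10
17 | print(x > 5 ? "big" : "small")
18 | print("big" if x > 5 else "small")
19 | ```
20 |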
--------------------------------------------------------------------------------
/docs/syntax/conditional.md:
--------------------------------------------------------------------------------
1 | 在 [PEP308](https://www.python.org/dev/peps/pep-0308/) 中 Guido 最终为 Python 选择了 `if-else` 作为条件运算的语法。鸣蛇增加了更符合传统语言习惯的三元条件运算符,无论你是喜欢 Guido 的设计还是 C 的设计,在鸣蛇里你都可以使用。
2 |
3 | ## 语法
4 |
5 | ```
6 | a ? b : c
7 | ```
8 |
9 | ## 优先级
10 |
11 | `a ? b : c` 与 `b if a else c` 的优先级相同。
12 |
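13 | 例如(变量与取值仅作示意),下面两行输出相同的结果:
14 |
15 | ```
16 | x = 10
17 | print(x > 5 ? "big" : "small")
18 | print("big" if x > 5 else "small")
19 | ```
20 |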
--------------------------------------------------------------------------------
/docs/syntax/nullish-coalescing.en.md:
--------------------------------------------------------------------------------
1 | The nullish coalescing operator looks very similar to the `or` operator, but `a or b` is equivalent to `a if a else b`, while `a ?? b` is equivalent to `b if a is None else a`. The difference between the two is that `a ?? b` tests only for `None`, while `a or b` tests for every falsy value (such as `""`, `False`, `0`, `[]`, `{}`, etc.).
2 |
3 | ## Grammar
4 |
5 | ```
6 | a ?? b
7 | ```
8 |
9 | ## Precedence
10 |
11 | `??` has the same precedence as `or`.
12 |
13 | ## Notice
14 |
15 | The relative precedence of `??` and `or` is left undefined, so the two cannot be chained directly. When you want `??` and `or` to appear in the same expression, you need to parenthesize one of them; otherwise a syntax error will be raised.
16 |
17 | ```
18 | a or b ?? c # syntax error
19 | (a or b) ?? c # correct
20 | a or (b ?? c) # correct
21 | ```
22 |
23 | ## Reference
24 |
25 | - https://peps.python.org/pep-0505/
26 |
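27 | ## Example
28 |
29 | A small sketch of the difference described above (the dictionary and values are illustrative):
30 |
31 | ```
32 | config = {"timeout": 0}
33 | config.get("timeout") ?? 30  # 0, because only None triggers the fallback
34 | config.get("timeout") or 30  # 30, because any falsy value triggers the fallback
35 | ```
36 |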
--------------------------------------------------------------------------------
/docs/syntax/nullish-coalescing.md:
--------------------------------------------------------------------------------
1 | 空值合并运算符乍一看与 `or` 运算很像,但 `a or b` 等价于 `a if a else b`,而 `a ?? b` 等价于 `b if a is None else a`。两者之间的区别在于,`a ?? b` 仅判断空值(`None`),而 `a or b` 则判断所有的假值(如:`""`、`False`、`0`、`[]`、`{}` 等)。
2 |
3 | ## 语法
4 |
5 | ```
6 | a ?? b
7 | ```
8 |
9 | ## 优先级
10 |
11 | `??` 与 `or` 优先级一致。
12 |
13 | ## 注意
14 |
15 | `??` 与 `or` 之间的优先级未确定,故而不能直接链式使用。当你想让 `??` 和 `or` 出现在同一个表达式里,那么你需要给它们外面加上括号,否则会抛出一个语法错误。
16 |
17 | ```
18 | a or b ?? c # 语法错误
19 | (a or b) ?? c # 正确
20 | a or (b ?? c) # 正确
21 | ```
22 |
23 | ## 参考
24 |
25 | - https://peps.python.org/pep-0505/
26 |
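27 | ## 示例
28 |
29 | 一个展示上述区别的小示例(字典与取值仅作示意):
30 |
31 | ```
32 | config = {"timeout": 0}
33 | config.get("timeout") ?? 30  # 得到 0,只有 None 才会触发回退值
34 | config.get("timeout") or 30  # 得到 30,任何假值都会触发回退值
35 | ```
36 |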
--------------------------------------------------------------------------------
/docs/syntax/optional-chaining.en.md:
--------------------------------------------------------------------------------
1 | The only difference between the optional chaining operator and the ordinary operator is that when `obj` is `None`, the optional chaining operator returns `None` where the ordinary operator would throw an exception.
2 |
3 | When accessing object attributes that may not exist, the optional chaining operator makes expressions shorter and more concise.
4 |
5 | ## Grammar
6 |
7 | ```
8 | obj?.attr
9 |
10 | obj?[key]
11 |
12 | obj?.method()
13 | ```
14 |
15 | ## Examples of real code
16 |
17 | ```python
18 | import socket
19 |
20 | sock = None
21 | try:
22 | sock = socket.create_connection(('www.python.org', 80))
23 | ...
24 | finally:
25 | sock?.close()
26 | ```
27 |
28 | ## Reference
29 |
30 | - https://peps.python.org/pep-0505/
31 |
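32 | ## Equivalent code
33 |
34 | Conceptually, the `sock?.close()` call above behaves like the following plain Python (a sketch of the semantics described here, not the exact compiled output):
35 |
36 | ```python
37 | if sock is not None:
38 |     sock.close()
39 | ```
40 |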
--------------------------------------------------------------------------------
/docs/syntax/optional-chaining.md:
--------------------------------------------------------------------------------
1 | 可选链操作符与原始操作符的区别仅在于 `obj` 为空值(`None`)时,可选链操作符会返回 `None`,而原始操作符会抛出异常。
2 |
3 | 当尝试访问可能不存在的对象属性时,可选链操作符将会使表达式更短、更简明。
4 |
5 | ## 语法
6 |
7 | ```
8 | obj?.attr
9 |
10 | obj?[key]
11 |
12 | obj?.method()
13 | ```
14 |
15 | ## 真实代码的示例
16 |
17 | ```python
18 | import socket
19 |
20 | sock = None
21 | try:
22 | sock = socket.create_connection(('www.python.org', 80))
23 | ...
24 | finally:
25 | sock?.close()
26 | ```
27 |
28 | ## 参考
29 |
30 | - https://peps.python.org/pep-0505/
31 |
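32 | ## 等价代码
33 |
34 | 从概念上讲,上面的 `sock?.close()` 的行为相当于如下普通 Python 代码(仅示意此处描述的语义,并非实际编译产物):
35 |
36 | ```python
37 | if sock is not None:
38 |     sock.close()
39 | ```
40 |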
--------------------------------------------------------------------------------
/docs/syntax/partial.en.md:
--------------------------------------------------------------------------------
1 | Partial function operators allow you to quickly create a new function by binding some parameters. Compared with the standard library `functools.partial`, it is more flexible and powerful.
2 |
3 | ## Grammar
4 |
5 | ```
6 | f(?, 2)
7 |
8 | f(?, ?, 3, ?)
9 |
10 | f(a, b=?)
11 |
12 | f(*?, b=10)
13 |
14 | f(name="Aber", **?)
15 | ```
16 |
17 | !!! tip ""
18 |     Each `?` adds one positional parameter to the generated function.
19 |
20 | ## Trick
21 |
22 | The pipe (`|>`) always passes a single argument. When that argument is a sequence you want spread into separate arguments, use `*?`.
23 |
24 | Try to see the difference between `("hello", "world") |> print(?)` and `("hello", "world") |> print(*?)`.
25 |
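26 | A sketch of the output you should see (a single tuple versus two unpacked arguments):
27 |
28 | ```
29 | ("hello", "world") |> print(?)   # ('hello', 'world')
30 | ("hello", "world") |> print(*?)  # hello world
31 | ```
32 |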
--------------------------------------------------------------------------------
/docs/syntax/partial.md:
--------------------------------------------------------------------------------
1 | 偏函数操作符可以让你通过绑定部分参数快速创建一个新的函数。与标准库的 `functools.partial` 相比,它更灵活,更强大。
2 |
3 | ## 语法
4 |
5 | ```
6 | f(?, 2)
7 |
8 | f(?, ?, 3, ?)
9 |
10 | f(a, b=?)
11 |
12 | f(*?, b=10)
13 |
14 | f(name="Aber", **?)
15 | ```
16 |
17 | !!! tip ""
18 | 每一个 `?` 都意味着为生成的函数增加一个位置参数。
19 |
20 | ## 使用技巧
21 |
22 | 当你需要传递一个序列,但却只想使用管道(`|>`)传递一个参数时,请使用 `*?`。
23 |
24 | 试试看 `("hello", "world") |> print(?)` 和 `("hello", "world") |> print(*?)` 的区别。
25 |
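26 | 大致输出如下(一个完整的元组与两个被解包的参数):
27 |
28 | ```
29 | ("hello", "world") |> print(?)   # ('hello', 'world')
30 | ("hello", "world") |> print(*?)  # hello world
31 | ```
32 |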
--------------------------------------------------------------------------------
/docs/syntax/pipeline.en.md:
--------------------------------------------------------------------------------
1 | The pipeline operator lets you chain function calls in an easy-to-read way; when several functions are chained, it can markedly improve the readability of the code. Essentially, the pipeline operator is syntactic sugar for single-argument function calls, allowing you to write a call like this:
2 |
3 | ```
4 | 10 |> range |> list |> print
5 | ```
6 |
7 | Written with traditional syntax, the equivalent code is:
8 |
9 | ```python
10 | print(list(range(10)))
11 | ```
12 |
13 | ## Grammar
14 |
15 | ```
16 | arg |> function
17 | ```
18 |
19 | ## Precedence
20 |
21 | The precedence of `|>` is lower than `|` and higher than the comparison operators (`in`, `not in`, `is`, `is not`, `<`, `<=`, `>`, `>=`, `!=`, `==`).
22 |
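23 | For example (a sketch of how this precedence resolves; the names are illustrative):
24 |
25 | ```
26 | a | b |> f    # parsed as (a | b) |> f
27 | x |> f == y   # parsed as (x |> f) == y
28 | ```
29 |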
--------------------------------------------------------------------------------
/docs/syntax/pipeline.md:
--------------------------------------------------------------------------------
1 | 管道运算符允许以一种易读的方式去对函数链式调用,当链式调用多个函数时,使用管道操作符可以改善代码的可读性。本质上来说,管道操作符是单参数函数调用的语法糖,它允许你像这样执行一个调用:
2 |
3 | ```
4 | 10 |> range |> list |> print
5 | ```
6 |
7 | 使用传统语法来写,等效的代码是:
8 |
9 | ```python
10 | print(list(range(10)))
11 | ```
12 |
13 | ## 语法
14 |
15 | ```
16 | arg |> function
17 | ```
18 |
19 | ## 优先级
20 |
21 | `|>` 的优先级低于 `|`,高于比较运算符(`in`, `not in`, `is`, `is not`, `<`, `<=`, `>`, `>=`, `!=`, `==`)。
22 |
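23 | 例如(仅示意上述优先级的解析方式,名称均为虚构):
24 |
25 | ```
26 | a | b |> f    # 解析为 (a | b) |> f
27 | x |> f == y   # 解析为 (x |> f) == y
28 | ```
29 |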
--------------------------------------------------------------------------------
/docs/syntax/unpack-mapping.en.md:
--------------------------------------------------------------------------------
1 | The unpack-mapping syntax lets you easily extract the values of specified keys from a mapping. If an expected key does not exist, the variable is assigned the default value of the `.get` method (`None`).
2 |
3 | ```
4 | { key } = one_dict
5 | ```
6 |
7 | The equivalent code is:
8 |
9 | ```
10 | key = (lambda **kwargs: kwargs.get('key'))(**one_dict)
11 | ```
12 |
13 | ## Grammar
14 |
15 | ```
16 | { name [, name] } = expression
17 | ```
18 |
19 | ## Trick
20 |
21 | Any object that implements the `Mapping` interface can be destructured with this syntax, for example `MultiValueDict` in Django.
22 |
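23 | For example (a sketch; the dictionary and names are illustrative), several keys can be extracted at once, and missing keys fall back to `None`:
24 |
25 | ```
26 | point = {"x": 1, "y": 2}
27 | { x, y, z } = point
28 | print(x, y, z)  # 1 2 None
29 | ```
30 |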
--------------------------------------------------------------------------------
/docs/syntax/unpack-mapping.md:
--------------------------------------------------------------------------------
1 | 字典解构赋值语法允许你便捷的摘取字典中指定的键的值,若期望取出的键值不存在,则变量被赋值为 `.get` 方法的默认值。
2 |
3 | ```
4 | { key } = one_dict
5 | ```
6 |
7 | 其等价于:
8 |
9 | ```
10 | key = (lambda **kwargs: kwargs.get('key'))(**one_dict)
11 | ```
12 |
13 | ## 语法
14 |
15 | ```
16 | { name [, name] } = expression
17 | ```
18 |
19 | ## 使用技巧
20 |
21 | 任何实现了 `Mapping` 类型的对象都可以使用这种语法进行解构,例如 Django 里的 `MultiValueDict`。
22 |
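23 | 例如(字典与变量名仅作示意),可以一次取出多个键,不存在的键会回退为 `None`:
24 |
25 | ```
26 | point = {"x": 1, "y": 2}
27 | { x, y, z } = point
28 | print(x, y, z)  # 1 2 None
29 | ```
30 |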
--------------------------------------------------------------------------------
/docs/usage.en.md:
--------------------------------------------------------------------------------
1 | ## As a script
2 |
3 | Write the following code to `hello.she`, and then run `mingshe ./hello.she`.
4 |
5 | ```mingshe
6 | "hello world" |> print
7 | ```
8 |
9 | ## As a module
10 |
11 | Just as you would use a Python module, you can use a MíngShé module.
12 |
13 | ```python
14 | # lib.she
15 | def digit_sum(s: str) -> int:
16 | return s |> map(int, ?) |> sum
17 | ```
18 |
19 | ```python
20 | # main.py
21 | from lib import digit_sum
22 |
23 | print(digit_sum('123456'))
24 | ```
25 |
26 | ## Compile to Python
27 |
28 | Use `mingshe --compile ...` to compile MíngShé code to Python; you can also target a specific Python version: `mingshe --compile --python 2.7 ...`.
29 |
30 | ## Run short code
31 |
32 | Just try `mingshe -c "9 ** 106 |> print"`.
33 |
34 | Also, you can use `mingshe --compile -c "9 ** 106 |> print"`.
35 |
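36 | For example, based on the pipeline semantics described in the syntax docs, compiling a pipeline should emit ordinary Python along these lines (the exact output format may differ):
37 |
38 | ```
39 | $ mingshe --compile -c "10 |> range |> list |> print"
40 | print(list(range(10)))
41 | ```
42 |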
--------------------------------------------------------------------------------
/docs/usage.md:
--------------------------------------------------------------------------------
1 | ## 作为一个直接执行的脚本
2 |
3 | 把下面的代码写入 `hello.she` 文件,然后运行 `mingshe ./hello.she`。
4 |
5 | ```mingshe
6 | "hello world" |> print
7 | ```
8 |
9 | ## 作为一个模块
10 |
11 | 就像导入一个 Python 模块一样,你也可以直接使用 import 命令导入一个鸣蛇模块。
12 |
13 | ```python
14 | # lib.she
15 | def digit_sum(s: str) -> int:
16 | return s |> map(int, ?) |> sum
17 | ```
18 |
19 | ```python
20 | # main.py
21 | from lib import digit_sum
22 |
23 | print(digit_sum('123456'))
24 | ```
25 |
26 | ## 编译到 Python
27 |
28 | 使用 `mingshe --compile ...` 编译到 Python 代码,并且可以编译到指定的 Python 版本:`mingshe --compile --python 2.7 ...`。
29 |
30 | ## 运行小段代码
31 |
32 | 例如 `mingshe -c "9 ** 106 |> print"`。
33 |
34 | 你也可以直接使用 `mingshe --compile -c "9 ** 106 |> print"` 来看看编译结果。
35 |
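36 | 例如,依据语法文档中描述的管道语义,编译一段管道代码大致会得到如下的普通 Python(实际输出格式可能略有差异):
37 |
38 | ```
39 | $ mingshe --compile -c "10 |> range |> list |> print"
40 | print(list(range(10)))
41 | ```
42 |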
--------------------------------------------------------------------------------
/mingshe/__init__.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import sys
3 |
4 | sys.path.insert(1, str((pathlib.Path(__file__).parent / "_vendor").absolute()))  # make the vendored dependencies (pegen) importable
5 |
--------------------------------------------------------------------------------
/mingshe/__main__.py:
--------------------------------------------------------------------------------
1 | from .commands import main
2 |
3 | main()
4 |
--------------------------------------------------------------------------------
/mingshe/__version__.py:
--------------------------------------------------------------------------------
1 | VERSION = (0, 8, 2)
2 |
3 | __version__ = ".".join(map(str, VERSION))
4 |
--------------------------------------------------------------------------------
/mingshe/_vendor/README.md:
--------------------------------------------------------------------------------
1 | ONLY SYNC. MUST NOT MODIFY.
2 |
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abersheeran/mingshe/db1ac01705dc200cc5de3ed55ea5f9ff5ef8f57b/mingshe/_vendor/pegen/__init__.py
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.8
2 |
3 | """pegen -- PEG Generator.
4 |
5 | Search the web for PEG Parsers for reference.
6 | """
7 |
8 | import argparse
9 | import sys
10 | import time
11 | import token
12 | import traceback
13 | from typing import Tuple
14 |
15 | from pegen.build import (
16 | Grammar,
17 | Parser,
18 | ParserGenerator,
19 | Tokenizer,
20 | build_python_parser_and_generator,
21 | )
22 | from pegen.validator import validate_grammar
23 |
24 |
25 | def generate_python_code(
26 | args: argparse.Namespace,
27 | ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
28 |
29 | verbose = args.verbose
30 | verbose_tokenizer = verbose >= 3
31 | verbose_parser = verbose == 2 or verbose >= 4
32 | try:
33 | grammar, parser, tokenizer, gen = build_python_parser_and_generator(
34 | args.grammar_filename,
35 | args.output,
36 | verbose_tokenizer,
37 | verbose_parser,
38 | skip_actions=args.skip_actions,
39 | )
40 | return grammar, parser, tokenizer, gen
41 | except Exception as err:
42 | if args.verbose:
43 | raise # Show traceback
44 | traceback.print_exception(err.__class__, err, None)
45 | sys.stderr.write("For full traceback, use -v\n")
46 | sys.exit(1)
47 |
48 |
49 | argparser = argparse.ArgumentParser(
50 | prog="pegen", description="Experimental PEG-like parser generator"
51 | )
52 | argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
53 | argparser.add_argument(
54 | "-v",
55 | "--verbose",
56 | action="count",
57 | default=0,
58 | help="Print timing stats; repeat for more debug output",
59 | )
60 |
61 | argparser.add_argument("grammar_filename", help="Grammar description")
62 | argparser.add_argument(
63 | "-o",
64 | "--output",
65 | metavar="OUT",
66 | default="parse.py",
67 | help="Where to write the generated parser",
68 | )
69 | argparser.add_argument(
70 | "--skip-actions",
71 | action="store_true",
72 | help="Suppress code emission for rule actions",
73 | )
74 |
75 |
76 | def main() -> None:
77 | args = argparser.parse_args()
78 |
79 | t0 = time.time()
80 | grammar, parser, tokenizer, gen = generate_python_code(args)
81 | t1 = time.time()
82 |
83 | validate_grammar(grammar)
84 |
85 | if not args.quiet:
86 | if args.verbose:
87 | print("Raw Grammar:")
88 | for line in repr(grammar).splitlines():
89 | print(" ", line)
90 |
91 | print("Clean Grammar:")
92 | for line in str(grammar).splitlines():
93 | print(" ", line)
94 |
95 | if args.verbose:
96 | print("First Graph:")
97 | for src, dsts in gen.first_graph.items():
98 | print(f" {src} -> {', '.join(dsts)}")
99 | print("First SCCS:")
100 | for scc in gen.first_sccs:
101 | print(" ", scc, end="")
102 | if len(scc) > 1:
103 | print(
104 | " # Indirectly left-recursive; leaders:",
105 | {name for name in scc if grammar.rules[name].leader},
106 | )
107 | else:
108 | name = next(iter(scc))
109 | if name in gen.first_graph[name]:
110 | print(" # Left-recursive")
111 | else:
112 | print()
113 |
114 | if args.verbose:
115 | dt = t1 - t0
116 | diag = tokenizer.diagnose()
117 | nlines = diag.end[0]
118 | if diag.type == token.ENDMARKER:
119 | nlines -= 1
120 | print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
121 | if dt:
122 | print(f"; {nlines / dt:.0f} lines/sec")
123 | else:
124 | print()
125 | print("Caches sizes:")
126 | print(f" token array : {len(tokenizer._tokens):10}")
127 | print(f" cache : {len(parser._cache):10}")
128 |
129 |
130 | if __name__ == "__main__":
131 | main()
132 |
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/build.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import sysconfig
3 | import tokenize
4 | from typing import Dict, List, Set, Tuple
5 |
6 | from pegen.grammar import Grammar
7 | from pegen.grammar_parser import GeneratedParser as GrammarParser
8 | from pegen.parser import Parser
9 | from pegen.parser_generator import ParserGenerator
10 | from pegen.python_generator import PythonParserGenerator
11 | from pegen.tokenizer import Tokenizer
12 |
13 | MOD_DIR = pathlib.Path(__file__).resolve().parent
14 |
15 | TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
16 |
17 |
18 | def build_parser(
19 | grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
20 | ) -> Tuple[Grammar, Parser, Tokenizer]:
21 | with open(grammar_file) as file:
22 | tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
23 | parser = GrammarParser(tokenizer, verbose=verbose_parser)
24 | grammar = parser.start()
25 |
26 | if not grammar:
27 | raise parser.make_syntax_error(grammar_file)
28 |
29 | return grammar, parser, tokenizer
30 |
31 |
32 | def build_python_generator(
33 | grammar: Grammar,
34 | grammar_file: str,
35 | output_file: str,
36 | skip_actions: bool = False,
37 | ) -> ParserGenerator:
38 | with open(output_file, "w") as file:
39 | gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions
40 | gen.generate(grammar_file)
41 | return gen
42 |
43 |
44 | def build_python_parser_and_generator(
45 | grammar_file: str,
46 | output_file: str,
47 | verbose_tokenizer: bool = False,
48 | verbose_parser: bool = False,
49 | skip_actions: bool = False,
50 | ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
51 | """Generate rules, python parser, tokenizer, parser generator for a given grammar
52 |
53 | Args:
54 | grammar_file (string): Path for the grammar file
55 | output_file (string): Path for the output file
56 | verbose_tokenizer (bool, optional): Whether to display additional output
57 | when generating the tokenizer. Defaults to False.
58 | verbose_parser (bool, optional): Whether to display additional output
59 | when generating the parser. Defaults to False.
60 | skip_actions (bool, optional): Whether to pretend no rule has any actions.
61 | """
62 | grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
63 | gen = build_python_generator(
64 | grammar,
65 | grammar_file,
66 | output_file,
67 | skip_actions=skip_actions,
68 | )
69 | return grammar, parser, tokenizer, gen
70 |
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/first_sets.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.8
2 |
3 | import argparse
4 | import pprint
5 | import sys
6 | from typing import Dict, Set
7 |
8 | from pegen.build import build_parser
9 | from pegen.grammar import (
10 | Alt,
11 | Cut,
12 | Gather,
13 | Grammar,
14 | GrammarVisitor,
15 | Group,
16 | Leaf,
17 | Lookahead,
18 | NamedItem,
19 | NameLeaf,
20 | NegativeLookahead,
21 | Opt,
22 | PositiveLookahead,
23 | Repeat,
24 | Repeat0,
25 | Repeat1,
26 | Rhs,
27 | Rule,
28 | StringLeaf,
29 | )
30 |
31 | argparser = argparse.ArgumentParser(
32 | prog="calculate_first_sets",
33 | description="Calculate the first sets of a grammar",
34 | )
35 | argparser.add_argument("grammar_file", help="The grammar file")
36 |
37 |
38 | class FirstSetCalculator(GrammarVisitor):
39 | def __init__(self, rules: Dict[str, Rule]) -> None:
40 | self.rules = rules
41 | for rule in rules.values():
42 | rule.nullable_visit(rules)
43 | self.first_sets: Dict[str, Set[str]] = dict()
44 | self.in_process: Set[str] = set()
45 |
46 | def calculate(self) -> Dict[str, Set[str]]:
47 | for name, rule in self.rules.items():
48 | self.visit(rule)
49 | return self.first_sets
50 |
51 | def visit_Alt(self, item: Alt) -> Set[str]:
52 | result: Set[str] = set()
53 | to_remove: Set[str] = set()
54 | for other in item.items:
55 | new_terminals = self.visit(other)
56 | if isinstance(other.item, NegativeLookahead):
57 | to_remove |= new_terminals
58 | result |= new_terminals
59 | if to_remove:
60 | result -= to_remove
61 |
62 | # If the set of new terminals can start with the empty string,
63 | # it means that the item is completelly nullable and we should
64 | # also considering at least the next item in case the current
65 | # one fails to parse.
66 |
67 | if "" in new_terminals:
68 | continue
69 |
70 | if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
71 | break
72 |
73 | # Do not allow the empty string to propagate.
74 | result.discard("")
75 |
76 | return result
77 |
78 | def visit_Cut(self, item: Cut) -> Set[str]:
79 | return set()
80 |
81 | def visit_Group(self, item: Group) -> Set[str]:
82 | return self.visit(item.rhs)
83 |
84 | def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
85 | return self.visit(item.node)
86 |
87 | def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
88 | return self.visit(item.node)
89 |
90 | def visit_NamedItem(self, item: NamedItem) -> Set[str]:
91 | return self.visit(item.item)
92 |
93 | def visit_Opt(self, item: Opt) -> Set[str]:
94 | return self.visit(item.node)
95 |
96 | def visit_Gather(self, item: Gather) -> Set[str]:
97 | return self.visit(item.node)
98 |
99 | def visit_Repeat0(self, item: Repeat0) -> Set[str]:
100 | return self.visit(item.node)
101 |
102 | def visit_Repeat1(self, item: Repeat1) -> Set[str]:
103 | return self.visit(item.node)
104 |
105 | def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
106 | if item.value not in self.rules:
107 | return {item.value}
108 |
109 | if item.value not in self.first_sets:
110 | self.first_sets[item.value] = self.visit(self.rules[item.value])
111 | return self.first_sets[item.value]
112 | elif item.value in self.in_process:
113 | return set()
114 |
115 | return self.first_sets[item.value]
116 |
117 | def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
118 | return {item.value}
119 |
120 | def visit_Rhs(self, item: Rhs) -> Set[str]:
121 | result: Set[str] = set()
122 | for alt in item.alts:
123 | result |= self.visit(alt)
124 | return result
125 |
126 | def visit_Rule(self, item: Rule) -> Set[str]:
127 | if item.name in self.in_process:
128 | return set()
129 | elif item.name not in self.first_sets:
130 | self.in_process.add(item.name)
131 | terminals = self.visit(item.rhs)
132 | if item.nullable:
133 | terminals.add("")
134 | self.first_sets[item.name] = terminals
135 | self.in_process.remove(item.name)
136 | return self.first_sets[item.name]
137 |
138 |
139 | def main() -> None:
140 | args = argparser.parse_args()
141 |
142 | try:
143 | grammar, parser, tokenizer = build_parser(args.grammar_file)
144 | except Exception as err:
145 | print("ERROR: Failed to parse grammar file", file=sys.stderr)
146 | sys.exit(1)
147 |
148 | firs_sets = FirstSetCalculator(grammar.rules).calculate()
149 | pprint.pprint(firs_sets)
150 |
151 |
152 | if __name__ == "__main__":
153 | main()
154 |
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/grammar.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | from abc import abstractmethod
4 | from typing import (
5 | TYPE_CHECKING,
6 | AbstractSet,
7 | Any,
8 | Dict,
9 | Iterable,
10 | Iterator,
11 | List,
12 | Optional,
13 | Set,
14 | Tuple,
15 | Union,
16 | )
17 |
18 | if TYPE_CHECKING:
19 | from pegen.parser_generator import ParserGenerator
20 |
21 |
22 | class GrammarError(Exception):
23 | pass
24 |
25 |
26 | class GrammarVisitor:
27 | def visit(self, node: Any, *args: Any, **kwargs: Any) -> Any:
28 | """Visit a node."""
29 | method = "visit_" + node.__class__.__name__
30 | visitor = getattr(self, method, self.generic_visit)
31 | return visitor(node, *args, **kwargs)
32 |
33 | def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None:
34 | """Called if no explicit visitor function exists for a node."""
35 | for value in node:
36 | if isinstance(value, list):
37 | for item in value:
38 | self.visit(item, *args, **kwargs)
39 | else:
40 | self.visit(value, *args, **kwargs)
41 |
42 |
43 | class Grammar:
44 | def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
45 | self.rules = {rule.name: rule for rule in rules}
46 | self.metas = dict(metas)
47 |
48 | def __str__(self) -> str:
49 | return "\n".join(str(rule) for name, rule in self.rules.items())
50 |
51 | def __repr__(self) -> str:
52 | lines = ["Grammar("]
53 | lines.append(" [")
54 | for rule in self.rules.values():
55 | lines.append(f" {repr(rule)},")
56 | lines.append(" ],")
57 | lines.append(f" {repr(list(self.metas.items()))}")
58 | lines.append(")")
59 | return "\n".join(lines)
60 |
61 | def __iter__(self) -> Iterator[Rule]:
62 | yield from self.rules.values()
63 |
64 |
65 | # Global flag: when True, __str__() omits rule actions (actions are off by default).
66 | SIMPLE_STR = True
67 |
68 |
69 | class Rule:
70 | def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
71 | self.name = name
72 | self.type = type
73 | self.rhs = rhs
74 | self.memo = bool(memo)
75 | self.visited = False
76 | self.nullable = False
77 | self.left_recursive = False
78 | self.leader = False
79 |
80 | def is_loop(self) -> bool:
81 | return self.name.startswith("_loop")
82 |
83 | def is_gather(self) -> bool:
84 | return self.name.startswith("_gather")
85 |
86 | def __str__(self) -> str:
87 | if SIMPLE_STR or self.type is None:
88 | res = f"{self.name}: {self.rhs}"
89 | else:
90 | res = f"{self.name}[{self.type}]: {self.rhs}"
91 | if len(res) < 88:
92 | return res
93 | lines = [res.split(":")[0] + ":"]
94 | lines += [f" | {alt}" for alt in self.rhs.alts]
95 | return "\n".join(lines)
96 |
97 | def __repr__(self) -> str:
98 | return f"Rule({self.name!r}, {self.type!r}, {self.rhs!r})"
99 |
100 | def __iter__(self) -> Iterator[Rhs]:
101 | yield self.rhs
102 |
103 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
104 | if self.visited:
105 | # A left-recursive rule is considered non-nullable.
106 | return False
107 | self.visited = True
108 | self.nullable = self.rhs.nullable_visit(rules)
109 | return self.nullable
110 |
111 | def initial_names(self) -> AbstractSet[str]:
112 | return self.rhs.initial_names()
113 |
114 | def flatten(self) -> Rhs:
115 | # If it's a single parenthesized group, flatten it.
116 | rhs = self.rhs
117 | if (
118 | not self.is_loop()
119 | and len(rhs.alts) == 1
120 | and len(rhs.alts[0].items) == 1
121 | and isinstance(rhs.alts[0].items[0].item, Group)
122 | ):
123 | rhs = rhs.alts[0].items[0].item.rhs
124 | return rhs
125 |
126 | def collect_todo(self, gen: ParserGenerator) -> None:
127 | rhs = self.flatten()
128 | rhs.collect_todo(gen)
129 |
130 |
131 | class Leaf:
132 | def __init__(self, value: str):
133 | self.value = value
134 |
135 | def __str__(self) -> str:
136 | return self.value
137 |
138 | def __iter__(self) -> Iterable[str]:
139 | if False:
140 | yield
141 |
142 | @abstractmethod
143 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
144 | raise NotImplementedError
145 |
146 | @abstractmethod
147 | def initial_names(self) -> AbstractSet[str]:
148 | raise NotImplementedError
149 |
150 |
151 | class NameLeaf(Leaf):
152 | """The value is the name."""
153 |
154 | def __str__(self) -> str:
155 | if self.value == "ENDMARKER":
156 | return "$"
157 | return super().__str__()
158 |
159 | def __repr__(self) -> str:
160 | return f"NameLeaf({self.value!r})"
161 |
162 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
163 | if self.value in rules:
164 | return rules[self.value].nullable_visit(rules)
165 | # Token or unknown; never empty.
166 | return False
167 |
168 | def initial_names(self) -> AbstractSet[str]:
169 | return {self.value}
170 |
171 |
172 | class StringLeaf(Leaf):
173 | """The value is a string literal, including quotes."""
174 |
175 | def __repr__(self) -> str:
176 | return f"StringLeaf({self.value!r})"
177 |
178 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
179 | # The string token '' is considered empty.
180 | return not self.value
181 |
182 | def initial_names(self) -> AbstractSet[str]:
183 | return set()
184 |
185 |
186 | class Rhs:
187 | def __init__(self, alts: List[Alt]):
188 | self.alts = alts
189 | self.memo: Optional[Tuple[Optional[str], str]] = None
190 |
191 | def __str__(self) -> str:
192 | return " | ".join(str(alt) for alt in self.alts)
193 |
194 | def __repr__(self) -> str:
195 | return f"Rhs({self.alts!r})"
196 |
197 | def __iter__(self) -> Iterator[List[Alt]]:
198 | yield self.alts
199 |
200 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
201 | for alt in self.alts:
202 | if alt.nullable_visit(rules):
203 | return True
204 | return False
205 |
206 | def initial_names(self) -> AbstractSet[str]:
207 | names: Set[str] = set()
208 | for alt in self.alts:
209 | names |= alt.initial_names()
210 | return names
211 |
212 | def collect_todo(self, gen: ParserGenerator) -> None:
213 | for alt in self.alts:
214 | alt.collect_todo(gen)
215 |
216 |
217 | class Alt:
218 | def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
219 | self.items = items
220 | self.icut = icut
221 | self.action = action
222 |
223 | def __str__(self) -> str:
224 | core = " ".join(str(item) for item in self.items)
225 | if not SIMPLE_STR and self.action:
226 | return f"{core} {{ {self.action} }}"
227 | else:
228 | return core
229 |
230 | def __repr__(self) -> str:
231 | args = [repr(self.items)]
232 | if self.icut >= 0:
233 | args.append(f"icut={self.icut}")
234 | if self.action:
235 | args.append(f"action={self.action!r}")
236 | return f"Alt({', '.join(args)})"
237 |
238 | def __iter__(self) -> Iterator[List[NamedItem]]:
239 | yield self.items
240 |
241 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
242 | for item in self.items:
243 | if not item.nullable_visit(rules):
244 | return False
245 | return True
246 |
247 | def initial_names(self) -> AbstractSet[str]:
248 | names: Set[str] = set()
249 | for item in self.items:
250 | names |= item.initial_names()
251 | if not item.nullable:
252 | break
253 | return names
254 |
255 | def collect_todo(self, gen: ParserGenerator) -> None:
256 | for item in self.items:
257 | item.collect_todo(gen)
258 |
259 |
260 | class NamedItem:
261 | def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
262 | self.name = name
263 | self.item = item
264 | self.type = type
265 | self.nullable = False
266 |
267 | def __str__(self) -> str:
268 | if not SIMPLE_STR and self.name:
269 | return f"{self.name}={self.item}"
270 | else:
271 | return str(self.item)
272 |
273 | def __repr__(self) -> str:
274 | return f"NamedItem({self.name!r}, {self.item!r})"
275 |
276 | def __iter__(self) -> Iterator[Item]:
277 | yield self.item
278 |
279 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
280 | self.nullable = self.item.nullable_visit(rules)
281 | return self.nullable
282 |
283 | def initial_names(self) -> AbstractSet[str]:
284 | return self.item.initial_names()
285 |
286 | def collect_todo(self, gen: ParserGenerator) -> None:
287 | gen.callmakervisitor.visit(self.item)
288 |
289 |
290 | class Forced:
291 | def __init__(self, node: Plain):
292 | self.node = node
293 |
294 | def __str__(self) -> str:
295 | return f"&&{self.node}"
296 |
297 | def __iter__(self) -> Iterator[Plain]:
298 | yield self.node
299 |
300 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
301 | return True
302 |
303 | def initial_names(self) -> AbstractSet[str]:
304 | return set()
305 |
306 |
307 | class Lookahead:
308 | def __init__(self, node: Plain, sign: str):
309 | self.node = node
310 | self.sign = sign
311 |
312 | def __str__(self) -> str:
313 | return f"{self.sign}{self.node}"
314 |
315 | def __iter__(self) -> Iterator[Plain]:
316 | yield self.node
317 |
318 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
319 | return True
320 |
321 | def initial_names(self) -> AbstractSet[str]:
322 | return set()
323 |
324 |
325 | class PositiveLookahead(Lookahead):
326 | def __init__(self, node: Plain):
327 | super().__init__(node, "&")
328 |
329 | def __repr__(self) -> str:
330 | return f"PositiveLookahead({self.node!r})"
331 |
332 |
333 | class NegativeLookahead(Lookahead):
334 | def __init__(self, node: Plain):
335 | super().__init__(node, "!")
336 |
337 | def __repr__(self) -> str:
338 | return f"NegativeLookahead({self.node!r})"
339 |
340 |
341 | class Opt:
342 | def __init__(self, node: Item):
343 | self.node = node
344 |
345 | def __str__(self) -> str:
346 | s = str(self.node)
347 | # TODO: Decide whether to use [X] or X? based on type of X
348 | if " " in s:
349 | return f"[{s}]"
350 | else:
351 | return f"{s}?"
352 |
353 | def __repr__(self) -> str:
354 | return f"Opt({self.node!r})"
355 |
356 | def __iter__(self) -> Iterator[Item]:
357 | yield self.node
358 |
359 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
360 | return True
361 |
362 | def initial_names(self) -> AbstractSet[str]:
363 | return self.node.initial_names()
364 |
365 |
366 | class Repeat:
367 | """Shared base class for x* and x+."""
368 |
369 | def __init__(self, node: Plain):
370 | self.node = node
371 | self.memo: Optional[Tuple[Optional[str], str]] = None
372 |
373 | @abstractmethod
374 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
375 | raise NotImplementedError
376 |
377 | def __iter__(self) -> Iterator[Plain]:
378 | yield self.node
379 |
380 | def initial_names(self) -> AbstractSet[str]:
381 | return self.node.initial_names()
382 |
383 |
384 | class Repeat0(Repeat):
385 | def __str__(self) -> str:
386 | s = str(self.node)
387 | # TODO: Decide whether to use (X)* or X* based on type of X
388 | if " " in s:
389 | return f"({s})*"
390 | else:
391 | return f"{s}*"
392 |
393 | def __repr__(self) -> str:
394 | return f"Repeat0({self.node!r})"
395 |
396 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
397 | return True
398 |
399 |
400 | class Repeat1(Repeat):
401 | def __str__(self) -> str:
402 | s = str(self.node)
403 | # TODO: Decide whether to use (X)+ or X+ based on type of X
404 | if " " in s:
405 | return f"({s})+"
406 | else:
407 | return f"{s}+"
408 |
409 | def __repr__(self) -> str:
410 | return f"Repeat1({self.node!r})"
411 |
412 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
413 | return False
414 |
415 |
416 | class Gather(Repeat):
417 | def __init__(self, separator: Plain, node: Plain):
418 | self.separator = separator
419 | self.node = node
420 |
421 | def __str__(self) -> str:
422 | return f"{self.separator!s}.{self.node!s}+"
423 |
424 | def __repr__(self) -> str:
425 | return f"Gather({self.separator!r}, {self.node!r})"
426 |
427 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
428 | return False
429 |
430 |
431 | class Group:
432 | def __init__(self, rhs: Rhs):
433 | self.rhs = rhs
434 |
435 | def __str__(self) -> str:
436 | return f"({self.rhs})"
437 |
438 | def __repr__(self) -> str:
439 | return f"Group({self.rhs!r})"
440 |
441 | def __iter__(self) -> Iterator[Rhs]:
442 | yield self.rhs
443 |
444 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
445 | return self.rhs.nullable_visit(rules)
446 |
447 | def initial_names(self) -> AbstractSet[str]:
448 | return self.rhs.initial_names()
449 |
450 |
451 | class Cut:
452 | def __init__(self) -> None:
453 | pass
454 |
455 | def __repr__(self) -> str:
456 | return "Cut()"
457 |
458 | def __str__(self) -> str:
459 | return "~"
460 |
461 | def __iter__(self) -> Iterator[Tuple[str, str]]:
462 | if False:
463 | yield
464 |
465 | def __eq__(self, other: object) -> bool:
466 | if not isinstance(other, Cut):
467 | return NotImplemented
468 | return True
469 |
470 | def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
471 | return True
472 |
473 | def initial_names(self) -> AbstractSet[str]:
474 | return set()
475 |
476 |
477 | Plain = Union[Leaf, Group]
478 | Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut]
479 | RuleName = Tuple[str, str]
480 | MetaTuple = Tuple[str, Optional[str]]
481 | MetaList = List[MetaTuple]
482 | RuleList = List[Rule]
483 | NamedItemList = List[NamedItem]
484 | LookaheadOrCut = Union[Lookahead, Forced, Cut]
485 |
--------------------------------------------------------------------------------
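To make the node classes concrete, here is a small sketch (not part of the vendored sources) that assembles the rule `expr: NAME '+' NAME` by hand and round-trips it through `__str__`:

    from pegen.grammar import Alt, Grammar, NamedItem, NameLeaf, Rhs, Rule, StringLeaf

    alt = Alt([
        NamedItem(None, NameLeaf("NAME")),
        NamedItem(None, StringLeaf("'+'")),  # the quotes are part of a StringLeaf's value
        NamedItem(None, NameLeaf("NAME")),
    ])
    rule = Rule("expr", None, Rhs([alt]))
    grammar = Grammar([rule], [])

    print(grammar)  # expr: NAME '+' NAME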
/mingshe/_vendor/pegen/grammar_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.8
2 | # @generated by pegen from src/pegen/metagrammar.gram
3 |
4 | import ast
5 | import sys
6 | import tokenize
7 |
8 | from typing import Any, Optional
9 |
10 | from pegen.parser import memoize, memoize_left_rec, logger, Parser
11 | from ast import literal_eval
12 |
13 | from pegen.grammar import (
14 | Alt,
15 | Cut,
16 | Forced,
17 | Gather,
18 | Group,
19 | Item,
20 | Lookahead,
21 | LookaheadOrCut,
22 | MetaTuple,
23 | MetaList,
24 | NameLeaf,
25 | NamedItem,
26 | NamedItemList,
27 | NegativeLookahead,
28 | Opt,
29 | Plain,
30 | PositiveLookahead,
31 | Repeat0,
32 | Repeat1,
33 | Rhs,
34 | Rule,
35 | RuleList,
36 | RuleName,
37 | Grammar,
38 | StringLeaf,
39 | )
40 |
41 | # Keywords and soft keywords are listed at the end of the parser definition.
42 | class GeneratedParser(Parser):
43 | @memoize
44 | def start(self) -> Optional[Grammar]:
45 | # start: grammar $
46 | mark = self._mark()
47 | if (grammar := self.grammar()) and (_endmarker := self.expect("ENDMARKER")):
48 | return grammar
49 | self._reset(mark)
50 | return None
51 |
52 | @memoize
53 | def grammar(self) -> Optional[Grammar]:
54 | # grammar: metas rules | rules
55 | mark = self._mark()
56 | if (metas := self.metas()) and (rules := self.rules()):
57 | return Grammar(rules, metas)
58 | self._reset(mark)
59 | if rules := self.rules():
60 | return Grammar(rules, [])
61 | self._reset(mark)
62 | return None
63 |
64 | @memoize
65 | def metas(self) -> Optional[MetaList]:
66 | # metas: meta metas | meta
67 | mark = self._mark()
68 | if (meta := self.meta()) and (metas := self.metas()):
69 | return [meta] + metas
70 | self._reset(mark)
71 | if meta := self.meta():
72 | return [meta]
73 | self._reset(mark)
74 | return None
75 |
76 | @memoize
77 | def meta(self) -> Optional[MetaTuple]:
78 | # meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE
79 | mark = self._mark()
80 | if (
81 | (literal := self.expect("@"))
82 | and (name := self.name())
83 | and (_newline := self.expect("NEWLINE"))
84 | ):
85 | return (name.string, None)
86 | self._reset(mark)
87 | if (
88 | (literal := self.expect("@"))
89 | and (a := self.name())
90 | and (b := self.name())
91 | and (_newline := self.expect("NEWLINE"))
92 | ):
93 | return (a.string, b.string)
94 | self._reset(mark)
95 | if (
96 | (literal := self.expect("@"))
97 | and (name := self.name())
98 | and (string := self.string())
99 | and (_newline := self.expect("NEWLINE"))
100 | ):
101 | return (name.string, literal_eval(string.string))
102 | self._reset(mark)
103 | return None
104 |
105 | @memoize
106 | def rules(self) -> Optional[RuleList]:
107 | # rules: rule rules | rule
108 | mark = self._mark()
109 | if (rule := self.rule()) and (rules := self.rules()):
110 | return [rule] + rules
111 | self._reset(mark)
112 | if rule := self.rule():
113 | return [rule]
114 | self._reset(mark)
115 | return None
116 |
117 | @memoize
118 | def rule(self) -> Optional[Rule]:
119 | # rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
120 | mark = self._mark()
121 | if (
122 | (rulename := self.rulename())
123 | and (opt := self.memoflag(),)
124 | and (literal := self.expect(":"))
125 | and (alts := self.alts())
126 | and (_newline := self.expect("NEWLINE"))
127 | and (_indent := self.expect("INDENT"))
128 | and (more_alts := self.more_alts())
129 | and (_dedent := self.expect("DEDENT"))
130 | ):
131 | return Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt)
132 | self._reset(mark)
133 | if (
134 | (rulename := self.rulename())
135 | and (opt := self.memoflag(),)
136 | and (literal := self.expect(":"))
137 | and (_newline := self.expect("NEWLINE"))
138 | and (_indent := self.expect("INDENT"))
139 | and (more_alts := self.more_alts())
140 | and (_dedent := self.expect("DEDENT"))
141 | ):
142 | return Rule(rulename[0], rulename[1], more_alts, memo=opt)
143 | self._reset(mark)
144 | if (
145 | (rulename := self.rulename())
146 | and (opt := self.memoflag(),)
147 | and (literal := self.expect(":"))
148 | and (alts := self.alts())
149 | and (_newline := self.expect("NEWLINE"))
150 | ):
151 | return Rule(rulename[0], rulename[1], alts, memo=opt)
152 | self._reset(mark)
153 | return None
154 |
155 | @memoize
156 | def rulename(self) -> Optional[RuleName]:
157 | # rulename: NAME annotation | NAME
158 | mark = self._mark()
159 | if (name := self.name()) and (annotation := self.annotation()):
160 | return (name.string, annotation)
161 | self._reset(mark)
162 | if name := self.name():
163 | return (name.string, None)
164 | self._reset(mark)
165 | return None
166 |
167 | @memoize
168 | def memoflag(self) -> Optional[str]:
169 | # memoflag: '(' "memo" ')'
170 | mark = self._mark()
171 | if (
172 | (literal := self.expect("("))
173 | and (literal_1 := self.expect("memo"))
174 | and (literal_2 := self.expect(")"))
175 | ):
176 | return "memo"
177 | self._reset(mark)
178 | return None
179 |
180 | @memoize
181 | def alts(self) -> Optional[Rhs]:
182 | # alts: alt "|" alts | alt
183 | mark = self._mark()
184 | if (alt := self.alt()) and (literal := self.expect("|")) and (alts := self.alts()):
185 | return Rhs([alt] + alts.alts)
186 | self._reset(mark)
187 | if alt := self.alt():
188 | return Rhs([alt])
189 | self._reset(mark)
190 | return None
191 |
192 | @memoize
193 | def more_alts(self) -> Optional[Rhs]:
194 | # more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE
195 | mark = self._mark()
196 | if (
197 | (literal := self.expect("|"))
198 | and (alts := self.alts())
199 | and (_newline := self.expect("NEWLINE"))
200 | and (more_alts := self.more_alts())
201 | ):
202 | return Rhs(alts.alts + more_alts.alts)
203 | self._reset(mark)
204 | if (
205 | (literal := self.expect("|"))
206 | and (alts := self.alts())
207 | and (_newline := self.expect("NEWLINE"))
208 | ):
209 | return Rhs(alts.alts)
210 | self._reset(mark)
211 | return None
212 |
213 | @memoize
214 | def alt(self) -> Optional[Alt]:
215 | # alt: items '$' action | items '$' | items action | items
216 | mark = self._mark()
217 | if (items := self.items()) and (literal := self.expect("$")) and (action := self.action()):
218 | return Alt(items + [NamedItem(None, NameLeaf("ENDMARKER"))], action=action)
219 | self._reset(mark)
220 | if (items := self.items()) and (literal := self.expect("$")):
221 | return Alt(items + [NamedItem(None, NameLeaf("ENDMARKER"))], action=None)
222 | self._reset(mark)
223 | if (items := self.items()) and (action := self.action()):
224 | return Alt(items, action=action)
225 | self._reset(mark)
226 | if items := self.items():
227 | return Alt(items, action=None)
228 | self._reset(mark)
229 | return None
230 |
231 | @memoize
232 | def items(self) -> Optional[NamedItemList]:
233 | # items: named_item items | named_item
234 | mark = self._mark()
235 | if (named_item := self.named_item()) and (items := self.items()):
236 | return [named_item] + items
237 | self._reset(mark)
238 | if named_item := self.named_item():
239 | return [named_item]
240 | self._reset(mark)
241 | return None
242 |
243 | @memoize
244 | def named_item(self) -> Optional[NamedItem]:
245 | # named_item: NAME annotation '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
246 | mark = self._mark()
247 | cut = False
248 | if (
249 | (name := self.name())
250 | and (annotation := self.annotation())
251 | and (literal := self.expect("="))
252 | and (cut := True)
253 | and (item := self.item())
254 | ):
255 | return NamedItem(name.string, item, annotation)
256 | self._reset(mark)
257 | if cut:
258 | return None
259 | cut = False
260 | if (
261 | (name := self.name())
262 | and (literal := self.expect("="))
263 | and (cut := True)
264 | and (item := self.item())
265 | ):
266 | return NamedItem(name.string, item)
267 | self._reset(mark)
268 | if cut:
269 | return None
270 | if item := self.item():
271 | return NamedItem(None, item)
272 | self._reset(mark)
273 | if it := self.forced_atom():
274 | return NamedItem(None, it)
275 | self._reset(mark)
276 | if it := self.lookahead():
277 | return NamedItem(None, it)
278 | self._reset(mark)
279 | return None
280 |
281 | @memoize
282 | def forced_atom(self) -> Optional[LookaheadOrCut]:
283 | # forced_atom: '&' '&' ~ atom
284 | mark = self._mark()
285 | cut = False
286 | if (
287 | (literal := self.expect("&"))
288 | and (literal_1 := self.expect("&"))
289 | and (cut := True)
290 | and (atom := self.atom())
291 | ):
292 | return Forced(atom)
293 | self._reset(mark)
294 | if cut:
295 | return None
296 | return None
297 |
298 | @memoize
299 | def lookahead(self) -> Optional[LookaheadOrCut]:
300 | # lookahead: '&' ~ atom | '!' ~ atom | '~'
301 | mark = self._mark()
302 | cut = False
303 | if (literal := self.expect("&")) and (cut := True) and (atom := self.atom()):
304 | return PositiveLookahead(atom)
305 | self._reset(mark)
306 | if cut:
307 | return None
308 | cut = False
309 | if (literal := self.expect("!")) and (cut := True) and (atom := self.atom()):
310 | return NegativeLookahead(atom)
311 | self._reset(mark)
312 | if cut:
313 | return None
314 | if literal := self.expect("~"):
315 | return Cut()
316 | self._reset(mark)
317 | return None
318 |
319 | @memoize
320 | def item(self) -> Optional[Item]:
321 | # item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' atom '+' | atom
322 | mark = self._mark()
323 | cut = False
324 | if (
325 | (literal := self.expect("["))
326 | and (cut := True)
327 | and (alts := self.alts())
328 | and (literal_1 := self.expect("]"))
329 | ):
330 | return Opt(alts)
331 | self._reset(mark)
332 | if cut:
333 | return None
334 | if (atom := self.atom()) and (literal := self.expect("?")):
335 | return Opt(atom)
336 | self._reset(mark)
337 | if (atom := self.atom()) and (literal := self.expect("*")):
338 | return Repeat0(atom)
339 | self._reset(mark)
340 | if (atom := self.atom()) and (literal := self.expect("+")):
341 | return Repeat1(atom)
342 | self._reset(mark)
343 | if (
344 | (sep := self.atom())
345 | and (literal := self.expect("."))
346 | and (node := self.atom())
347 | and (literal_1 := self.expect("+"))
348 | ):
349 | return Gather(sep, node)
350 | self._reset(mark)
351 | if atom := self.atom():
352 | return atom
353 | self._reset(mark)
354 | return None
355 |
356 | @memoize
357 | def atom(self) -> Optional[Plain]:
358 | # atom: '(' ~ alts ')' | NAME | STRING
359 | mark = self._mark()
360 | cut = False
361 | if (
362 | (literal := self.expect("("))
363 | and (cut := True)
364 | and (alts := self.alts())
365 | and (literal_1 := self.expect(")"))
366 | ):
367 | return Group(alts)
368 | self._reset(mark)
369 | if cut:
370 | return None
371 | if name := self.name():
372 | return NameLeaf(name.string)
373 | self._reset(mark)
374 | if string := self.string():
375 | return StringLeaf(string.string)
376 | self._reset(mark)
377 | return None
378 |
379 | @memoize
380 | def action(self) -> Optional[str]:
381 | # action: "{" ~ target_atoms "}"
382 | mark = self._mark()
383 | cut = False
384 | if (
385 | (literal := self.expect("{"))
386 | and (cut := True)
387 | and (target_atoms := self.target_atoms())
388 | and (literal_1 := self.expect("}"))
389 | ):
390 | return target_atoms
391 | self._reset(mark)
392 | if cut:
393 | return None
394 | return None
395 |
396 | @memoize
397 | def annotation(self) -> Optional[str]:
398 | # annotation: "[" ~ target_atoms "]"
399 | mark = self._mark()
400 | cut = False
401 | if (
402 | (literal := self.expect("["))
403 | and (cut := True)
404 | and (target_atoms := self.target_atoms())
405 | and (literal_1 := self.expect("]"))
406 | ):
407 | return target_atoms
408 | self._reset(mark)
409 | if cut:
410 | return None
411 | return None
412 |
413 | @memoize
414 | def target_atoms(self) -> Optional[str]:
415 | # target_atoms: target_atom target_atoms | target_atom
416 | mark = self._mark()
417 | if (target_atom := self.target_atom()) and (target_atoms := self.target_atoms()):
418 | return target_atom + " " + target_atoms
419 | self._reset(mark)
420 | if target_atom := self.target_atom():
421 | return target_atom
422 | self._reset(mark)
423 | return None
424 |
425 | @memoize
426 | def target_atom(self) -> Optional[str]:
427 | # target_atom: "{" ~ target_atoms? "}" | "[" ~ target_atoms? "]" | NAME "*" | NAME | NUMBER | STRING | "?" | ":" | !"}" !"]" OP
428 | mark = self._mark()
429 | cut = False
430 | if (
431 | (literal := self.expect("{"))
432 | and (cut := True)
433 | and (atoms := self.target_atoms(),)
434 | and (literal_1 := self.expect("}"))
435 | ):
436 | return "{" + (atoms or "") + "}"
437 | self._reset(mark)
438 | if cut:
439 | return None
440 | cut = False
441 | if (
442 | (literal := self.expect("["))
443 | and (cut := True)
444 | and (atoms := self.target_atoms(),)
445 | and (literal_1 := self.expect("]"))
446 | ):
447 | return "[" + (atoms or "") + "]"
448 | self._reset(mark)
449 | if cut:
450 | return None
451 | if (name := self.name()) and (literal := self.expect("*")):
452 | return name.string + "*"
453 | self._reset(mark)
454 | if name := self.name():
455 | return name.string
456 | self._reset(mark)
457 | if number := self.number():
458 | return number.string
459 | self._reset(mark)
460 | if string := self.string():
461 | return string.string
462 | self._reset(mark)
463 | if literal := self.expect("?"):
464 | return "?"
465 | self._reset(mark)
466 | if literal := self.expect(":"):
467 | return ":"
468 | self._reset(mark)
469 | if (
470 | self.negative_lookahead(self.expect, "}")
471 | and self.negative_lookahead(self.expect, "]")
472 | and (op := self.op())
473 | ):
474 | return op.string
475 | self._reset(mark)
476 | return None
477 |
478 | KEYWORDS = ()
479 | SOFT_KEYWORDS = ("memo",)
480 |
481 |
482 | if __name__ == "__main__":
483 | from pegen.parser import simple_parser_main
484 |
485 | simple_parser_main(GeneratedParser)
486 |
--------------------------------------------------------------------------------
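A short sketch of driving the generated metagrammar parser by hand (illustrative only; same `sys.path` assumption as above):

    import io
    import tokenize

    from pegen.grammar_parser import GeneratedParser
    from pegen.tokenizer import Tokenizer

    source = "start: NAME NEWLINE\n"
    token_stream = tokenize.generate_tokens(io.StringIO(source).readline)
    grammar = GeneratedParser(Tokenizer(token_stream)).start()
    print(grammar)  # start: NAME NEWLINE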
/mingshe/_vendor/pegen/grammar_visualizer.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | from typing import Any, Callable, Iterator
4 |
5 | from pegen.build import build_parser
6 | from pegen.grammar import Grammar, Rule
7 |
8 | argparser = argparse.ArgumentParser(
9 | prog="pegen", description="Pretty print the AST for a given PEG grammar"
10 | )
11 | argparser.add_argument("filename", help="Grammar description")
12 |
13 |
14 | class ASTGrammarPrinter:
15 | def children(self, node: Rule) -> Iterator[Any]:
16 | for value in node:
17 | if isinstance(value, list):
18 | yield from value
19 | else:
20 | yield value
21 |
22 | def name(self, node: Rule) -> str:
23 | if not list(self.children(node)):
24 | return repr(node)
25 | return node.__class__.__name__
26 |
27 | def print_grammar_ast(self, grammar: Grammar, printer: Callable[..., None] = print) -> None:
28 | for rule in grammar.rules.values():
29 | printer(self.print_nodes_recursively(rule))
30 |
31 | def print_nodes_recursively(self, node: Rule, prefix: str = "", istail: bool = True) -> str:
32 |
33 | children = list(self.children(node))
34 | value = self.name(node)
35 |
36 | line = prefix + ("└──" if istail else "├──") + value + "\n"
37 | suffix = " " if istail else "│ "
38 |
39 | if not children:
40 | return line
41 |
42 | *children, last = children
43 | for child in children:
44 | line += self.print_nodes_recursively(child, prefix + suffix, False)
45 | line += self.print_nodes_recursively(last, prefix + suffix, True)
46 |
47 | return line
48 |
49 |
50 | def main() -> None:
51 | args = argparser.parse_args()
52 |
53 | try:
54 | grammar, parser, tokenizer = build_parser(args.filename)
55 | except Exception as err:
56 | print(f"ERROR: Failed to parse grammar file: {err}", file=sys.stderr)
57 | sys.exit(1)
58 |
59 | visitor = ASTGrammarPrinter()
60 | visitor.print_grammar_ast(grammar)
61 |
62 |
63 | if __name__ == "__main__":
64 | main()
65 |
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/metagrammar.gram:
--------------------------------------------------------------------------------
1 | @subheader """\
2 | from ast import literal_eval
3 |
4 | from pegen.grammar import (
5 | Alt,
6 | Cut,
7 | Forced,
8 | Gather,
9 | Group,
10 | Item,
11 | Lookahead,
12 | LookaheadOrCut,
13 | MetaTuple,
14 | MetaList,
15 | NameLeaf,
16 | NamedItem,
17 | NamedItemList,
18 | NegativeLookahead,
19 | Opt,
20 | Plain,
21 | PositiveLookahead,
22 | Repeat0,
23 | Repeat1,
24 | Rhs,
25 | Rule,
26 | RuleList,
27 | RuleName,
28 | Grammar,
29 | StringLeaf,
30 | )
31 | """
32 |
33 | start[Grammar]: grammar ENDMARKER { grammar }
34 |
35 | grammar[Grammar]:
36 | | metas rules { Grammar(rules, metas) }
37 | | rules { Grammar(rules, []) }
38 |
39 | metas[MetaList]:
40 | | meta metas { [meta] + metas }
41 | | meta { [meta] }
42 |
43 | meta[MetaTuple]:
44 | | "@" NAME NEWLINE { (name.string, None) }
45 | | "@" a=NAME b=NAME NEWLINE { (a.string, b.string) }
46 | | "@" NAME STRING NEWLINE { (name.string, literal_eval(string.string)) }
47 |
48 | rules[RuleList]:
49 | | rule rules { [rule] + rules }
50 | | rule { [rule] }
51 |
52 | rule[Rule]:
53 | | rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT {
54 | Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt) }
55 | | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT {
56 | Rule(rulename[0], rulename[1], more_alts, memo=opt) }
57 | | rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
58 |
59 | rulename[RuleName]:
60 | | NAME annotation { (name.string, annotation) }
61 | | NAME { (name.string, None) }
62 |
63 | # In the future this may return something more complicated
64 | memoflag[str]:
65 | | '(' "memo" ')' { "memo" }
66 |
67 | alts[Rhs]:
68 | | alt "|" alts { Rhs([alt] + alts.alts)}
69 | | alt { Rhs([alt]) }
70 |
71 | more_alts[Rhs]:
72 | | "|" alts NEWLINE more_alts { Rhs(alts.alts + more_alts.alts) }
73 | | "|" alts NEWLINE { Rhs(alts.alts) }
74 |
75 | alt[Alt]:
76 | | items '$' action { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=action) }
77 | | items '$' { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=None) }
78 | | items action { Alt(items, action=action) }
79 | | items { Alt(items, action=None) }
80 |
81 | items[NamedItemList]:
82 | | named_item items { [named_item] + items }
83 | | named_item { [named_item] }
84 |
85 | named_item[NamedItem]:
86 | | NAME annotation '=' ~ item {NamedItem(name.string, item, annotation)}
87 | | NAME '=' ~ item {NamedItem(name.string, item)}
88 | | item {NamedItem(None, item)}
89 | | it=forced_atom {NamedItem(None, it)}
90 | | it=lookahead {NamedItem(None, it)}
91 |
92 | forced_atom[LookaheadOrCut]:
93 | | '&''&' ~ atom {Forced(atom)}
94 |
95 | lookahead[LookaheadOrCut]:
96 | | '&' ~ atom {PositiveLookahead(atom)}
97 | | '!' ~ atom {NegativeLookahead(atom)}
98 | | '~' {Cut()}
99 |
100 | item[Item]:
101 | | '[' ~ alts ']' {Opt(alts)}
102 | | atom '?' {Opt(atom)}
103 | | atom '*' {Repeat0(atom)}
104 | | atom '+' {Repeat1(atom)}
105 | | sep=atom '.' node=atom '+' {Gather(sep, node)}
106 | | atom {atom}
107 |
108 | atom[Plain]:
109 | | '(' ~ alts ')' {Group(alts)}
110 | | NAME {NameLeaf(name.string) }
111 | | STRING {StringLeaf(string.string)}
112 |
113 | # Mini-grammar for the actions and annotations
114 |
115 | action[str]: "{" ~ target_atoms "}" { target_atoms }
116 | annotation[str]: "[" ~ target_atoms "]" { target_atoms }
117 |
118 | target_atoms[str]:
119 | | target_atom target_atoms { target_atom + " " + target_atoms }
120 | | target_atom { target_atom }
121 |
122 | target_atom[str]:
123 | | "{" ~ atoms=target_atoms? "}" { "{" + (atoms or "") + "}" }
124 | | "[" ~ atoms=target_atoms? "]" { "[" + (atoms or "") + "]" }
125 | | NAME "*" { name.string + "*" }
126 | | NAME { name.string }
127 | | NUMBER { number.string }
128 | | STRING { string.string }
129 | | "?" { "?" }
130 | | ":" { ":" }
131 | | !"}" !"]" OP { op.string }
132 |
--------------------------------------------------------------------------------
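Each alternative in this metagrammar compiles to one `if` block in grammar_parser.py above. For example, the first alternative of `lookahead`, `'&' ~ atom {PositiveLookahead(atom)}`, becomes:

    if (literal := self.expect("&")) and (cut := True) and (atom := self.atom()):
        return PositiveLookahead(atom)

The cut (`~`) becomes the `cut := True` assignment: once it is set, a failed match returns None instead of falling through to the remaining alternatives.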
/mingshe/_vendor/pegen/parser.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import time
4 | import token
5 | import tokenize
6 | import traceback
7 | from abc import abstractmethod
8 | from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
9 |
10 | from pegen.tokenizer import Mark, Tokenizer, exact_token_types
11 |
12 | T = TypeVar("T")
13 | P = TypeVar("P", bound="Parser")
14 | F = TypeVar("F", bound=Callable[..., Any])
15 |
16 |
17 | def logger(method: F) -> F:
18 | """For non-memoized functions that we want to be logged.
19 |
20 | (In practice these are only the non-leader left-recursive functions.)
21 | """
22 | method_name = method.__name__
23 |
24 | def logger_wrapper(self: P, *args: object) -> T:
25 | if not self._verbose:
26 | return method(self, *args)
27 | argsr = ",".join(repr(arg) for arg in args)
28 | fill = " " * self._level
29 | print(f"{fill}{method_name}({argsr}) .... (looking at {self.showpeek()})")
30 | self._level += 1
31 | tree = method(self, *args)
32 | self._level -= 1
33 | print(f"{fill}... {method_name}({argsr}) --> {tree!s:.200}")
34 | return tree
35 |
36 | logger_wrapper.__wrapped__ = method # type: ignore
37 | return cast(F, logger_wrapper)
38 |
39 |
40 | def memoize(method: F) -> F:
41 | """Memoize a symbol method."""
42 | method_name = method.__name__
43 |
44 | def memoize_wrapper(self: P, *args: object) -> T:
45 | mark = self._mark()
46 | key = mark, method_name, args
47 | # Fast path: cache hit, and not verbose.
48 | if key in self._cache and not self._verbose:
49 | tree, endmark = self._cache[key]
50 | self._reset(endmark)
51 | return tree
52 | # Slow path: no cache hit, or verbose.
53 | verbose = self._verbose
54 | argsr = ",".join(repr(arg) for arg in args)
55 | fill = " " * self._level
56 | if key not in self._cache:
57 | if verbose:
58 | print(f"{fill}{method_name}({argsr}) ... (looking at {self.showpeek()})")
59 | self._level += 1
60 | tree = method(self, *args)
61 | self._level -= 1
62 | if verbose:
63 | print(f"{fill}... {method_name}({argsr}) -> {tree!s:.200}")
64 | endmark = self._mark()
65 | self._cache[key] = tree, endmark
66 | else:
67 | tree, endmark = self._cache[key]
68 | if verbose:
69 | print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}")
70 | self._reset(endmark)
71 | return tree
72 |
73 | memoize_wrapper.__wrapped__ = method # type: ignore
74 | return cast(F, memoize_wrapper)
75 |
76 |
77 | def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Optional[T]]:
78 | """Memoize a left-recursive symbol method."""
79 | method_name = method.__name__
80 |
81 | def memoize_left_rec_wrapper(self: P) -> Optional[T]:
82 | mark = self._mark()
83 | key = mark, method_name, ()
84 | # Fast path: cache hit, and not verbose.
85 | if key in self._cache and not self._verbose:
86 | tree, endmark = self._cache[key]
87 | self._reset(endmark)
88 | return tree
89 | # Slow path: no cache hit, or verbose.
90 | verbose = self._verbose
91 | fill = " " * self._level
92 | if key not in self._cache:
93 | if verbose:
94 | print(f"{fill}{method_name} ... (looking at {self.showpeek()})")
95 | self._level += 1
96 |
97 | # For left-recursive rules we manipulate the cache and
98 | # loop until the rule shows no progress, then pick the
99 | # previous result. For an explanation of why this works, see
100 | # https://github.com/PhilippeSigaud/Pegged/wiki/Left-Recursion
101 | # (But we use the memoization cache instead of a static
102 | # variable; perhaps this is similar to a paper by Warth et al.
103 | # (http://web.cs.ucla.edu/~todd/research/pub.php?id=pepm08).)
104 |
105 | # Prime the cache with a failure.
106 | self._cache[key] = None, mark
107 | lastresult, lastmark = None, mark
108 | depth = 0
109 | if verbose:
110 | print(f"{fill}Recursive {method_name} at {mark} depth {depth}")
111 |
112 | while True:
113 | self._reset(mark)
114 | self.in_recursive_rule += 1
115 | try:
116 | result = method(self)
117 | finally:
118 | self.in_recursive_rule -= 1
119 | endmark = self._mark()
120 | depth += 1
121 | if verbose:
122 | print(
123 | f"{fill}Recursive {method_name} at {mark} depth {depth}: {result!s:.200} to {endmark}"
124 | )
125 | if not result:
126 | if verbose:
127 | print(f"{fill}Fail with {lastresult!s:.200} to {lastmark}")
128 | break
129 | if endmark <= lastmark:
130 | if verbose:
131 | print(f"{fill}Bailing with {lastresult!s:.200} to {lastmark}")
132 | break
133 | self._cache[key] = lastresult, lastmark = result, endmark
134 |
135 | self._reset(lastmark)
136 | tree = lastresult
137 |
138 | self._level -= 1
139 | if verbose:
140 | print(f"{fill}{method_name}() -> {tree!s:.200} [cached]")
141 | if tree:
142 | endmark = self._mark()
143 | else:
144 | endmark = mark
145 | self._reset(endmark)
146 | self._cache[key] = tree, endmark
147 | else:
148 | tree, endmark = self._cache[key]
149 | if verbose:
150 | print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]")
151 | if tree:
152 | self._reset(endmark)
153 | return tree
154 |
155 | memoize_left_rec_wrapper.__wrapped__ = method # type: ignore
156 | return memoize_left_rec_wrapper
157 |
158 |
159 | class Parser:
160 | """Parsing base class."""
161 |
162 | KEYWORDS: ClassVar[Tuple[str, ...]]
163 |
164 | SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
165 |
166 | def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
167 | self._tokenizer = tokenizer
168 | self._verbose = verbose
169 | self._level = 0
170 | self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
171 | # Integer tracking whether we are in a left recursive rule or not. Can be useful
172 | # for error reporting.
173 | self.in_recursive_rule = 0
174 | # Pass through common tokenizer methods.
175 | self._mark = self._tokenizer.mark
176 | self._reset = self._tokenizer.reset
177 |
178 | @abstractmethod
179 | def start(self) -> Any:
180 | pass
181 |
182 | def showpeek(self) -> str:
183 | tok = self._tokenizer.peek()
184 | return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
185 |
186 | @memoize
187 | def name(self) -> Optional[tokenize.TokenInfo]:
188 | tok = self._tokenizer.peek()
189 | if tok.type == token.NAME and tok.string not in self.KEYWORDS:
190 | return self._tokenizer.getnext()
191 | return None
192 |
193 | @memoize
194 | def number(self) -> Optional[tokenize.TokenInfo]:
195 | tok = self._tokenizer.peek()
196 | if tok.type == token.NUMBER:
197 | return self._tokenizer.getnext()
198 | return None
199 |
200 | @memoize
201 | def string(self) -> Optional[tokenize.TokenInfo]:
202 | tok = self._tokenizer.peek()
203 | if tok.type == token.STRING:
204 | return self._tokenizer.getnext()
205 | return None
206 |
207 | @memoize
208 | def op(self) -> Optional[tokenize.TokenInfo]:
209 | tok = self._tokenizer.peek()
210 | if tok.type == token.OP:
211 | return self._tokenizer.getnext()
212 | return None
213 |
214 | @memoize
215 | def type_comment(self) -> Optional[tokenize.TokenInfo]:
216 | tok = self._tokenizer.peek()
217 | if tok.type == token.TYPE_COMMENT:
218 | return self._tokenizer.getnext()
219 | return None
220 |
221 | @memoize
222 | def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
223 | tok = self._tokenizer.peek()
224 | if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
225 | return self._tokenizer.getnext()
226 | return None
227 |
228 | @memoize
229 | def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
230 | tok = self._tokenizer.peek()
231 | if tok.string == type:
232 | return self._tokenizer.getnext()
233 | if type in exact_token_types:
234 | if tok.type == exact_token_types[type]:
235 | return self._tokenizer.getnext()
236 | if type in token.__dict__:
237 | if tok.type == token.__dict__[type]:
238 | return self._tokenizer.getnext()
239 | if tok.type == token.OP and tok.string == type:
240 | return self._tokenizer.getnext()
241 | return None
242 |
243 | def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
244 | if res is None:
245 | raise self.make_syntax_error(f"expected {expectation}")
246 | return res
247 |
248 | def positive_lookahead(self, func: Callable[..., T], *args: object) -> T:
249 | mark = self._mark()
250 | ok = func(*args)
251 | self._reset(mark)
252 | return ok
253 |
254 | def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool:
255 | mark = self._mark()
256 | ok = func(*args)
257 | self._reset(mark)
258 | return not ok
259 |
260 | def make_syntax_error(self, message: str, filename: str = "<unknown>") -> SyntaxError:
261 | tok = self._tokenizer.diagnose()
262 | return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
263 |
264 |
265 | def simple_parser_main(parser_class: Type[Parser]) -> None:
266 | argparser = argparse.ArgumentParser()
267 | argparser.add_argument(
268 | "-v",
269 | "--verbose",
270 | action="count",
271 | default=0,
272 | help="Print timing stats; repeat for more debug output",
273 | )
274 | argparser.add_argument(
275 | "-q", "--quiet", action="store_true", help="Don't print the parsed program"
276 | )
277 | argparser.add_argument("filename", help="Input file ('-' to use stdin)")
278 |
279 | args = argparser.parse_args()
280 | verbose = args.verbose
281 | verbose_tokenizer = verbose >= 3
282 | verbose_parser = verbose == 2 or verbose >= 4
283 |
284 | t0 = time.time()
285 |
286 | filename = args.filename
287 | if filename == "" or filename == "-":
288 | filename = "<stdin>"
289 | file = sys.stdin
290 | else:
291 | file = open(args.filename)
292 | try:
293 | tokengen = tokenize.generate_tokens(file.readline)
294 | tokenizer = Tokenizer(tokengen, verbose=verbose_tokenizer)
295 | parser = parser_class(tokenizer, verbose=verbose_parser)
296 | tree = parser.start()
297 | try:
298 | if file.isatty():
299 | endpos = 0
300 | else:
301 | endpos = file.tell()
302 | except IOError:
303 | endpos = 0
304 | finally:
305 | if file is not sys.stdin:
306 | file.close()
307 |
308 | t1 = time.time()
309 |
310 | if not tree:
311 | err = parser.make_syntax_error("invalid syntax", filename)
312 | traceback.print_exception(err.__class__, err, None)
313 | sys.exit(1)
314 |
315 | if not args.quiet:
316 | print(tree)
317 |
318 | if verbose:
319 | dt = t1 - t0
320 | diag = tokenizer.diagnose()
321 | nlines = diag.end[0]
322 | if diag.type == token.ENDMARKER:
323 | nlines -= 1
324 | print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
325 | if endpos:
326 | print(f" ({endpos} bytes)", end="")
327 | if dt:
328 | print(f"; {nlines / dt:.0f} lines/sec")
329 | else:
330 | print()
331 | print("Cache sizes:")
332 | print(f" token array : {len(tokenizer._tokens):10}")
333 | print(f" cache : {len(parser._cache):10}")
334 | ## print_memstats()
335 |
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/parser_generator.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | from abc import abstractmethod
3 | from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
4 |
5 | from pegen import sccutils
6 | from pegen.grammar import (
7 | Alt,
8 | Gather,
9 | Grammar,
10 | GrammarError,
11 | GrammarVisitor,
12 | NamedItem,
13 | NameLeaf,
14 | Plain,
15 | Rhs,
16 | Rule,
17 | )
18 |
19 |
20 | class RuleCheckingVisitor(GrammarVisitor):
21 | def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
22 | self.rules = rules
23 | self.tokens = tokens
24 |
25 | def visit_NameLeaf(self, node: NameLeaf) -> None:
26 | if node.value not in self.rules and node.value not in self.tokens:
27 | # TODO: Add line/col info to (leaf) nodes
28 | raise GrammarError(f"Dangling reference to rule {node.value!r}")
29 |
30 | def visit_NamedItem(self, node: NamedItem) -> None:
31 | if node.name and node.name.startswith("_"):
32 | raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
33 | self.visit(node.item)
34 |
35 |
36 | class ParserGenerator:
37 |
38 | callmakervisitor: GrammarVisitor
39 |
40 | def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
41 | self.grammar = grammar
42 | self.tokens = tokens
43 | self.rules = grammar.rules
44 | self.validate_rule_names()
45 | if "trailer" not in grammar.metas and "start" not in self.rules:
46 | raise GrammarError("Grammar without a trailer must have a 'start' rule")
47 | checker = RuleCheckingVisitor(self.rules, self.tokens)
48 | for rule in self.rules.values():
49 | checker.visit(rule)
50 | self.file = file
51 | self.level = 0
52 | compute_nullables(self.rules)
53 | self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
54 | self.todo = self.rules.copy() # Rules to generate
55 | self.counter = 0 # For name_rule()/name_loop()
56 | self.all_rules: Dict[str, Rule] = {} # Rules + temporary (generated) rules
57 | self._local_variable_stack: List[List[str]] = []
58 |
59 | def validate_rule_names(self) -> None:
60 | for rule in self.rules:
61 | if rule.startswith("_"):
62 | raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")
63 |
64 | @contextlib.contextmanager
65 | def local_variable_context(self) -> Iterator[None]:
66 | self._local_variable_stack.append([])
67 | yield
68 | self._local_variable_stack.pop()
69 |
70 | @property
71 | def local_variable_names(self) -> List[str]:
72 | return self._local_variable_stack[-1]
73 |
74 | @abstractmethod
75 | def generate(self, filename: str) -> None:
76 | raise NotImplementedError
77 |
78 | @contextlib.contextmanager
79 | def indent(self) -> Iterator[None]:
80 | self.level += 1
81 | try:
82 | yield
83 | finally:
84 | self.level -= 1
85 |
86 | def print(self, *args: object) -> None:
87 | if not args:
88 | print(file=self.file)
89 | else:
90 | print(" " * self.level, end="", file=self.file)
91 | print(*args, file=self.file)
92 |
93 | def printblock(self, lines: str) -> None:
94 | for line in lines.splitlines():
95 | self.print(line)
96 |
97 | def collect_todo(self) -> None:
98 | done: Set[str] = set()
99 | while True:
100 | alltodo = list(self.todo)
101 | self.all_rules.update(self.todo)
102 | todo = [i for i in alltodo if i not in done]
103 | if not todo:
104 | break
105 | for rulename in todo:
106 | self.todo[rulename].collect_todo(self)
107 | done = set(alltodo)
108 |
109 | def name_node(self, rhs: Rhs) -> str:
110 | self.counter += 1
111 | name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
112 | self.todo[name] = Rule(name, None, rhs)
113 | return name
114 |
115 | def name_loop(self, node: Plain, is_repeat1: bool) -> str:
116 | self.counter += 1
117 | if is_repeat1:
118 | prefix = "_loop1_"
119 | else:
120 | prefix = "_loop0_"
121 | name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name.
122 | self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
123 | return name
124 |
125 | def name_gather(self, node: Gather) -> str:
126 | self.counter += 1
127 | name = f"_gather_{self.counter}"
128 | self.counter += 1
129 | extra_function_name = f"_loop0_{self.counter}"
130 | extra_function_alt = Alt(
131 | [NamedItem(None, node.separator), NamedItem("elem", node.node)],
132 | action="elem",
133 | )
134 | self.todo[extra_function_name] = Rule(
135 | extra_function_name,
136 | None,
137 | Rhs([extra_function_alt]),
138 | )
139 | alt = Alt(
140 | [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
141 | )
142 | self.todo[name] = Rule(
143 | name,
144 | None,
145 | Rhs([alt]),
146 | )
147 | return name
148 |
149 | def dedupe(self, name: str) -> str:
150 | origname = name
151 | counter = 0
152 | while name in self.local_variable_names:
153 | counter += 1
154 | name = f"{origname}_{counter}"
155 | self.local_variable_names.append(name)
156 | return name
157 |
158 |
159 | def compute_nullables(rules: Dict[str, Rule]) -> None:
160 | """Compute which rules in a grammar are nullable.
161 |
162 | Thanks to TatSu (tatsu/leftrec.py) for inspiration.
163 | """
164 | for rule in rules.values():
165 | rule.nullable_visit(rules)
166 |
167 |
168 | def compute_left_recursives(
169 | rules: Dict[str, Rule]
170 | ) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
171 | graph = make_first_graph(rules)
172 | sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
173 | for scc in sccs:
174 | if len(scc) > 1:
175 | for name in scc:
176 | rules[name].left_recursive = True
177 | # Try to find a leader such that all cycles go through it.
178 | leaders = set(scc)
179 | for start in scc:
180 | for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
181 | # print("Cycle:", " -> ".join(cycle))
182 | leaders -= scc - set(cycle)
183 | if not leaders:
184 | raise ValueError(
185 | f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
186 | )
187 | # print("Leaders:", leaders)
188 | leader = min(leaders) # Pick an arbitrary leader from the candidates.
189 | rules[leader].leader = True
190 | else:
191 | name = min(scc) # The only element.
192 | if name in graph[name]:
193 | rules[name].left_recursive = True
194 | rules[name].leader = True
195 | return graph, sccs
196 |
197 |
198 | def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
199 | """Compute the graph of left-invocations.
200 |
201 | There's an edge from A to B if A may invoke B at its initial
202 | position.
203 |
204 | Note that this requires the nullable flags to have been computed.
205 | """
206 | graph = {}
207 | vertices: Set[str] = set()
208 | for rulename, rhs in rules.items():
209 | graph[rulename] = names = rhs.initial_names()
210 | vertices |= names
211 | for vertex in vertices:
212 | graph.setdefault(vertex, set())
213 | return graph
214 |
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/python_generator.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import re
3 | import token
4 | from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
5 |
6 | from pegen import grammar
7 | from pegen.grammar import (
8 | Alt,
9 | Cut,
10 | Forced,
11 | Gather,
12 | GrammarVisitor,
13 | Group,
14 | Lookahead,
15 | NamedItem,
16 | NameLeaf,
17 | NegativeLookahead,
18 | Opt,
19 | PositiveLookahead,
20 | Repeat0,
21 | Repeat1,
22 | Rhs,
23 | Rule,
24 | StringLeaf,
25 | )
26 | from pegen.parser_generator import ParserGenerator
27 |
28 | MODULE_PREFIX = """\
29 | #!/usr/bin/env python3.8
30 | # @generated by pegen from {filename}
31 |
32 | import ast
33 | import sys
34 | import tokenize
35 |
36 | from typing import Any, Optional
37 |
38 | from pegen.parser import memoize, memoize_left_rec, logger, Parser
39 |
40 | """
41 | MODULE_SUFFIX = """
42 |
43 | if __name__ == '__main__':
44 | from pegen.parser import simple_parser_main
45 | simple_parser_main({class_name})
46 | """
47 |
48 |
49 | class InvalidNodeVisitor(GrammarVisitor):
50 | def visit_NameLeaf(self, node: NameLeaf) -> bool:
51 | name = node.value
52 | return name.startswith("invalid")
53 |
54 | def visit_StringLeaf(self, node: StringLeaf) -> bool:
55 | return False
56 |
57 | def visit_NamedItem(self, node: NamedItem) -> bool:
58 | return self.visit(node.item)
59 |
60 | def visit_Rhs(self, node: Rhs) -> bool:
61 | return any(self.visit(alt) for alt in node.alts)
62 |
63 | def visit_Alt(self, node: Alt) -> bool:
64 | return any(self.visit(item) for item in node.items)
65 |
66 | def lookahead_call_helper(self, node: Lookahead) -> bool:
67 | return self.visit(node.node)
68 |
69 | def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool:
70 | return self.lookahead_call_helper(node)
71 |
72 | def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
73 | return self.lookahead_call_helper(node)
74 |
75 | def visit_Opt(self, node: Opt) -> bool:
76 | return self.visit(node.node)
77 |
78 | def visit_Repeat(self, node: Repeat0) -> bool:
79 | return self.visit(node.node)
80 |
81 | def visit_Gather(self, node: Gather) -> bool:
82 | return self.visit(node.node)
83 |
84 | def visit_Group(self, node: Group) -> bool:
85 | return self.visit(node.rhs)
86 |
87 | def visit_Cut(self, node: Cut) -> bool:
88 | return False
89 |
90 | def visit_Forced(self, node: Forced) -> bool:
91 | return self.visit(node.node)
92 |
93 |
94 | class PythonCallMakerVisitor(GrammarVisitor):
95 | def __init__(self, parser_generator: ParserGenerator):
96 | self.gen = parser_generator
97 | self.cache: Dict[Any, Any] = {}
98 | self.keywords: Set[str] = set()
99 | self.soft_keywords: Set[str] = set()
100 |
101 | def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
102 | name = node.value
103 | if name == "SOFT_KEYWORD":
104 | return "soft_keyword", "self.soft_keyword()"
105 | if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"):
106 | name = name.lower()
107 | return name, f"self.{name}()"
108 | if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
109 | # Avoid using names that can be Python keywords
110 | return "_" + name.lower(), f"self.expect({name!r})"
111 | return name, f"self.{name}()"
112 |
113 | def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
114 | val = ast.literal_eval(node.value)
115 | if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
116 | if node.value.endswith("'"):
117 | self.keywords.add(val)
118 | else:
119 | self.soft_keywords.add(val)
120 | return "literal", f"self.expect({node.value})"
121 |
122 | def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
123 | if node in self.cache:
124 | return self.cache[node]
125 | if len(node.alts) == 1 and len(node.alts[0].items) == 1:
126 | self.cache[node] = self.visit(node.alts[0].items[0])
127 | else:
128 | name = self.gen.name_node(node)
129 | self.cache[node] = name, f"self.{name}()"
130 | return self.cache[node]
131 |
132 | def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
133 | name, call = self.visit(node.item)
134 | if node.name:
135 | name = node.name
136 | return name, call
137 |
138 | def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
139 | name, call = self.visit(node.node)
140 | head, tail = call.split("(", 1)
141 | assert tail[-1] == ")"
142 | tail = tail[:-1]
143 | return head, tail
144 |
145 | def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
146 | head, tail = self.lookahead_call_helper(node)
147 | return None, f"self.positive_lookahead({head}, {tail})"
148 |
149 | def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
150 | head, tail = self.lookahead_call_helper(node)
151 | return None, f"self.negative_lookahead({head}, {tail})"
152 |
153 | def visit_Opt(self, node: Opt) -> Tuple[str, str]:
154 | name, call = self.visit(node.node)
155 | # Note trailing comma (the call may already have one comma
156 | # at the end, for example when rules have both repeat0 and optional
157 | # markers, e.g. [rule*])
158 | if call.endswith(","):
159 | return "opt", call
160 | else:
161 | return "opt", f"{call},"
162 |
163 | def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
164 | if node in self.cache:
165 | return self.cache[node]
166 | name = self.gen.name_loop(node.node, False)
167 | self.cache[node] = name, f"self.{name}()," # Also a trailing comma!
168 | return self.cache[node]
169 |
170 | def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
171 | if node in self.cache:
172 | return self.cache[node]
173 | name = self.gen.name_loop(node.node, True)
174 | self.cache[node] = name, f"self.{name}()" # But no trailing comma here!
175 | return self.cache[node]
176 |
177 | def visit_Gather(self, node: Gather) -> Tuple[str, str]:
178 | if node in self.cache:
179 | return self.cache[node]
180 | name = self.gen.name_gather(node)
181 | self.cache[node] = name, f"self.{name}()" # No trailing comma here either!
182 | return self.cache[node]
183 |
184 | def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
185 | return self.visit(node.rhs)
186 |
187 | def visit_Cut(self, node: Cut) -> Tuple[str, str]:
188 | return "cut", "True"
189 |
190 | def visit_Forced(self, node: Forced) -> Tuple[str, str]:
191 | if isinstance(node.node, Group):
192 | _, val = self.visit(node.node.rhs)
193 | return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
194 | else:
195 | return (
196 | "forced",
197 | f"self.expect_forced(self.expect({node.node.value}), {node.node.value!r})",
198 | )
199 |
200 |
201 | class PythonParserGenerator(ParserGenerator, GrammarVisitor):
202 | def __init__(
203 | self,
204 | grammar: grammar.Grammar,
205 | file: Optional[IO[Text]],
206 | tokens: Set[str] = set(token.tok_name.values()),
207 | location_formatting: Optional[str] = None,
208 | unreachable_formatting: Optional[str] = None,
209 | ):
210 | tokens.add("SOFT_KEYWORD")
211 | super().__init__(grammar, tokens, file)
212 | self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
213 | self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor()
214 | self.unreachable_formatting = unreachable_formatting or "None # pragma: no cover"
215 | self.location_formatting = (
216 | location_formatting
217 | or "lineno=start_lineno, col_offset=start_col_offset, "
218 | "end_lineno=end_lineno, end_col_offset=end_col_offset"
219 | )
220 |
221 | def generate(self, filename: str) -> None:
222 | header = self.grammar.metas.get("header", MODULE_PREFIX)
223 | if header is not None:
224 | self.print(header.rstrip("\n").format(filename=filename))
225 | subheader = self.grammar.metas.get("subheader", "")
226 | if subheader:
227 | self.print(subheader)
228 | cls_name = self.grammar.metas.get("class", "GeneratedParser")
229 | self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
230 | self.print(f"class {cls_name}(Parser):")
231 | while self.todo:
232 | for rulename, rule in list(self.todo.items()):
233 | del self.todo[rulename]
234 | self.print()
235 | with self.indent():
236 | self.visit(rule)
237 |
238 | self.print()
239 | with self.indent():
240 | self.print(f"KEYWORDS = {tuple(sorted(self.callmakervisitor.keywords))}")
241 | self.print(f"SOFT_KEYWORDS = {tuple(sorted(self.callmakervisitor.soft_keywords))}")
242 |
243 | trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
244 | if trailer is not None:
245 | self.print(trailer.rstrip("\n"))
246 |
247 | def alts_uses_locations(self, alts: Sequence[Alt]) -> bool:
248 | for alt in alts:
249 | if alt.action and "LOCATIONS" in alt.action:
250 | return True
251 | for n in alt.items:
252 | if isinstance(n.item, Group) and self.alts_uses_locations(n.item.rhs.alts):
253 | return True
254 | return False
255 |
256 | def visit_Rule(self, node: Rule) -> None:
257 | is_loop = node.is_loop()
258 | is_gather = node.is_gather()
259 | rhs = node.flatten()
260 | if node.left_recursive:
261 | if node.leader:
262 | self.print("@memoize_left_rec")
263 | else:
264 | # Non-leader rules in a cycle are not memoized,
265 | # but they must still be logged.
266 | self.print("@logger")
267 | else:
268 | self.print("@memoize")
269 | node_type = node.type or "Any"
270 | self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
271 | with self.indent():
272 | self.print(f"# {node.name}: {rhs}")
273 | if node.nullable:
274 | self.print(f"# nullable={node.nullable}")
275 | self.print("mark = self._mark()")
276 | if self.alts_uses_locations(node.rhs.alts):
277 | self.print("tok = self._tokenizer.peek()")
278 | self.print("start_lineno, start_col_offset = tok.start")
279 | if is_loop:
280 | self.print("children = []")
281 | self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
282 | if is_loop:
283 | self.print("return children")
284 | else:
285 | self.print("return None")
286 |
287 | def visit_NamedItem(self, node: NamedItem) -> None:
288 | name, call = self.callmakervisitor.visit(node.item)
289 | if node.name:
290 | name = node.name
291 | if not name:
292 | self.print(call)
293 | else:
294 | if name != "cut":
295 | name = self.dedupe(name)
296 | self.print(f"({name} := {call})")
297 |
298 | def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
299 | if is_loop:
300 | assert len(node.alts) == 1
301 | for alt in node.alts:
302 | self.visit(alt, is_loop=is_loop, is_gather=is_gather)
303 |
304 | def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
305 | has_cut = any(isinstance(item.item, Cut) for item in node.items)
306 | with self.local_variable_context():
307 | if has_cut:
308 | self.print("cut = False")
309 | if is_loop:
310 | self.print("while (")
311 | else:
312 | self.print("if (")
313 | with self.indent():
314 | first = True
315 | for item in node.items:
316 | if first:
317 | first = False
318 | else:
319 | self.print("and")
320 | self.visit(item)
321 | if is_gather:
322 | self.print("is not None")
323 |
324 | self.print("):")
325 | with self.indent():
326 | action = node.action
327 | if not action:
328 | if is_gather:
329 | assert len(self.local_variable_names) == 2
330 | action = (
331 | f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
332 | )
333 | else:
334 | if self.invalidvisitor.visit(node):
335 | action = "UNREACHABLE"
336 | elif len(self.local_variable_names) == 1:
337 | action = f"{self.local_variable_names[0]}"
338 | else:
339 | action = f"[{', '.join(self.local_variable_names)}]"
340 | elif "LOCATIONS" in action:
341 | self.print("tok = self._tokenizer.get_last_non_whitespace_token()")
342 | self.print("end_lineno, end_col_offset = tok.end")
343 | action = action.replace("LOCATIONS", self.location_formatting)
344 |
345 | if is_loop:
346 | self.print(f"children.append({action})")
347 | self.print(f"mark = self._mark()")
348 | else:
349 | if "UNREACHABLE" in action:
350 | action = action.replace("UNREACHABLE", self.unreachable_formatting)
351 | self.print(f"return {action}")
352 |
353 | self.print("self._reset(mark)")
354 | # Skip remaining alternatives if a cut was reached.
355 | if has_cut:
356 | self.print("if cut: return None")
357 |
--------------------------------------------------------------------------------
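
A minimal sketch of driving the generator above, assuming the vendored `pegen` package is importable (as `script/generate.py` arranges): parse a one-rule grammar, then emit the parser source.

    import io
    import tokenize

    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.python_generator import PythonParserGenerator
    from pegen.tokenizer import Tokenizer

    source = "start: NAME NEWLINE? ENDMARKER\n"
    toks = Tokenizer(tokenize.generate_tokens(io.StringIO(source).readline))
    grammar = GrammarParser(toks).start()  # Grammar object

    out = io.StringIO()
    PythonParserGenerator(grammar, out).generate("<demo>")
    print(out.getvalue())  # emitted class contains "@memoize" and "def start(self)"
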
/mingshe/_vendor/pegen/sccutils.py:
--------------------------------------------------------------------------------
1 | # Adapted from mypy (mypy/build.py) under the MIT license.
2 |
3 | from typing import *
4 |
5 |
6 | def strongly_connected_components(
7 | vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]]
8 | ) -> Iterator[AbstractSet[str]]:
9 | """Compute Strongly Connected Components of a directed graph.
10 |
11 | Args:
12 | vertices: the labels for the vertices
13 | edges: for each vertex, gives the target vertices of its outgoing edges
14 |
15 | Returns:
16 | An iterator yielding strongly connected components, each
17 | represented as a set of vertices. Each input vertex will occur
18 | exactly once; vertices not part of a SCC are returned as
19 | singleton sets.
20 |
21 | From http://code.activestate.com/recipes/578507/.
22 | """
23 | identified: Set[str] = set()
24 | stack: List[str] = []
25 | index: Dict[str, int] = {}
26 | boundaries: List[int] = []
27 |
28 | def dfs(v: str) -> Iterator[Set[str]]:
29 | index[v] = len(stack)
30 | stack.append(v)
31 | boundaries.append(index[v])
32 |
33 | for w in edges[v]:
34 | if w not in index:
35 | yield from dfs(w)
36 | elif w not in identified:
37 | while index[w] < boundaries[-1]:
38 | boundaries.pop()
39 |
40 | if boundaries[-1] == index[v]:
41 | boundaries.pop()
42 | scc = set(stack[index[v] :])
43 | del stack[index[v] :]
44 | identified.update(scc)
45 | yield scc
46 |
47 | for v in vertices:
48 | if v not in index:
49 | yield from dfs(v)
50 |
51 |
52 | def topsort(
53 | data: Dict[AbstractSet[str], Set[AbstractSet[str]]]
54 | ) -> Iterable[AbstractSet[AbstractSet[str]]]:
55 | """Topological sort.
56 |
57 | Args:
58 | data: A map from SCCs (represented as frozen sets of strings) to
59 | sets of SCCs, its dependencies. NOTE: This data structure
60 | is modified in place -- for normalization purposes,
61 | self-dependencies are removed and entries representing
62 | orphans are added.
63 |
64 | Returns:
65 | An iterator yielding sets of SCCs that have an equivalent
66 | ordering. NOTE: The algorithm doesn't care about the internal
67 | structure of SCCs.
68 |
69 | Example:
70 | Suppose the input has the following structure:
71 |
72 | {A: {B, C}, B: {D}, C: {D}}
73 |
74 | This is normalized to:
75 |
76 | {A: {B, C}, B: {D}, C: {D}, D: {}}
77 |
78 | The algorithm will yield the following values:
79 |
80 | {D}
81 | {B, C}
82 | {A}
83 |
84 | From http://code.activestate.com/recipes/577413/.
85 | """
86 | # TODO: Use a faster algorithm?
87 | for k, v in data.items():
88 | v.discard(k) # Ignore self dependencies.
89 | for item in set.union(*data.values()) - set(data.keys()):
90 | data[item] = set()
91 | while True:
92 | ready = {item for item, dep in data.items() if not dep}
93 | if not ready:
94 | break
95 | yield ready
96 | data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
97 | assert not data, "A cyclic dependency exists amongst %r" % data
98 |
99 |
100 | def find_cycles_in_scc(
101 | graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str
102 | ) -> Iterable[List[str]]:
103 | """Find cycles in SCC emanating from start.
104 |
105 | Yields lists of the form ['A', 'B', 'C', 'A'], which means there's
106 | a path from A -> B -> C -> A. The first item is always the start
107 | argument, but the last item may be another element, e.g. ['A',
108 | 'B', 'C', 'B'] means there's a path from A to B and there's a
109 | cycle from B to C and back.
110 | """
111 | # Basic input checks.
112 | assert start in scc, (start, scc)
113 | assert scc <= graph.keys(), scc - graph.keys()
114 |
115 | # Reduce the graph to nodes in the SCC.
116 | graph = {src: {dst for dst in dsts if dst in scc} for src, dsts in graph.items() if src in scc}
117 | assert start in graph
118 |
119 | # Recursive helper that yields cycles.
120 | def dfs(node: str, path: List[str]) -> Iterator[List[str]]:
121 | if node in path:
122 | yield path + [node]
123 | return
124 | path = path + [node] # TODO: Make this not quadratic.
125 | for child in graph[node]:
126 | yield from dfs(child, path)
127 |
128 | yield from dfs(start, [])
129 |
--------------------------------------------------------------------------------
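
A minimal sketch of the two helpers above on a toy graph (assuming the vendored `pegen` package is importable): `a` and `b` form one SCC, and `c` depends on it.

    from pegen.sccutils import strongly_connected_components, topsort

    vertices = {"a", "b", "c"}
    edges = {"a": {"b"}, "b": {"a"}, "c": {"a"}}
    print(list(strongly_connected_components(vertices, edges)))
    # [{'a', 'b'}, {'c'}] -- every vertex lands in exactly one SCC

    ab, c = frozenset({"a", "b"}), frozenset({"c"})
    print(list(topsort({c: {ab}, ab: set()})))
    # [{frozenset({'a', 'b'})}, {frozenset({'c'})}] -- dependencies come first
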
/mingshe/_vendor/pegen/templates/index.html:
--------------------------------------------------------------------------------
[HTML markup was stripped from this file during extraction. The recoverable content shows a Jinja2 template for the "Pegen live demo" page (page title and "PEGEN LIVE DEMO" heading), which renders the grammar/program form and prints {{output}} only when output.strip() != "".]
--------------------------------------------------------------------------------
/mingshe/_vendor/pegen/tokenizer.py:
--------------------------------------------------------------------------------
1 | import token
2 | import tokenize
3 | from typing import Dict, Iterator, List
4 |
5 | Mark = int # NewType('Mark', int)
6 |
7 | exact_token_types = token.EXACT_TOKEN_TYPES
8 |
9 |
10 | def shorttok(tok: tokenize.TokenInfo) -> str:
11 | return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
12 |
13 |
14 | class Tokenizer:
15 | """Caching wrapper for the tokenize module.
16 |
17 | This is pretty tied to Python's syntax.
18 | """
19 |
20 | _tokens: List[tokenize.TokenInfo]
21 |
22 | def __init__(
23 | self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
24 | ):
25 | self._tokengen = tokengen
26 | self._tokens = []
27 | self._index = 0
28 | self._verbose = verbose
29 | self._lines: Dict[int, str] = {}
30 | self._path = path
31 | if verbose:
32 | self.report(False, False)
33 |
34 | def getnext(self) -> tokenize.TokenInfo:
35 | """Return the next token and updates the index."""
36 | cached = not self._index == len(self._tokens)
37 | tok = self.peek()
38 | self._index += 1
39 | if self._verbose:
40 | self.report(cached, False)
41 | return tok
42 |
43 | def peek(self) -> tokenize.TokenInfo:
44 | """Return the next token *without* updating the index."""
45 | while self._index == len(self._tokens):
46 | tok = next(self._tokengen)
47 | if tok.type in (tokenize.NL, tokenize.COMMENT):
48 | continue
49 | if tok.type == token.ERRORTOKEN and tok.string.isspace():
50 | continue
51 | if (
52 | tok.type == token.NEWLINE
53 | and self._tokens
54 | and self._tokens[-1].type == token.NEWLINE
55 | ):
56 | continue
57 | self._tokens.append(tok)
58 | if not self._path:
59 | self._lines[tok.start[0]] = tok.line
60 | return self._tokens[self._index]
61 |
62 | def diagnose(self) -> tokenize.TokenInfo:
63 | if not self._tokens:
64 | self.getnext()
65 | return self._tokens[-1]
66 |
67 | def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
68 | for tok in reversed(self._tokens[: self._index]):
69 | if tok.type != tokenize.ENDMARKER and (
70 | tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
71 | ):
72 | break
73 | return tok
74 |
75 | def get_lines(self, line_numbers: List[int]) -> List[str]:
76 | """Retrieve source lines corresponding to line numbers."""
77 | if self._lines:
78 | lines = self._lines
79 | else:
80 | n = len(line_numbers)
81 | lines = {}
82 | count = 0
83 | seen = 0
84 | with open(self._path) as f:
85 | for l in f:
86 | count += 1
87 | if count in line_numbers:
88 | seen += 1
89 | lines[count] = l
90 | if seen == n:
91 | break
92 |
93 | return [lines[n] for n in line_numbers]
94 |
95 | def mark(self) -> Mark:
96 | return self._index
97 |
98 | def reset(self, index: Mark) -> None:
99 | if index == self._index:
100 | return
101 | assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
102 | old_index = self._index
103 | self._index = index
104 | if self._verbose:
105 | self.report(True, index < old_index)
106 |
107 | def report(self, cached: bool, back: bool) -> None:
108 | if back:
109 | fill = "-" * self._index + "-"
110 | elif cached:
111 | fill = "-" * self._index + ">"
112 | else:
113 | fill = "-" * self._index + "*"
114 | if self._index == 0:
115 | print(f"{fill} (Bof)")
116 | else:
117 | tok = self._tokens[self._index - 1]
118 | print(f"{fill} {shorttok(tok)}")
119 |
--------------------------------------------------------------------------------
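
A minimal sketch of the mark()/reset() pair above, which is what lets a PEG parser backtrack without re-tokenizing (assuming the vendored `pegen` package is importable):

    import io
    import tokenize

    from pegen.tokenizer import Tokenizer

    toks = Tokenizer(tokenize.generate_tokens(io.StringIO("1 + 2\n").readline))
    mark = toks.mark()
    first = toks.getnext()          # NUMBER '1'
    toks.reset(mark)                # rewind to the saved position
    assert toks.getnext() == first  # replayed from the cache, not re-tokenized
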
/mingshe/_vendor/pegen/validator.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pegen import grammar
4 | from pegen.grammar import (
5 | Alt,
6 | GrammarVisitor,
7 | Rule,
8 | Rhs,
9 | )
10 |
11 |
12 | class ValidationError(Exception):
13 | pass
14 |
15 |
16 | class GrammarValidator(GrammarVisitor):
17 | def __init__(self, grammar: grammar.Grammar) -> None:
18 | self.grammar = grammar
19 | self.rulename: Optional[str] = None
20 |
21 | def validate_rule(self, rulename: str, node: Rule) -> None:
22 | self.rulename = rulename
23 | self.visit(node)
24 | self.rulename = None
25 |
26 |
27 | class SubRuleValidator(GrammarValidator):
28 | def visit_Rhs(self, node: Rhs) -> None:
29 | for index, alt in enumerate(node.alts):
30 | alts_to_consider = node.alts[index + 1 :]
31 | for other_alt in alts_to_consider:
32 | self.check_intersection(alt, other_alt)
33 |
34 | def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
35 | if str(second_alt).startswith(str(first_alt)):
36 | raise ValidationError(
37 | f"In {self.rulename} there is an alternative that will "
38 | f"never be visited:\n{second_alt}"
39 | )
40 |
41 |
42 | def validate_grammar(the_grammar: grammar.Grammar) -> None:
43 | for validator_cls in GrammarValidator.__subclasses__():
44 | validator = validator_cls(the_grammar)
45 | for rule_name, rule in the_grammar.rules.items():
46 | validator.validate_rule(rule_name, rule)
47 |
--------------------------------------------------------------------------------
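
A minimal sketch of SubRuleValidator in action (assuming the vendored `pegen` package is importable): the second alternative below can never match because the first is a strict prefix of it.

    import io
    import tokenize

    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.tokenizer import Tokenizer
    from pegen.validator import ValidationError, validate_grammar

    source = "start: 'a' | 'a' 'b'\n"
    toks = Tokenizer(tokenize.generate_tokens(io.StringIO(source).readline))
    grammar = GrammarParser(toks).start()
    try:
        validate_grammar(grammar)
    except ValidationError as e:
        print(e)  # "In start there is an alternative that will never be visited: ..."
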
/mingshe/_vendor/pegen/web.py:
--------------------------------------------------------------------------------
1 | import html
2 | import io
3 | import textwrap
4 | import tokenize
5 | import traceback
6 | from typing import IO, Any, Dict, Final, Type, cast
7 |
8 | from flask import Flask, cli, redirect, render_template, url_for # type: ignore
9 | from flask_wtf import FlaskForm # type: ignore
10 | from wtforms import SubmitField, TextAreaField # type: ignore
11 | from wtforms.validators import DataRequired # type: ignore
12 |
13 | from pegen.grammar import Grammar
14 | from pegen.grammar_parser import GeneratedParser as GrammarParser
15 | from pegen.parser import Parser
16 | from pegen.python_generator import PythonParserGenerator
17 | from pegen.tokenizer import Tokenizer
18 |
19 | DEFAULT_GRAMMAR = """\
20 | start: expr NEWLINE? ENDMARKER { expr }
21 | expr:
22 | | expr '+' term { expr + term }
23 | | expr '-' term { expr - term}
24 | | term
25 | term:
26 | | term '*' factor { term * factor }
27 | | term '/' factor { term / factor }
28 | | factor
29 |
30 | factor:
31 | | '(' expr ')' { expr }
32 | | atom { int(atom.string) }
33 | atom: NUMBER
34 | """
35 |
36 | DEFAULT_SOURCE = "(1 + 2) * (3 - 6)"
37 |
38 |
39 | def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
40 | # Run a parser on a file (stream).
41 | tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore # typeshed issue #3515
42 | parser = parser_class(tokenizer, verbose=verbose)
43 | result = parser.start()
44 | if result is None:
45 | raise parser.make_syntax_error("invalid syntax")
46 | return result
47 |
48 |
49 | def parse_string(
50 | source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
51 | ) -> Any:
52 | # Run the parser on a string.
53 | if dedent:
54 | source = textwrap.dedent(source)
55 | file = io.StringIO(source)
56 | return run_parser(file, parser_class, verbose=verbose) # type: ignore # typeshed issue #3515
57 |
58 |
59 | def generate_parser(grammar: Grammar) -> Type[Parser]:
60 | # Generate a parser.
61 | out = io.StringIO()
62 | genr = PythonParserGenerator(grammar, out)
63 | genr.generate("")
64 |
65 | # Load the generated parser class.
66 | ns: Dict[str, Any] = {}
67 | exec(out.getvalue(), ns)
68 | return ns["GeneratedParser"]
69 |
70 |
71 | def make_parser(source: str) -> Type[Parser]:
72 | # Combine parse_string() and generate_parser().
73 | grammar = parse_string(source, GrammarParser)
74 | return generate_parser(grammar)
75 |
76 |
77 | app = Flask(__name__)
78 |
79 | # Flask-WTF requires an encryption key - the string can be anything
80 | app.config["SECRET_KEY"] = "does_not_matter"
81 |
82 |
83 | class GrammarForm(FlaskForm): # type: ignore
84 | grammar = TextAreaField("PEG GRAMMAR", validators=[DataRequired()], default=DEFAULT_GRAMMAR)
85 | source = TextAreaField("PROGRAM", validators=[DataRequired()], default=DEFAULT_SOURCE)
86 | submit = SubmitField("Parse!")
87 |
88 |
89 | @app.route("/", methods=["GET", "POST"])
90 | def index() -> str:
91 |     # 'form' is the GrammarForm instance defined above; it is the
92 |     # variable name consumed by this template: index.html
93 | form = GrammarForm()
94 | form.grammar(class_="form-control")
95 | output_text = "\n"
96 | if form.validate_on_submit():
97 | grammar_source = form.grammar.data
98 | program_source = form.source.data
99 | output = io.StringIO()
100 | try:
101 | parser_class = make_parser(grammar_source)
102 | result = parse_string(program_source, parser_class, verbose=False)
103 | print(result, file=output)
104 |         except Exception:
105 | traceback.print_exc(file=output)
106 | output_text += output.getvalue()
107 | return render_template("index.html", form=form, output=output_text)
108 |
109 |
110 | if __name__ == "__main__":
111 | cli.show_server_banner = lambda *_: None
112 | app.run(debug=False)
113 |
--------------------------------------------------------------------------------
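
A minimal sketch of the helpers above outside the Flask app (importing `pegen.web` pulls in Flask, flask-wtf and wtforms at module load, so those must be installed):

    from pegen.web import make_parser, parse_string

    parser_class = make_parser("start: NUMBER NEWLINE? ENDMARKER { int(number.string) }\n")
    print(parse_string("42", parser_class))  # -> 42
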
/mingshe/commands.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import argparse
3 | import builtins
4 | import logging
5 | import re
6 | import sys
7 | from pathlib import Path
8 |
9 | from .__version__ import __version__
10 | from .core import compile
11 | from .importlib import install_meta
12 |
13 |
14 | def main():
15 | argparser = argparse.ArgumentParser(description=f"MíngShé {__version__}")
16 | argparser.add_argument("file", help="The .she file", nargs="?")
17 | argparser.add_argument(
18 | "--python",
19 | help="Python version. e.g. 3.7",
20 | default=".".join(map(str, sys.version_info[:2])),
21 | )
22 | argparser.add_argument(
23 | "--compile", dest="compile", action="store_true", help="Only compile"
24 | )
25 | argparser.add_argument(
26 | "-c", dest="cmd", action="store_true", help="Run a short command"
27 | )
28 | argparser.add_argument(
29 | "-v",
30 | "--verbose",
31 | action="count",
32 | default=0,
33 | help="Repeat for more debug output. e.g. -vv -vvv -vvvv",
34 | )
35 |
36 | args = argparser.parse_args()
37 | verbose = args.verbose
38 | verbose_tokenizer = verbose >= 3
39 | verbose_parser = verbose == 2 or verbose >= 4
40 |
41 | if verbose:
42 | logging.getLogger("mingshe").setLevel(logging.DEBUG)
43 | logging.basicConfig()
44 |
45 | python = tuple(map(int, re.fullmatch(r"(\d+)\.(\d+)", args.python).groups()))
46 |
47 | global_vars = {"__name__": "__main__"}
48 |
49 |     write_to_py = lambda text, **_: sys.stdout.write(text)  # accept encoding= like Path.write_text
50 |
51 | if not args.cmd:
52 | if args.file is None:
53 |             mingshe_code = sys.stdin.read() if sys.stdin.readable() else ""
54 | filename = ""
55 | else:
56 | _filepath = Path(args.file)
57 | mingshe_code = _filepath.read_text(encoding="utf8")
58 | filename = _filepath.absolute().__str__()
59 | write_to_py = _filepath.with_suffix(".py").absolute().write_text
60 | global_vars["__file__"] = filename
61 | else:
62 | mingshe_code = args.file
63 | filename = ""
64 |
65 | ast_obj = compile(
66 | mingshe_code,
67 | filename=filename,
68 | verbose_tokenizer=verbose_tokenizer,
69 | verbose_parser=verbose_parser,
70 | py_version=python,
71 | )
72 |
73 | if args.compile:
74 | py_text = ast.unparse(ast_obj)
75 | write_to_py(py_text, encoding="utf8")
76 | else:
77 | sys.path.insert(0, str(Path(".").absolute()))
78 |         install_meta(".she")  # resolve .she files whether or not the .pth hook was loaded
79 | builtins.exec(builtins.compile(ast_obj, filename, "exec"), global_vars)
80 |
--------------------------------------------------------------------------------
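
A minimal sketch of what the `--compile` path above boils down to; `demo.she` is a hypothetical input file, and `ast.unparse` requires Python 3.9+ (which `pyproject.toml` already demands):

    import ast
    import sys
    from pathlib import Path

    from mingshe.core import compile as she_compile

    src = Path("demo.she").read_text(encoding="utf8")
    tree = she_compile(src, filename="demo.she", py_version=sys.version_info[:2])
    Path("demo.py").write_text(ast.unparse(tree), encoding="utf8")
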
/mingshe/core.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import logging
3 | import time
4 | import token
5 | from io import StringIO
6 | from tokenize import TokenInfo, generate_tokens
7 | from typing import Iterable, List, Literal, Optional, Tuple, overload
8 |
9 | from pegen.tokenizer import Tokenizer
10 |
11 | from .parser import PythonParser
12 |
13 | log = logging.getLogger(__name__)
14 |
15 |
16 | def merge_operators(tokens: Iterable[TokenInfo]) -> List[TokenInfo]:
17 | result = []
18 | for toknum, tokval, (srow, scol), (erow, ecol), linenum in tokens:
19 |         if tokval == ">" and result and result[-1].string == "|":  # |>
20 | token_info = TokenInfo(token.OP, "|>", result[-1][2], (erow, ecol), linenum)
21 | del result[-1]
22 | result.append(token_info)
23 | continue
24 | elif tokval == "?":
25 |             if result and result[-1].string == "?":  # ??
26 | token_info = TokenInfo(token.OP, "??", result[-1][2], (erow, ecol), linenum)
27 | del result[-1]
28 | result.append(token_info)
29 | continue
30 | else:
31 | token_info = TokenInfo(token.OP, "?", (srow, scol), (erow, ecol), linenum)
32 | result.append(token_info)
33 | continue
34 |         elif tokval == ">" and result and result[-1].string == "=":  # =>
35 | token_info = TokenInfo(token.OP, "=>", result[-1][2], (erow, ecol), linenum)
36 | del result[-1]
37 | result.append(token_info)
38 | continue
39 |
40 | result.append(TokenInfo(toknum, tokval, (srow, scol), (erow, ecol), linenum))
41 | return result
42 |
43 |
44 | @overload
45 | def compile(
46 | source: str,
47 | filename: str = "",
48 | symbol: Literal["file"] = "file",
49 | verbose_tokenizer: bool = False,
50 | verbose_parser: bool = False,
51 | py_version: Optional[Tuple[int, int]] = None,
52 | ) -> ast.Module:
53 | ...
54 |
55 |
56 | @overload
57 | def compile(
58 | source: str,
59 | filename: str = "",
60 | symbol: Literal["eval"] = "eval",
61 | verbose_tokenizer: bool = False,
62 | verbose_parser: bool = False,
63 | py_version: Optional[Tuple[int, int]] = None,
64 | ) -> ast.Expression:
65 | ...
66 |
67 |
68 | @overload
69 | def compile(
70 | source: str,
71 | filename: str = "",
72 | symbol: Literal["interactive"] = "interactive",
73 | verbose_tokenizer: bool = False,
74 | verbose_parser: bool = False,
75 | py_version: Optional[Tuple[int, int]] = None,
76 | ) -> ast.Interactive:
77 | ...
78 |
79 |
80 | @overload
81 | def compile(
82 | source: str,
83 | filename: str = "",
84 | symbol: Literal["func_type"] = "func_type",
85 | verbose_tokenizer: bool = False,
86 | verbose_parser: bool = False,
87 | py_version: Optional[Tuple[int, int]] = None,
88 | ) -> ast.FunctionType:
89 | ...
90 |
91 |
92 | @overload
93 | def compile(
94 | source: str,
95 | filename: str = "",
96 | symbol: Literal["fstring"] = "fstring",
97 | verbose_tokenizer: bool = False,
98 | verbose_parser: bool = False,
99 | py_version: Optional[Tuple[int, int]] = None,
100 | ) -> ast.Expr:
101 | ...
102 |
103 |
104 | def compile(
105 | source,
106 | filename="",
107 | symbol="file",
108 | verbose_tokenizer=False,
109 | verbose_parser=False,
110 | py_version=None,
111 | ):
112 | start_time = time.time_ns()
113 | token_list = merge_operators(generate_tokens(StringIO(source).readline))
114 | tokenizer = Tokenizer(iter(token_list), verbose=verbose_tokenizer)
115 | parser = PythonParser(
116 | tokenizer, filename=filename, verbose=verbose_parser, py_version=py_version
117 | )
118 | try:
119 | return parser.parse(symbol)
120 | except SyntaxError as syntax_error:
121 | if parser._exception is None and str(syntax_error) == "invalid syntax":
122 | raise parser.make_syntax_error("unknown syntax error") from None
123 | else:
124 | raise
125 | finally:
126 | end_time = time.time_ns()
127 | log.debug(f"Compile {filename} took {(end_time - start_time) / 1e6:.2f} ms")
128 |
--------------------------------------------------------------------------------
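
A minimal sketch of the public entry point above: merge_operators() fuses '|' and '>' into a single '|>' OP token, which the grammar then parses as a pipeline expression, and compile() returns an AST that ast.unparse() can turn back into plain Python.

    import ast

    from mingshe.core import compile as she_compile

    tree = she_compile('"hello" |> print', filename="<demo>")
    print(ast.unparse(tree))  # -> print('hello')
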
/mingshe/importlib.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import abc
4 | import builtins
5 | import importlib
6 | import importlib.abc
7 | import logging
8 | import os
9 | import sys
10 | import types
11 | from importlib.machinery import ModuleSpec
12 | from pathlib import Path
13 |
14 | from .core import compile
15 |
16 | log = logging.getLogger(__name__)
17 |
18 |
19 | class SingletonMetaFinder(abc.ABCMeta):  # inherit from abc.ABCMeta to avoid a metaclass conflict with importlib.abc.MetaPathFinder
20 | def __init__(cls, name, bases, namespace):
21 | cls.instances = {}
22 |
23 | def __call__(cls, suffix: str) -> ExtensionMetaFinder:
24 | if suffix not in cls.instances:
25 | cls.instances[suffix] = super().__call__(suffix)
26 | return cls.instances[suffix]
27 |
28 |
29 | class ExtensionMetaFinder(importlib.abc.MetaPathFinder, metaclass=SingletonMetaFinder):
30 | def __init__(self, suffix: str):
31 | self.suffix = suffix
32 |
33 |     def find_spec(self, module_name: str, path: list[str] | None, target: types.ModuleType | None = None):
34 | if path is None:
35 | paths = [
36 | path for path in map(Path, sys.path) if path.exists() and path.is_dir()
37 | ]
38 | else:
39 | paths = list(map(Path, path))
40 | for try_path in paths:
41 | log.debug(f"Finding module {module_name} in {try_path}")
42 | last_module_name = module_name.rpartition(".")[2]
43 |
44 | fullpath = try_path / (last_module_name + self.suffix)
45 | if fullpath.exists():
46 | log.debug(f"Found module '{module_name}' in {fullpath}")
47 | loader = ExtensionModuleLoader(fullpath)
48 | return ModuleSpec(module_name, loader, origin=fullpath)
49 |
50 | fullpath = try_path / last_module_name
51 | if fullpath.is_dir() and (fullpath / f"__init__{self.suffix}").exists():
52 | log.debug(f"Found package '{module_name}' in {fullpath}")
53 | loader = ExtensionPackageLoader(fullpath)
54 | return ModuleSpec(module_name, loader, origin=fullpath, is_package=True)
55 |
56 |
57 | class ExtensionModuleLoader(importlib.abc.SourceLoader):
58 | def __init__(self, filepath: Path) -> None:
59 | self.filepath = filepath
60 |
61 | def create_module(self, spec: ModuleSpec) -> types.ModuleType | None:
62 | module = super().create_module(spec)
63 | log.debug(f"Created module '{spec.name}'")
64 | return module
65 |
66 | def exec_module(self, module: types.ModuleType) -> None:
67 | setattr(module, "__file__", self.get_filename(module.__name__))
68 | builtins.exec(self.get_code(module.__spec__), module.__dict__)
69 | log.debug(f"Executed module '{module.__name__}'")
70 |
71 | def get_data(self, path: str) -> bytes:
72 | return Path(path).read_bytes()
73 |
74 | def get_filename(self, module_name: str) -> str:
75 | return str(self.filepath)
76 |
77 | def source_to_code(self, data: bytes, path: str = "") -> types.CodeType:
78 | return builtins.compile(compile(data.decode("utf8"), path), path, "exec")
79 |
80 |
81 | class ExtensionPackageLoader(ExtensionModuleLoader):
82 | def exec_module(self, module: types.ModuleType) -> None:
83 | setattr(
84 | module, "__path__", [os.path.dirname(self.get_filename(module.__name__))]
85 | )
86 | super().exec_module(module)
87 |
88 | def get_filename(self, module_name: str) -> str:
89 | return str(self.filepath / "__init__.she")
90 |
91 |
92 | # Utility functions for installing/uninstalling the loader
93 |
94 |
95 | def install_meta(suffix: str) -> None:
96 | finder = ExtensionMetaFinder(suffix)
97 | if sys.meta_path.count(finder) == 0:
98 | sys.meta_path.insert(0, finder)
99 |
100 |
101 | def uninstall_meta(suffix: str) -> None:
102 | finder = ExtensionMetaFinder(suffix)
103 | sys.meta_path.remove(finder)
104 |
--------------------------------------------------------------------------------
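
A minimal sketch of the meta finder above, using the fixtures under tests/for_test_importlib/ (run from the repository root):

    import sys

    from mingshe.importlib import install_meta, uninstall_meta

    sys.path.insert(0, "tests/for_test_importlib")  # directory containing a.she
    install_meta(".she")
    import a  # resolved and compiled by the .she meta finder

    assert a.name == "a"
    uninstall_meta(".she")
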
/mingshe/utils.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import os
3 | from typing import Iterator, Tuple
4 |
5 |
6 | @contextlib.contextmanager
7 | def work_with_files(*files: Tuple[str, str]) -> Iterator[None]:
8 | try:
9 | for file, text in files:
10 | with open(file, "w+", encoding="utf8") as f:
11 | f.write(text)
12 | yield None
13 | finally:
14 | for file, _ in files:
15 | try:
16 | os.remove(file)
17 | except Exception:
18 | pass
19 |
--------------------------------------------------------------------------------
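
A minimal sketch of the helper above: the files exist inside the with block and are removed on exit, even if the body raises.

    import os

    from mingshe.utils import work_with_files

    with work_with_files(("demo.txt", "hello")):
        assert os.path.exists("demo.txt")
    assert not os.path.exists("demo.txt")  # cleaned up on exit
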
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: MíngShé
2 | site_description: A better Python superset language.
3 |
4 | repo_name: MíngShé
5 | repo_url: https://github.com/abersheeran/mingshe
6 | edit_uri: "https://github.com/abersheeran/mingshe/tree/master/docs/"
7 |
8 | use_directory_urls: true
9 |
10 | theme:
11 | name: "material"
12 | language: "zh"
13 | palette:
14 | - scheme: default
15 | toggle:
16 | icon: material/weather-sunny
17 | name: Switch to dark mode
18 | - scheme: slate
19 | toggle:
20 | icon: material/weather-night
21 | name: Switch to light mode
22 | font:
23 | code: Fira Code
24 | icon:
25 | admonition:
26 | note: octicons/tag-16
27 | abstract: octicons/checklist-16
28 | info: octicons/info-16
29 | tip: octicons/squirrel-16
30 | success: octicons/check-16
31 | question: octicons/question-16
32 | warning: octicons/alert-16
33 | failure: octicons/x-circle-16
34 | danger: octicons/zap-16
35 | bug: octicons/bug-16
36 | example: octicons/beaker-16
37 | quote: octicons/quote-16
38 | features:
39 | - navigation.sections
40 |
41 | nav:
42 | - 介绍: index.md
43 | - 用法: usage.md
44 | - 语法:
45 | - 管道: syntax/pipeline.md
46 | - 条件运算: syntax/conditional.md
47 | - 偏函数: syntax/partial.md
48 | - 空值合并: syntax/nullish-coalescing.md
49 | - 可选链: syntax/optional-chaining.md
50 | - 字典解构赋值: syntax/unpack-mapping.md
51 | - 内部设计与自定义:
52 | - 编写新语言: custom.md
53 |
54 | markdown_extensions:
55 | - admonition
56 | - extra
57 | - pymdownx.highlight:
58 | linenums: true
59 | - pymdownx.superfences
60 | - pymdownx.details
61 |
62 | plugins:
63 | - search
64 | - i18n:
65 | docs_structure: suffix
66 | languages:
67 | - locale: zh
68 | default: true
69 | name: 简体中文
70 | build: true
71 | - locale: en
72 | name: English
73 | build: true
74 | nav_translations:
75 | 介绍: Introduction
76 | 用法: Usage
77 | 语法: Syntax
78 | 管道: Pipeline
79 | 条件运算: Conditional
80 | 偏函数: Partial
81 | 空值合并: Nullish coalescing
82 | 可选链: Optional chaining
83 | 字典解构赋值: Unpack mapping
84 | 内部设计与自定义: Internal Design and Customization
85 |           编写新语言: Create your own language
86 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | authors = [
3 | {name = "abersheeran", email = "me@abersheeran.com"},
4 | ]
5 | classifiers = ["Programming Language :: Python :: 3"]
6 | dependencies = []
7 | description = "A better Python superset language."
8 | license = {text = "Apache-2.0"}
9 | name = "mingshe"
10 | readme = "README.md"
11 | requires-python = ">=3.9"
12 | version = "0.8.2"
13 |
14 | [project.urls]
15 | homepage = "https://github.com/abersheeran/mingshe"
16 | repository = "https://github.com/abersheeran/mingshe"
17 |
18 | [project.scripts]
19 | mingshe = "mingshe.commands:main"
20 |
21 | [project.optional-dependencies]
22 |
23 | [tool.pdm]
24 | includes = ["mingshe", "aaa_mingshe.pth"]
25 |
26 | [tool.pdm.dev-dependencies]
27 | dev = [
28 | "flake8",
29 | ]
30 | doc = [
31 | "mkdocs[i18n]~=1.2",
32 | "mkdocs-material~=7.2",
33 | "mkdocs-static-i18n~=0.19",
34 | ]
35 | test = [
36 | "pytest~=6.2",
37 | ]
38 |
39 | [build-system]
40 | build-backend = "pdm.pep517.api"
41 | requires = ["pdm-pep517"]
42 |
--------------------------------------------------------------------------------
/script/check.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | source_dirs = "mingshe tests"
4 | subprocess.check_call(f"pdm run flake8 {source_dirs}", shell=True)
5 |
--------------------------------------------------------------------------------
/script/generate.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import pathlib
3 |
4 | subprocess.check_call(
5 | "python -m pegen --output=../parser.py ../../mingshe.gram".split(" "),
6 | cwd=pathlib.Path(__file__).absolute().parent.parent / "mingshe" / "_vendor",
7 | )
8 |
--------------------------------------------------------------------------------
/script/lint.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 |
3 | source_dirs = "mingshe tests"
4 |
--------------------------------------------------------------------------------
/script/upload.she:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env mingshe
2 | import os
3 | import subprocess
4 | from pathlib import Path
5 |
6 | here = Path(__file__).absolute().parent.parent
7 |
8 | package_name = "mingshe"
9 |
10 |
11 | def get_version(package: str = package_name) -> str:
12 | """
13 | Return version.
14 | """
15 | _globals: dict = {}
16 | (here / package / "__version__.py").read_text(encoding="utf8") |> exec(?, _globals)
17 | return _globals["__version__"]
18 |
19 |
20 | os.chdir(here)
21 | check_call = subprocess.check_call(?, shell=True)
22 | check_call(f"pdm version {get_version()}")
23 | check_call(f"git add {package_name}/__version__.py pyproject.toml")
24 | check_call(f'git commit -m "v{get_version()}"')
25 | check_call("git push")
26 | check_call("git tag v{0}".format(get_version()))
27 | check_call("git push --tags")
28 |
--------------------------------------------------------------------------------
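
The `?` placeholders above are MíngShé's partial-application syntax; `subprocess.check_call(?, shell=True)` behaves like this plain-Python equivalent (an illustration, not necessarily the compiler's literal output):

    import functools
    import subprocess

    check_call = functools.partial(subprocess.check_call, shell=True)
    check_call("echo hello")
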
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abersheeran/mingshe/db1ac01705dc200cc5de3ed55ea5f9ff5ef8f57b/tests/__init__.py
--------------------------------------------------------------------------------
/tests/for_test_importlib/a.she:
--------------------------------------------------------------------------------
1 | name = "a"
2 |
--------------------------------------------------------------------------------
/tests/for_test_importlib/p/__init__.she:
--------------------------------------------------------------------------------
1 | name = "p"
2 |
--------------------------------------------------------------------------------
/tests/for_test_importlib/t/b.she:
--------------------------------------------------------------------------------
1 | name = "t.b"
2 |
--------------------------------------------------------------------------------
/tests/python_parser/__init__.py:
--------------------------------------------------------------------------------
1 | import mingshe
2 |
--------------------------------------------------------------------------------
/tests/python_parser/conftest.py:
--------------------------------------------------------------------------------
1 | """"Conftest for pure python parser."""
2 | from pathlib import Path
3 |
4 | import pytest
5 |
6 | from pegen.build import build_parser
7 | from .utils import generate_parser
8 |
9 |
10 | @pytest.fixture(scope="session")
11 | def python_parser_cls():
12 | grammar_path = Path(__file__).parent.parent.parent / "mingshe.gram"
13 | grammar = build_parser(grammar_path)[0]
14 | source_path = str(Path(__file__).parent / "parser_cache" / "py_parser.py")
15 | parser_cls = generate_parser(grammar, source_path, "PythonParser")
16 |
17 | return parser_cls
18 |
19 |
20 | @pytest.fixture(scope="session")
21 | def python_parse_file():
22 | grammar_path = Path(__file__).parent.parent.parent / "mingshe.gram"
23 | grammar = build_parser(grammar_path)[0]
24 | source_path = str(Path(__file__).parent / "parser_cache" / "py_parser.py")
25 |     parse_file = generate_parser(grammar, source_path, "parse_file")
26 |
27 |     return parse_file
28 |
29 |
30 | @pytest.fixture(scope="session")
31 | def python_parse_str():
32 | grammar_path = Path(__file__).parent.parent.parent / "mingshe.gram"
33 | grammar = build_parser(grammar_path)[0]
34 | source_path = str(Path(__file__).parent / "parser_cache" / "py_parser.py")
35 |     parse_string = generate_parser(grammar, source_path, "parse_string")
36 |
37 |     return parse_string
38 |
--------------------------------------------------------------------------------
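
The fixtures above regenerate the parser from mingshe.gram into parser_cache/ once per session; a test then uses them like this (a sketch mirroring test_ast_parsing.py):

    import ast

    def test_roundtrip(python_parse_str):
        tree = python_parse_str("a = 1\n", "exec")
        assert ast.dump(tree) == ast.dump(ast.parse("a = 1\n"))
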
/tests/python_parser/data/advanced_decorators.py:
--------------------------------------------------------------------------------
1 | @d[a]
2 | def f():
3 | pass
4 |
5 |
6 | @d
7 | @d()
8 | @d(a)
9 | @d[a]
10 | def f():
11 | pass
12 |
--------------------------------------------------------------------------------
/tests/python_parser/data/assignment.py:
--------------------------------------------------------------------------------
1 | a = b
2 | a += b
3 | a -= b
4 | a *= b
5 | a /= b
6 | a //= b
7 | a %= b
8 | a |= b
9 | a ^= b
10 | a **= b
11 | a &= b
12 | a @= b
13 | a <<= b
14 | a >>= b
15 | a += yield
16 |
17 | (a) += 1
18 | a[1] += 1
19 | a.b += 1
20 | a.b.c += 1
21 | f(i for i in range(2)).a += 1
22 | f().a += 1
23 |
24 | (a) = 1
25 | a.b = 1
26 | a.b.c = 1
27 | a.b.c.d = 1
28 | a[b] = c
29 | a[b][c] = 1
30 | a.b[c] = 1
31 | a[1:] = b
32 | a[:1] = b
33 | a[1:10:2] = b
34 |
35 |
36 | a: int = b
37 | a: int = yield
38 | a.b: int
39 | a.b: int = 1
40 | a[b]: int = 1
41 | a[b]: int = 1
42 | a = 1
43 | a = 1.0
44 | a = ""
45 | a = r"\c"
46 | a = b"a"
47 | a = f"{a}"
48 | a = f"{d}" "rr"
49 | a = ()
50 | a = (1,)
51 | a = (1, 2)
52 | b = []
53 | b = [
54 | 1,
55 | ]
56 | b = [1, 2]
57 | c = {
58 | 1,
59 | }
60 | c = {1, 2}
61 | d = {}
62 | d = {1: 2}
63 | d = {
64 | 1: 2,
65 | }
66 | d = {1: 2, 3: 4}
67 | a = True
68 | b = False
69 | c = None
70 | d = *a, (*b, c)
71 | d = *a, (*b, *c)
72 |
73 | f = (a := 1)
74 |
75 | a, b = c
76 | a, *b = c
77 | a, *b, d = c
78 | a, *b, d = yield d
79 |
--------------------------------------------------------------------------------
/tests/python_parser/data/async.py:
--------------------------------------------------------------------------------
1 | async def f():
2 | pass
3 |
4 |
5 | async def f():
6 | await b
7 |
8 |
9 | async def f():
10 | async for i in range(10):
11 | pass
12 |
13 |
14 | async def f():
15 | async with open(f) as p:
16 | pass
17 |
18 |
19 | async def f():
20 | a = [i async for i in range(10)]
21 | return a
22 |
--------------------------------------------------------------------------------
/tests/python_parser/data/call.py:
--------------------------------------------------------------------------------
1 | a = ()
2 | b = {}
3 | f()
4 | f(b)
5 | f(b=c)
6 | f(*a)
7 | f(c, *a)
8 | f(c=1, *b)
9 | f(*a, c=1)
10 | f(**b)
11 | f(c, *a, **b)
12 | f(c, *a, x, **b)
13 | f(c, a=1, **b)
14 | f(a := 1)
15 | f(**b, a=1)
16 | f(i for i in range(10))
17 |
--------------------------------------------------------------------------------
/tests/python_parser/data/classes.py:
--------------------------------------------------------------------------------
1 | class A:
2 | pass
3 |
4 |
5 | class A(B):
6 | pass
7 |
8 |
9 | class A(
10 | B,
11 | C,
12 | ):
13 | pass
14 |
15 |
16 | class A(metaclass=M):
17 | pass
18 |
19 |
20 | class A(B, metaclass=M):
21 | pass
22 |
23 |
24 | class A(*t):
25 | pass
26 |
27 |
28 | class A(B, *t):
29 | pass
30 |
31 |
32 | class A(**kw):
33 | pass
34 |
35 |
36 | class A(B, **kw):
37 | pass
38 |
--------------------------------------------------------------------------------
/tests/python_parser/data/comprehensions.py:
--------------------------------------------------------------------------------
1 | a = (k for k in g)
2 | b = (k for k in g if k == 1)
3 | (k for k in g).send(None)
4 |
5 |
6 | a = [k for k in g]
7 | b = [k for k in g if k == 1]
8 |
9 |
10 | a = {k for k in g}
11 | b = {k for k in g if k == 1}
12 | a = {k: 1 for k in g}
13 | b = {k: 2 for k in g if k == 1}
14 |
15 |
16 | [k for v in a for k in v]
17 |
--------------------------------------------------------------------------------
/tests/python_parser/data/expressions.py:
--------------------------------------------------------------------------------
1 | a + b
2 | a - b
3 | a * b
4 | a / b
5 | a // b
6 | a % b
7 | a @ b
8 | a << b
9 | a >> b
10 | a | b
11 | a ^ b
12 | a ** b
13 | a == b
14 | a < b
15 | a <= b
16 | a > b
17 | a >= b
18 | a != b
19 | a & b
20 | ~a
21 | (1, 2, 3)
22 | ["a", "b"]
23 | {1, 2}
24 | {a: a.b}
25 | {**d, a: b}
26 |
27 | not b
28 | a if b else c
29 | a or b
30 | a and b
31 | a in b
32 | a not in b
33 | a is b
34 | a is not b
35 |
36 | a * (+1)
37 | a * (-1)
38 | a * (~1)
39 |
40 | (a)
41 | (yield a)
--------------------------------------------------------------------------------
/tests/python_parser/data/function_def.py:
--------------------------------------------------------------------------------
1 | def f():
2 | pass
3 |
4 |
5 | def f() -> None:
6 | pass
7 |
8 |
9 | def f(a):
10 | pass
11 |
12 |
13 | def f(a: int) -> Tuple[int, ...]:
14 | pass
15 |
16 |
17 | def f(a: int = 1) -> Tuple[int, ...]:
18 | pass
19 |
20 |
21 | def f(a, b: int):
22 | pass
23 |
24 |
25 | def f(a: bool, b: int = 1):
26 | pass
27 |
28 |
29 | def f(a, /):
30 | pass
31 |
32 |
33 | def f(a=1, /):
34 | pass
35 |
36 |
37 | def f(a, b=1, /):
38 | pass
39 |
40 |
41 | def f(a, /, b):
42 | pass
43 |
44 |
45 | def f(a, c=2, /, b=5):
46 | pass
47 |
48 |
49 | def f(a, /, b=1):
50 | pass
51 |
52 |
53 | def f(a, *, b):
54 | pass
55 |
56 |
57 | def f(a, *, b, c=1):
58 | pass
59 |
60 |
61 | def f(a, *, b=1):
62 | pass
63 |
64 |
65 | def f(*, b):
66 | pass
67 |
68 |
69 | def f(*, b, c=1):
70 | pass
71 |
72 |
73 | def f(*, b=1):
74 | pass
75 |
76 |
77 | def f(b=1, *c):
78 | pass
79 |
80 |
81 | def f(*args):
82 | pass
83 |
84 |
85 | def f(**kwargs):
86 | pass
87 |
88 |
89 | def f(a, **kwargs):
90 | pass
91 |
92 |
93 | def f(a=1, **kwargs):
94 | pass
95 |
96 |
97 | def f(*, a=1, **kwargs):
98 | pass
99 |
100 |
101 | def f(*a, **b):
102 | pass
103 |
104 |
105 | def f(a, /, b, *, v=1, **d):
106 | pass
107 |
108 |
109 | async def f():
110 | pass
111 |
112 |
113 | async def f() -> None:
114 | pass
115 |
116 |
117 | async def f(a):
118 | pass
119 |
120 |
121 | async def f(a: int) -> Tuple[int, ...]:
122 | pass
123 |
124 |
125 | async def f(a: int = 1) -> Tuple[int, ...]:
126 | pass
127 |
128 |
129 | async def f(a, b: int):
130 | pass
131 |
132 |
133 | async def f(a: bool, b: int = 1):
134 | pass
135 |
136 |
137 | async def f(a, /):
138 | pass
139 |
140 |
141 | async def f(a=1, /):
142 | pass
143 |
144 |
145 | async def f(a, b=1, /):
146 | pass
147 |
148 |
149 | async def f(a, /, b):
150 | pass
151 |
152 |
153 | async def f(a, c=2, /, b=5):
154 | pass
155 |
156 |
157 | async def f(a, /, b=1):
158 | pass
159 |
160 |
161 | async def f(a, *, b):
162 | pass
163 |
164 |
165 | async def f(a, *, b=1):
166 | pass
167 |
168 |
169 | async def f(*, b):
170 | pass
171 |
172 |
173 | async def f(*, b=1):
174 | pass
175 |
176 |
177 | async def f(b=1, *c):
178 | pass
179 |
180 |
181 | async def f(*args):
182 | pass
183 |
184 |
185 | async def f(**kwargs):
186 | pass
187 |
188 |
189 | async def f(a, **kwargs):
190 | pass
191 |
192 |
193 | async def f(a=1, **kwargs):
194 | pass
195 |
196 |
197 | async def f(*, a=1, **kwargs):
198 | pass
199 |
200 |
201 | async def f(*a, **b):
202 | pass
203 |
204 |
205 | async def f(a, /, b, *, v=1, **d):
206 | pass
207 |
--------------------------------------------------------------------------------
/tests/python_parser/data/imports.py:
--------------------------------------------------------------------------------
1 | import test
2 | import a, b
3 | import test as t
4 | import test as t, y
5 | import test.a
6 | import test.b as b
7 |
8 |
9 | from test import a
10 | from test import a, b
11 | from test import (
12 | a,
13 | b,
14 | )
15 | from test import a as b
16 | from test import a as b, c
17 | from test import a as b, c as d
18 | from test import *
19 | from test.a import b
20 | from test.a import b as c
21 | from test.a import b, c
22 | from test.a import b as c, d
23 |
24 |
25 | from . import a
26 | from ... import b
27 | from .... import c
28 | from ..a import b
29 | from ...a import c
30 | from ....a import c
31 | from . import a, b
32 | from ..a import b, c
33 | from ...a import c, d
34 | from ....a import c, d
35 |
--------------------------------------------------------------------------------
/tests/python_parser/data/lambdas.py:
--------------------------------------------------------------------------------
1 | lambda: 1
2 |
3 | lambda x: x
4 |
5 | lambda x,: x
6 |
7 | lambda x=1: x
8 |
9 | lambda x, y: x + y
10 |
11 | lambda x, /: x
12 |
13 | lambda x, y=1, /: x + y
14 |
15 | lambda x, /, y: x + y
16 |
17 | lambda x, /, y=1, z=2: x + y + z
18 |
19 | lambda x, y=1, /, z=5: x + y + z
20 |
21 | lambda x=1, /, *y: x + y
22 |
23 | lambda x, *, y: x + y
24 |
25 | lambda x, *, y, z: x + y + z
26 |
27 | lambda *, x: x
28 |
29 | lambda *x: x
30 |
31 | lambda **x: x
32 |
33 | lambda x, **y: y
34 |
--------------------------------------------------------------------------------
/tests/python_parser/data/multi_statement_per_line.py:
--------------------------------------------------------------------------------
1 | if a: b=1;
2 | a = 1; b=2
3 |
--------------------------------------------------------------------------------
/tests/python_parser/data/no_newline_at_end_of_file.py:
--------------------------------------------------------------------------------
1 | if a:
2 | b = 1
--------------------------------------------------------------------------------
/tests/python_parser/data/no_newline_at_end_of_file_with_comment.py:
--------------------------------------------------------------------------------
1 | if a:
2 | b = 1
3 |
4 | # test
--------------------------------------------------------------------------------
/tests/python_parser/data/simple_decorators.py:
--------------------------------------------------------------------------------
1 | @d
2 | def f():
3 | pass
4 |
5 |
6 | @d.a
7 | def f():
8 | pass
9 |
10 |
11 | @d()
12 | def f():
13 | pass
14 |
15 |
16 | @d.f()
17 | def f():
18 | pass
19 |
20 |
21 | @d(a)
22 | def f():
23 | pass
24 |
25 |
26 | @d
27 | class A:
28 | pass
29 |
--------------------------------------------------------------------------------
/tests/python_parser/data/statements.py:
--------------------------------------------------------------------------------
1 | pass
2 | pass;
3 |
4 | assert a
5 | assert a; assert b
6 | assert a, "eee"
7 |
8 | raise RuntimeError
9 | raise RuntimeError from e
10 |
11 | return
12 | return 1
13 | return 1,
14 | return *a
15 |
16 | del a
17 | del (a)
18 | del a, b,
19 | del a[:]
20 | del a.b
21 | del (a,)
22 | del (a, b)
23 | del [a, b]
24 | del a;
25 |
26 | global a
27 | global a, b
28 | nonlocal a
29 | nonlocal a, b
30 |
31 | yield a
32 | yield from a
33 |
34 |
35 | for i in a:
36 | pass
37 |
38 | for i, in a:
39 | pass
40 |
41 | for (i,) in a:
42 | pass
43 |
44 | for (i,), in a:
45 | pass
46 |
47 | for i, *j in a:
48 | pass
49 |
50 | for i, (a, *b) in a:
51 | pass
52 |
53 | async for i in a:
54 | pass
55 |
56 | async for i, in a:
57 | pass
58 |
59 | async for (i,) in a:
60 | pass
61 |
62 | async for (i,), in a:
63 | pass
64 |
65 | async for i, *j in a:
66 | pass
67 |
68 | async for i, (a, *b) in a:
69 | pass
70 |
71 | for i in b:
72 | pass
73 | else:
74 | pass
75 |
76 |
77 | if a:
78 | b=1
79 |
80 | if a:
81 | pass
82 | else:
83 | pass
84 |
85 | if a:
86 | pass
87 | elif b:
88 | pass
89 | else:
90 | pass
91 |
92 | if a:
93 | pass
94 | elif b:
95 | pass
96 | elif c:
97 | pass
98 |
99 |
100 | while s:
101 | pass
102 |
103 | while False:
104 | pass
105 | else:
106 | pass
107 |
108 |
109 | for i in a:
110 | continue
111 |
112 | for i in a:
113 | break
114 |
115 |
116 | with a:
117 | pass
118 |
119 | with a, b:
120 | pass
121 |
122 | with a as b:
123 | pass
124 |
125 | with a as b, c:
126 | pass
127 |
128 | async with a:
129 | pass
130 |
131 | async with a, b:
132 | pass
133 |
134 | async with a as b:
135 | pass
136 |
137 | async with a as b, c:
138 | pass
139 |
140 |
141 | try:
142 | pass
143 | finally:
144 | pass
145 |
146 |
147 | try:
148 | pass
149 | except:
150 | raise
151 | finally:
152 | pass
153 |
154 | try:
155 | pass
156 | except ValueError:
157 | pass
158 | except (IndexError, RuntimeError,):
159 | pass
160 | except Exception as e:
161 | pass
162 | else:
163 | pass
164 | finally:
165 | pass
166 |
--------------------------------------------------------------------------------
/tests/python_parser/data/type_comment.py:
--------------------------------------------------------------------------------
1 | a = 1 # type: int
2 |
3 | for i in range(10): # type: int
4 | pass
5 |
6 |
7 | with a: # type: int
8 | pass
9 |
10 |
11 | def f(a): # type: (int) -> None
12 | pass
13 |
14 |
15 | def f(a):
16 | # type: (int) -> None
17 | pass
18 |
--------------------------------------------------------------------------------
/tests/python_parser/data/with_statement_multi_items.py:
--------------------------------------------------------------------------------
1 | with (a, c,):
2 | pass
3 |
4 | with (a as b, c):
5 | pass
6 |
7 | async with (a, c,):
8 | pass
9 |
10 | async with (a as b, c):
11 | pass
12 |
--------------------------------------------------------------------------------
/tests/python_parser/parser_cache/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/tests/python_parser/parser_cache/README:
--------------------------------------------------------------------------------
1 | We store the Python parser generated for the tests here to be able to perform
2 | code coverage measurements.
3 |
--------------------------------------------------------------------------------
/tests/python_parser/test_ast_parsing.py:
--------------------------------------------------------------------------------
1 | """Test pure Python parser against cpython parser."""
2 | import ast
3 | import difflib
4 | import io
5 | import sys
6 | import textwrap
7 | import tokenize
8 | from pathlib import Path
9 |
10 | import pytest
11 |
12 | from pegen.tokenizer import Tokenizer
13 |
14 |
15 | @pytest.mark.parametrize(
16 | "filename",
17 | [
18 | pytest.param(
19 | "advanced_decorators.py",
20 | marks=pytest.mark.skipif(
21 | sys.version_info < (3, 9), reason="Valid only in Python 3.9+"
22 | ),
23 | ),
24 | "assignment.py",
25 | "async.py",
26 | "call.py",
27 | "comprehensions.py",
28 | "expressions.py",
29 | "function_def.py",
30 | "imports.py",
31 | "lambdas.py",
32 | pytest.param(
33 | "multi_statement_per_line.py",
34 | marks=pytest.mark.skipif(
35 | sys.version_info < (3, 9), reason="Col offset match only on Python 3.9+"
36 | ),
37 | ),
38 | "no_newline_at_end_of_file.py",
39 | "no_newline_at_end_of_file_with_comment.py",
40 | pytest.param(
41 | "pattern_matching.py",
42 | marks=pytest.mark.skipif(
43 | sys.version_info < (3, 10), reason="Valid only in Python 3.10+"
44 | ),
45 | ),
46 | "simple_decorators.py",
47 | "statements.py",
48 | pytest.param(
49 | "with_statement_multi_items.py",
50 | marks=pytest.mark.skipif(
51 | sys.version_info < (3, 9),
52 | reason="Parenthesized with items allowed only in Python 3.9+",
53 | ),
54 | ),
55 | ],
56 | )
57 | def test_parser(python_parse_file, python_parse_str, filename):
58 | path = Path(__file__).parent / "data" / filename
59 | with open(path) as f:
60 | source = f.read()
61 |
62 | for part in source.split("\n\n\n"):
63 | original = ast.parse(part)
64 |
65 | kwargs = dict(include_attributes=True)
66 | if sys.version_info >= (3, 9):
67 | kwargs["indent"] = " "
68 |
69 | try:
70 | pp_ast = python_parse_str(part, "exec")
71 | except Exception:
72 |
73 | print("Parsing failed:")
74 | print("Source is:")
75 | print(textwrap.indent(part, " "))
76 | temp = io.StringIO(part)
77 | print("Token stream is:")
78 | for t in tokenize.generate_tokens(temp.readline):
79 | print(t)
80 | print()
81 | print("CPython ast is:")
82 | print(ast.dump(original, **kwargs))
83 | raise
84 |
85 | o = ast.dump(original, **kwargs)
86 | p = ast.dump(pp_ast, **kwargs)
87 | diff = "\n".join(
88 | difflib.unified_diff(o.split("\n"), p.split("\n"), "cpython", "python-pegen")
89 | )
90 | if diff:
91 | print(part)
92 | print(diff)
93 | assert not diff
94 |
95 | o = ast.dump(ast.parse(source), **kwargs)
96 | p = ast.dump(python_parse_file(path), **kwargs)
97 | diff = "\n".join(difflib.unified_diff(o.split("\n"), p.split("\n"), "cpython", "python-pegen"))
98 | assert not diff
99 |
--------------------------------------------------------------------------------
/tests/python_parser/test_syntax_error_handling.py:
--------------------------------------------------------------------------------
1 | """Test syntax errors for cases where the parser can generate helpful messages."""
2 | import io
3 | import tokenize
4 | import sys
5 |
6 | import pytest
7 |
8 | from pegen.tokenizer import Tokenizer
9 |
10 |
11 | def parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exc_cls, message):
12 | with pytest.raises(exc_cls) as e:
13 | python_parse_str(source, "exec")
14 |
15 | print(str(e.exconly()))
16 | assert message in str(e.exconly())
17 |
18 | test_file = tmp_path / "test.py"
19 | with open(test_file, "w") as f:
20 | f.write(source)
21 |
22 | with pytest.raises(exc_cls) as e:
23 | python_parse_file(str(test_file))
24 |
25 | print(str(e.exconly()))
26 | assert message in str(e.exconly())
27 |
28 |
29 | @pytest.mark.parametrize(
30 | "source, message", [("f'a = { 1 + }'", "line 1"), ("(\n\t'b'\n\tf'a = { 1 + }'\n)", "line 3")]
31 | )
32 | def test_syntax_error_in_str(python_parse_file, python_parse_str, tmp_path, source, message):
33 | parse_invalid_syntax(
34 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
35 | )
36 |
37 |
38 | @pytest.mark.parametrize(
39 | "source, message",
40 | [
41 | ("a 1", "invalid syntax. Perhaps you forgot a comma?"),
42 | ("2 if 4", "expected 'else' after 'if' expression"),
43 | ("a 1 if b else 2", "invalid syntax. Perhaps you forgot a comma?"),
44 | ("a 1ambda: 1", "invalid syntax. Perhaps you forgot a comma?"),
45 | ("print 1", "Missing parentheses in call to 'print'"),
46 | ("exec 1", "Missing parentheses in call to 'exec'"),
47 | ("a if b", "expected 'else' after 'if' expression"),
48 | ("c = a if b:", "SyntaxError: invalid syntax"),
49 | ],
50 | )
51 | def test_invalid_expression(python_parse_file, python_parse_str, tmp_path, source, message):
52 | parse_invalid_syntax(
53 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
54 | )
55 |
56 |
57 | # These tests mostly exist to cover rules that exit without matching anything
58 | @pytest.mark.parametrize(
59 | "source, message",
60 | [
61 | ("global a, 1", "invalid syntax"),
62 | ("nonlocal a, 1", "invalid syntax"),
63 | ("yield raise", "invalid syntax"),
64 | ("assert raise", "invalid syntax"),
65 | ("return def", "invalid syntax"),
66 | ("raise def", "invalid syntax"),
67 | ("del raise", "invalid syntax"),
68 | ("if raise:\n\tpass", "invalid syntax"),
69 | ("@raise\ndef f():\n\tpass", "invalid syntax"),
70 | ("a: int = raise", "invalid syntax"),
71 | ],
72 | )
73 | def test_invalid_statements(python_parse_file, python_parse_str, tmp_path, source, message):
74 | parse_invalid_syntax(
75 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
76 | )
77 |
78 |
79 | @pytest.mark.parametrize(
80 | "source, message",
81 | [
82 | # Invalid arguments rules
83 | ("f(**a, *b)", "iterable argument unpacking follows keyword argument unpacking"),
84 | ("f(a for a in b, c)", "Generator expression must be parenthesized"),
85 | ("f(a for a in b, c for c in d)", "Generator expression must be parenthesized"),
86 | (
87 | "f(a=1 for i in range(10))",
88 | "invalid syntax. Maybe you meant '==' or ':=' instead of '='?",
89 | ),
90 | ("f(a, b for b in c)", "Generator expression must be parenthesized"),
91 | ("f(a, b for b in c, d)", "Generator expression must be parenthesized"),
92 | ("f(**a, b)", "positional argument follows keyword argument unpacking"),
93 | ("f(a=1, b)", "positional argument follows keyword argument"),
94 | # Invalid kwarg rules
95 | ("f(b=c for c in d)", "invalid syntax. Maybe you meant '==' or ':=' instead of '='?"),
96 | ("f(1 + b=2)", 'expression cannot contain assignment, perhaps you meant "=="?'),
97 | ],
98 | )
99 | def test_invalid_call_arguments(python_parse_file, python_parse_str, tmp_path, source, message):
100 | parse_invalid_syntax(
101 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
102 | )
103 |
104 |
105 | @pytest.mark.parametrize(
106 | "source, message",
107 | [
108 | ("'a' = 1", "cannot assign to literal"),
109 | ("1 = 1", "cannot assign to 1"),
110 | ("True = 1", "cannot assign to True"),
111 | ("False = 1", "cannot assign to False"),
112 | ("... = 1", "cannot assign to Ellipsis"),
113 | ("None = 1", "cannot assign to None"),
114 | ("(a, b) : int = (1, 2)", "only single target (not tuple) can be annotated"),
115 | ("[a, b] : int = [1, 2]", "only single target (not list) can be annotated"),
116 | ("([a, b]) : int = (1, 2)", "only single target (not list) can be annotated"),
117 | ("a, b: int, int = 1, 2", "only single target (not tuple) can be annotated"),
118 | ("{a, b} : set", "illegal target for annotation"),
119 | ("a + 1 = 2", "cannot assign to expression"),
120 | ("[i for i in range(2)] = 2", "cannot assign to list comprehension"),
121 | ("yield a = 1", "assignment to yield expression not possible"),
122 | ("a = yield b = 1", "assignment to yield expression not possible"),
123 | ("a + 1 += 1", "expression is an illegal expression for augmented assignment"),
124 | ("a + 1 += yield", "expression is an illegal expression for augmented assignment"),
125 | (
126 | "[i for i in range(2)] += 1",
127 | "list comprehension is an illegal expression for augmented assignment",
128 | ),
129 | ("a += raise", "invalid syntax"),
130 | ],
131 | )
132 | def test_invalid_assignments(python_parse_file, python_parse_str, tmp_path, source, message):
133 | parse_invalid_syntax(
134 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
135 | )
136 |
137 |
138 | @pytest.mark.parametrize(
139 | "source, message",
140 | [
141 | ("del [i for i in range(2)]", "cannot delete list comprehension"),
142 | ("del a + 1", "cannot delete expression"),
143 | ],
144 | )
145 | def test_invalid_del_statements(python_parse_file, python_parse_str, tmp_path, source, message):
146 | parse_invalid_syntax(
147 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
148 | )
149 |
150 |
151 | @pytest.mark.parametrize(
152 | "source, message",
153 | [
154 | (
155 | "(*a for a in enumerate(range(5)))",
156 | "iterable unpacking cannot be used in comprehension",
157 | ),
158 | (
159 | "[*a for a in enumerate(range(5))]",
160 | "iterable unpacking cannot be used in comprehension",
161 | ),
162 | (
163 | "{*a for a in enumerate(range(5))}",
164 | "iterable unpacking cannot be used in comprehension",
165 | ),
166 | (
167 | "[a, a for a in range(5)]",
168 | "did you forget parentheses around the comprehension target?",
169 | ),
170 | (
171 |             "{a, a for a in range(5)}",
172 | "did you forget parentheses around the comprehension target?",
173 | ),
174 | (
175 | "[a, for a in range(5)]",
176 | "did you forget parentheses around the comprehension target?",
177 | ),
178 | (
179 |             "{a, for a in range(5)}",
180 | "did you forget parentheses around the comprehension target?",
181 | ),
182 | ("{**a for a in [{1: 2}]}", "dict unpacking cannot be used in dict comprehension"),
183 | # check cuts
184 | ("(a for a in raise)", "invalid syntax"),
185 | ("(a async for a in raise)", "invalid syntax"),
186 | ],
187 | )
188 | def test_invalid_comprehension(python_parse_file, python_parse_str, tmp_path, source, message):
189 | parse_invalid_syntax(
190 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
191 | )
192 |
193 |
194 | @pytest.mark.parametrize(
195 | "source",
196 | [
197 | "def f(a=1, b):\n\tpass",
198 | "def f(a=1, /, b):\n\tpass",
199 | "lambda x=1, y: x",
200 | "lambda x=1, /, y: x",
201 | ],
202 | )
203 | def test_invalid_parameters(python_parse_file, python_parse_str, tmp_path, source):
204 | parse_invalid_syntax(
205 | python_parse_file,
206 | python_parse_str,
207 | tmp_path,
208 | source,
209 | SyntaxError,
210 | "non-default argument follows default argument",
211 | )
212 |
213 |
214 | @pytest.mark.parametrize(
215 | "source",
216 | [
217 | "def f(a, *):\n\tpass",
218 | "def f(a, *,):\n\tpass",
219 | "def f(a, *, **):\n\tpass",
220 | "lambda a, *: a",
221 | "lambda a, *, **:a",
222 | ],
223 | )
224 | def test_invalid_star_etc(python_parse_file, python_parse_str, tmp_path, source):
225 | parse_invalid_syntax(
226 | python_parse_file,
227 | python_parse_str,
228 | tmp_path,
229 | source,
230 | SyntaxError,
231 | "named arguments must follow bare *",
232 | )
233 |
234 |
235 | @pytest.mark.parametrize(
236 | "source, message",
237 | [
238 | ("with open(a) as {b: 1}:\n\tpass", "cannot assign to dict"),
239 | ("with open(a) as {b}:\n\tpass", "cannot assign to set"),
240 | ("with open(a) as 1:\n\tpass", "cannot assign to 1"),
241 | ],
242 | )
243 | def test_invalid_with_item(python_parse_file, python_parse_str, tmp_path, source, message):
244 | parse_invalid_syntax(
245 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
246 | )
247 |
248 |
249 | @pytest.mark.parametrize(
250 | "source, message",
251 | [
252 | ("for {a} in [[1]]:\n\tpass", "cannot assign to comparison"),
253 | ("async for (a := i)", "cannot assign to named expression"),
254 | ],
255 | )
256 | def test_invalid_for_target(python_parse_file, python_parse_str, tmp_path, source, message):
257 | parse_invalid_syntax(
258 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
259 | )
260 |
261 |
262 | @pytest.mark.parametrize(
263 | "source, message",
264 | [
265 | ("a = (1+1 := 2)", "cannot use assignment expressions with expression"),
266 | ("a := raise", "invalid syntax"),
267 | ],
268 | )
269 | def test_named_expression(python_parse_file, python_parse_str, tmp_path, source, message):
270 | parse_invalid_syntax(
271 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
272 | )
273 |
274 |
275 | @pytest.mark.parametrize(
276 | "source, message",
277 | [
278 | ("a = (*b)", "cannot use starred expression here"),
279 | ("a = (**b)", "cannot use double starred expression here"),
280 | ],
281 | )
282 | def test_invalid_group(python_parse_file, python_parse_str, tmp_path, source, message):
283 | parse_invalid_syntax(
284 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
285 | )
286 |
287 |
288 | @pytest.mark.parametrize(
289 | "source, message",
290 | [
291 | ("from a import b,", "trailing comma not allowed without surrounding parentheses"),
292 | ("from a import b, and 3", "invalid syntax"),
293 | ("from a import raise", "invalid syntax"),
294 | ],
295 | )
296 | def test_invalid_import_from_as_names(
297 | python_parse_file, python_parse_str, tmp_path, source, message
298 | ):
299 | parse_invalid_syntax(
300 | python_parse_file, python_parse_str, tmp_path, source, SyntaxError, message
301 | )
302 |
303 |
304 | @pytest.mark.parametrize(
305 | "source, exception, message",
306 | [
307 | (
308 | "with open(a) as f, b as d:\npass",
309 | IndentationError,
310 | "expected an indented block after 'with' statement on line 1",
311 | ),
312 | (
313 | "\nasync with (open(a) as f, b as d):\npass",
314 | IndentationError,
315 | "expected an indented block after 'with' statement on line 2",
316 | ),
317 | ("with open(a) as f, b as d\npass", SyntaxError, "expected ':'"),
318 | ("\nasync with (open(a) as f, b as d)\npass", SyntaxError, "expected ':'"),
319 | ],
320 | )
321 | def test_invalid_with_stmt(
322 | python_parse_file, python_parse_str, tmp_path, source, exception, message
323 | ):
324 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
325 |
326 |
327 | @pytest.mark.parametrize(
328 | "source, exception, message",
329 | [
330 | (
331 | "try:\npass",
332 | IndentationError,
333 | "expected an indented block after 'try' statement on line 1",
334 | ),
335 | ("try\n\tpass", SyntaxError, "expected ':'"),
336 | ("try:\n\tpass\na = 1", SyntaxError, "expected 'except' or 'finally' block"),
337 | ],
338 | )
339 | def test_invalid_try_stmt(
340 | python_parse_file, python_parse_str, tmp_path, source, exception, message
341 | ):
342 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
343 |
344 |
345 | @pytest.mark.parametrize(
346 | "source, exception, message",
347 | [
348 | (
349 | "try:\n\tpass\nexcept:\npass",
350 | IndentationError,
351 | "expected an indented block after 'except' statement on line 3",
352 | ),
353 | (
354 | "try:\n\tpass\nexcept Exception:\npass",
355 | IndentationError,
356 | "expected an indented block after 'except' statement on line 3",
357 | ),
358 | (
359 | "try:\n\tpass\nexcept Exception as e:\npass",
360 | IndentationError,
361 | "expected an indented block after 'except' statement on line 3",
362 | ),
363 | (
364 | "try:\n\tpass\nexcept ValueError, IndexError as e:",
365 | SyntaxError,
366 | "exception group must be parenthesized",
367 | ),
368 | (
369 | "try:\n\tpass\nexcept ValueError, IndexError:",
370 | SyntaxError,
371 | "exception group must be parenthesized",
372 | ),
373 | (
374 | "try:\n\tpass\nexcept ValueError, IndexError,:",
375 | SyntaxError,
376 | "exception group must be parenthesized",
377 | ),
378 | (
379 | "try:\n\tpass\nexcept ValueError, IndexError, a=1:",
380 | SyntaxError,
381 | "invalid syntax",
382 | ),
383 | ("try:\n\tpass\nexcept Exception\npass", SyntaxError, "expected ':'"),
384 | ("try:\n\tpass\nexcept Exception as e\npass", SyntaxError, "expected ':'"),
385 | ("try:\n\tpass\nexcept\npass", SyntaxError, "expected ':'"),
386 | ],
387 | )
388 | def test_invalid_except_stmt(
389 | python_parse_file, python_parse_str, tmp_path, source, exception, message
390 | ):
391 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
392 |
393 |
394 | @pytest.mark.parametrize(
395 | "source, exception, message",
396 | [
397 | (
398 | "try:\n\tpass\nfinally:\npass",
399 | IndentationError,
400 | "expected an indented block after 'finally' statement on line 3",
401 | ),
402 | (
403 | "try:\n\tpass\nexcept Exception:\n\tpass\nfinally:\npass",
404 | IndentationError,
405 | "expected an indented block after 'finally' statement on line 5",
406 | ),
407 | ],
408 | )
409 | def test_invalid_finally_stmt(
410 | python_parse_file, python_parse_str, tmp_path, source, exception, message
411 | ):
412 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
413 |
414 |
415 | @pytest.mark.skipif(sys.version_info < (3, 10), reason="Valid only in Python 3.10+")
416 | @pytest.mark.parametrize(
417 | "source, exception, message",
418 | [
419 | ("match a\n\tpass", SyntaxError, "expected ':'"),
420 | (
421 | "match a:\npass",
422 | IndentationError,
423 | "expected an indented block after 'match' statement on line 1",
424 | ),
425 | ],
426 | )
427 | def test_invalid_match_stmt(
428 | python_parse_file, python_parse_str, tmp_path, source, exception, message
429 | ):
430 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
431 |
432 |
433 | @pytest.mark.skipif(sys.version_info < (3, 10), reason="Valid only in Python 3.10+")
434 | @pytest.mark.parametrize(
435 | "source, exception, message",
436 | [
437 | ("match a:\n\tcase 1\n\t\tpass", SyntaxError, "expected ':'"),
438 | (
439 | "match a:\n\tcase 1:\n\tpass",
440 | IndentationError,
441 | "expected an indented block after 'case' statement on line 2",
442 | ),
443 | ],
444 | )
445 | def test_invalid_case_stmt(
446 | python_parse_file, python_parse_str, tmp_path, source, exception, message
447 | ):
448 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
449 |
450 |
451 | @pytest.mark.skipif(sys.version_info < (3, 10), reason="Valid only in Python 3.10+")
452 | @pytest.mark.parametrize(
453 | "source, exception, message",
454 | [
455 | # As pattern
456 | ("match a:\n\tcase 1 as _:\n\t\tpass", SyntaxError, "cannot use '_' as a target"),
457 | (
458 | "match a:\n\tcase 1 as 1+1:\n\tpass",
459 | SyntaxError,
460 | "invalid pattern target",
461 | ),
462 | # Class pattern
463 | (
464 | "match a:\n\tcase Foo(z=1, y=2, x):\n\tpass",
465 | SyntaxError,
466 | "positional patterns follow keyword patterns",
467 | ),
468 | (
469 | "match a:\n\tcase Foo(a, z=1, y=2, x):\n\tpass",
470 | SyntaxError,
471 | "positional patterns follow keyword patterns",
472 | ),
473 | (
474 | "match a:\n\tcase Foo(z=1, x, y=2):\n\tpass",
475 | SyntaxError,
476 | "positional patterns follow keyword patterns",
477 | ),
478 | (
479 | "match a:\n\tcase Foo(a=b, c, d=e, f, g=h, i, j=k, ...):\n\tpass",
480 | SyntaxError,
481 | "positional patterns follow keyword patterns",
482 | ),
483 | ],
484 | )
485 | def test_invalid_case_pattern(
486 | python_parse_file, python_parse_str, tmp_path, source, exception, message
487 | ):
488 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
489 |
490 |
491 | @pytest.mark.parametrize(
492 | "source, exception, message",
493 | [
494 | ("if a\n\tpass", SyntaxError, "expected ':'"),
495 | (
496 | "if a:\npass",
497 | IndentationError,
498 | "expected an indented block after 'if' statement on line 1",
499 | ),
500 | ],
501 | )
502 | def test_invalid_if_stmt(
503 | python_parse_file, python_parse_str, tmp_path, source, exception, message
504 | ):
505 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
506 |
507 |
508 | @pytest.mark.parametrize(
509 | "source, exception, message",
510 | [
511 | ("if a:\n\tpass\nelif a\n\tpass", SyntaxError, "expected ':'"),
512 | (
513 | "if a:\n\tpass\nelif b:\npass",
514 | IndentationError,
515 | "expected an indented block after 'elif' statement on line 3",
516 | ),
517 | ],
518 | )
519 | def test_invalid_elif_stmt(
520 | python_parse_file, python_parse_str, tmp_path, source, exception, message
521 | ):
522 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
523 |
524 |
525 | @pytest.mark.parametrize(
526 | "source, exception, message",
527 | [
528 | ("if a:\n\tpass\nelse\n\tpass", SyntaxError, "expected ':'"),
529 | (
530 | "if a:\n\tpass\nelse:\npass",
531 | IndentationError,
532 | "expected an indented block after 'else' statement on line 3",
533 | ),
534 | ],
535 | )
536 | def test_invalid_else_stmt(
537 | python_parse_file, python_parse_str, tmp_path, source, exception, message
538 | ):
539 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
540 |
541 |
542 | @pytest.mark.parametrize(
543 | "source, exception, message",
544 | [
545 | ("while a\n\tpass", SyntaxError, "expected ':'"),
546 | (
547 | "while a:\npass",
548 | IndentationError,
549 | "expected an indented block after 'while' statement on line 1",
550 | ),
551 | ],
552 | )
553 | def test_invalid_while_stmt(
554 | python_parse_file, python_parse_str, tmp_path, source, exception, message
555 | ):
556 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
557 |
558 |
559 | @pytest.mark.parametrize(
560 | "source, exception, message",
561 | [
562 | (
563 | "for a in range(10):\npass",
564 | IndentationError,
565 | "expected an indented block after 'for' statement on line 1",
566 | ),
567 | (
568 | "async for a in range(10):\npass",
569 | IndentationError,
570 | "expected an indented block after 'for' statement on line 1",
571 | ),
572 | (
573 | "for a in raise:\npass",
574 | SyntaxError,
575 | "invalid syntax",
576 | ),
577 | (
578 | "async for a in raise:\npass",
579 | SyntaxError,
580 | "invalid syntax",
581 | ),
582 | ],
583 | )
584 | def test_invalid_for_stmt(
585 | python_parse_file, python_parse_str, tmp_path, source, exception, message
586 | ):
587 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
588 |
589 |
590 | @pytest.mark.parametrize(
591 | "source, exception, message",
592 | [
593 | (
594 | "def f():\npass",
595 | IndentationError,
596 | "expected an indented block after function definition on line 1",
597 | ),
598 | (
599 | "async def f():\npass",
600 | IndentationError,
601 | "expected an indented block after function definition on line 1",
602 | ),
603 | (
604 | "def f(a,):\npass",
605 | IndentationError,
606 | "expected an indented block after function definition on line 1",
607 | ),
608 | (
609 | "def f() -> None:\npass",
610 | IndentationError,
611 | "expected an indented block after function definition on line 1",
612 | ),
613 | # (
614 | # "def f():\n# type: () -> int\n# type: () -> str\n\tpass",
615 | # SyntaxError,
616 | # "expected an indented block after function definition on line 1",
617 | # ),
618 | ],
619 | )
620 | def test_invalid_def_stmt(
621 | python_parse_file, python_parse_str, tmp_path, source, exception, message
622 | ):
623 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
624 |
625 |
626 | @pytest.mark.parametrize(
627 | "source, exception, message",
628 | [
629 | (
630 | "class A:\npass",
631 | IndentationError,
632 | "expected an indented block after class definition on line 1",
633 | ),
634 | (
635 | "class f(object):\npass",
636 | IndentationError,
637 | "expected an indented block after class definition on line 1",
638 | ),
639 | ],
640 | )
641 | def test_invalid_class_stmt(
642 | python_parse_file, python_parse_str, tmp_path, source, exception, message
643 | ):
644 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
645 |
646 |
647 | @pytest.mark.parametrize(
648 | "source, exception, message",
649 | [
650 | ("{a: 1, b}", SyntaxError, "':' expected after dictionary key"),
651 | ("{a: 1, c: 2, b}", SyntaxError, "':' expected after dictionary key"),
652 | ("{a: 1, b:}", SyntaxError, "expression expected after dictionary key and ':'"),
653 | ("{c: 1, a: *b}", SyntaxError, "cannot use a starred expression in a dictionary value"),
654 | ("{b:}", SyntaxError, "expression expected after dictionary key and ':'"),
655 | ("{b:, c}", SyntaxError, "expression expected after dictionary key and ':'"),
656 | ("{a: *b}", SyntaxError, "cannot use a starred expression in a dictionary value"),
657 | ("{**c, a: *b}", SyntaxError, "cannot use a starred expression in a dictionary value"),
658 | ],
659 | )
660 | def test_invalid_dict_key_value(
661 | python_parse_file, python_parse_str, tmp_path, source, exception, message
662 | ):
663 | parse_invalid_syntax(python_parse_file, python_parse_str, tmp_path, source, exception, message)
664 |
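
Every case above funnels through parse_invalid_syntax, which checks both the exception class and a message substring. A hedged, standalone illustration of the same pattern, with CPython's built-in compile() standing in for python_parse_str (the message text is version-dependent, hence the skipif):

    import sys

    import pytest

    @pytest.mark.skipif(sys.version_info < (3, 10), reason="message text added in CPython 3.10")
    def test_missing_else_message():
        # compile() raises SyntaxError, much like the parser entry points above.
        with pytest.raises(SyntaxError) as excinfo:
            compile("a if b", "<example>", "eval")
        assert "expected 'else' after 'if' expression" in str(excinfo.value)
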
--------------------------------------------------------------------------------
/tests/python_parser/test_unsupported_syntax.py:
--------------------------------------------------------------------------------
1 | """Test identifying unsupported syntax construction in older Python versions.
2 |
3 | Note that we can request the parser to apply stricter bounds on the parsing but
4 | not broader since we would not be able to generate the proper ast nodes.
5 |
6 | """
7 | import io
8 | import sys
9 | import tokenize
10 |
11 | import pytest
12 |
13 | from pegen.tokenizer import Tokenizer
14 |
15 | # matrix multiplication operator, new in Python 3.5
16 | @pytest.mark.parametrize("source", ["a @ b", "a @= b"])
17 | def test_mat_mult(python_parser_cls, source):
18 | temp = io.StringIO(source)
19 | tokengen = tokenize.generate_tokens(temp.readline)
20 | tokenizer = Tokenizer(tokengen, verbose=False)
21 | pp = python_parser_cls(tokenizer, py_version=(3, 4))
22 | with pytest.raises(SyntaxError) as e:
23 | pp.parse("file")
24 |
25 | assert "The '@' operator is" in e.exconly()
26 |
27 |
28 | # await expressions, new in Python 3.5
29 | def test_await(python_parser_cls):
30 | temp = io.StringIO("await b")
31 | tokengen = tokenize.generate_tokens(temp.readline)
32 | tokenizer = Tokenizer(tokengen, verbose=False)
33 | pp = python_parser_cls(tokenizer, py_version=(3, 4))
34 | with pytest.raises(SyntaxError) as e:
35 | pp.file()
36 |
37 | assert "Await expressions are" in e.exconly()
38 |
39 |
40 | # async statements, new in Python 3.5
41 | @pytest.mark.parametrize(
42 | "source, message",
43 | [
44 | ("async def f():\n pass", "Async functions are"),
45 | ("async with a:\n pass", "Async with statements are"),
46 | ("async for a in b:\n pass", "Async for loops are"),
47 | ],
48 | )
49 | def test_async(python_parser_cls, source, message):
50 | temp = io.StringIO(source)
51 | tokengen = tokenize.generate_tokens(temp.readline)
52 | tokenizer = Tokenizer(tokengen, verbose=False)
53 | pp = python_parser_cls(tokenizer, py_version=(3, 4))
54 | with pytest.raises(SyntaxError) as e:
55 | pp.file()
56 |
57 | assert message in e.exconly()
58 |
59 |
60 | # async comprehensions, new in Python 3.6
61 | def test_async_comprehension(python_parser_cls):
62 | temp = io.StringIO("""[a async for a in b if c]""")
63 | tokengen = tokenize.generate_tokens(temp.readline)
64 | tokenizer = Tokenizer(tokengen, verbose=False)
65 | pp = python_parser_cls(tokenizer, py_version=(3, 5))
66 | with pytest.raises(SyntaxError) as e:
67 | pp.file()
68 | assert "Async comprehensions are" in e.exconly()
69 |
70 |
71 | # variable annotations, new in Python 3.6
72 | @pytest.mark.parametrize("source", ["a: int = 1", "(a): int "])
73 | def test_variable_annotation(python_parser_cls, source):
74 | temp = io.StringIO(source)
75 | tokengen = tokenize.generate_tokens(temp.readline)
76 | tokenizer = Tokenizer(tokengen, verbose=False)
77 | pp = python_parser_cls(tokenizer, py_version=(3, 5))
78 | with pytest.raises(SyntaxError) as e:
79 | pp.file()
80 |
81 | assert "Variable annotation syntax is" in e.exconly()
82 |
83 |
84 | # positional-only arguments, new in Python 3.8
85 | @pytest.mark.parametrize("source", ["def f(a,/):\n\tpass", "def f(a=1,/):\n\tpass"])
86 | def test_pos_only_args(python_parser_cls, source):
87 | temp = io.StringIO(source)
88 | tokengen = tokenize.generate_tokens(temp.readline)
89 | tokenizer = Tokenizer(tokengen, verbose=False)
90 | pp = python_parser_cls(tokenizer, py_version=(3, 7))
91 | with pytest.raises(SyntaxError) as e:
92 | pp.file()
93 |
94 | assert "Positional only arguments are" in e.exconly()
95 |
96 |
97 | # assignment expressions (walrus operator), new in Python 3.8
98 | @pytest.mark.parametrize("source", ["a := 1"])
99 | def test_assignment_operator(python_parser_cls, source):
100 | temp = io.StringIO(source)
101 | tokengen = tokenize.generate_tokens(temp.readline)
102 | tokenizer = Tokenizer(tokengen, verbose=False)
103 | pp = python_parser_cls(tokenizer, py_version=(3, 7))
104 | with pytest.raises(SyntaxError) as e:
105 | pp.file()
106 |
107 | assert "The ':=' operator is" in e.exconly()
108 |
109 |
110 | # relaxed (generic) decorator grammar, new in Python 3.9
111 | @pytest.mark.parametrize("source", ["@f[1]\ndef f():\n\tpass"])
112 | def test_generic_decorators(python_parser_cls, source):
113 | temp = io.StringIO(source)
114 | tokengen = tokenize.generate_tokens(temp.readline)
115 | tokenizer = Tokenizer(tokengen, verbose=False)
116 | pp = python_parser_cls(tokenizer, py_version=(3, 8))
117 | with pytest.raises(SyntaxError) as e:
118 | pp.file()
119 |
120 | assert "Generic decorator are" in e.exconly()
121 |
122 |
123 | # parenthesized with-items, new in Python 3.9
124 | @pytest.mark.parametrize("source", ["with (a, b):\n\tpass"])
125 | def test_parenthesized_with_items(python_parser_cls, source):
126 | temp = io.StringIO(source)
127 | tokengen = tokenize.generate_tokens(temp.readline)
128 | tokenizer = Tokenizer(tokengen, verbose=False)
129 | pp = python_parser_cls(tokenizer, py_version=(3, 8))
130 | with pytest.raises(SyntaxError) as e:
131 | pp.file()
132 |
133 | assert "Parenthesized with items" in e.exconly()
134 |
135 |
136 | # match statements, new in Python 3.10
137 | @pytest.mark.parametrize(
138 | "source", ["match a:\n\tcase 1:\n\t\tpass", "match a", "match a:\ncase b"]
139 | )
140 | def test_match_statement(python_parser_cls, source):
141 | temp = io.StringIO(source)
142 | tokengen = tokenize.generate_tokens(temp.readline)
143 | tokenizer = Tokenizer(tokengen, verbose=False)
144 | pp = python_parser_cls(tokenizer, py_version=(3, 9))
145 | with pytest.raises(SyntaxError) as e:
146 | pp.file()
147 |
148 | assert "Pattern matching is" in e.exconly()
149 |
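
Each test above repeats the same four-line tokenizer/parser setup before pinning py_version. A hedged refactoring sketch (not in the repo) of a helper that could fold that boilerplate; python_parser_cls is the conftest fixture the tests already receive:

    import io
    import tokenize

    from pegen.tokenizer import Tokenizer

    def make_bounded_parser(parser_cls, source: str, py_version: tuple):
        """Build a parser instance that enforces an older Python version bound."""
        tokengen = tokenize.generate_tokens(io.StringIO(source).readline)
        return parser_cls(Tokenizer(tokengen, verbose=False), py_version=py_version)

    # Usage inside a test:
    #     pp = make_bounded_parser(python_parser_cls, "a := 1", (3, 7))
    #     with pytest.raises(SyntaxError) as e:
    #         pp.file()
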
--------------------------------------------------------------------------------
/tests/python_parser/utils.py:
--------------------------------------------------------------------------------
1 | import importlib.util
2 | import io
3 | import sys
4 | import textwrap
5 | import token
6 | import tokenize
7 | from typing import IO, Any, Dict, Final, Optional, Type, cast
8 |
9 | from pegen.grammar import Grammar
10 | from pegen.grammar_parser import GeneratedParser as GrammarParser
11 | from pegen.parser import Parser
12 | from pegen.python_generator import PythonParserGenerator
13 | from pegen.tokenizer import Tokenizer
14 |
15 | ALL_TOKENS = token.tok_name
16 | EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore
17 | NON_EXACT_TOKENS = {
18 | name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
19 | }
20 |
21 |
22 | def generate_parser(
23 | grammar: Grammar, parser_path: Optional[str] = None, parser_name: str = "GeneratedParser"
24 | ) -> Type[Parser]:
25 | # Generate a parser.
26 | out = io.StringIO()
27 | genr = PythonParserGenerator(grammar, out)
28 | genr.generate("")
29 |
30 | # Load the generated parser class.
31 | ns: Dict[str, Any] = {}
32 | if parser_path:
33 | with open(parser_path, "w") as f:
34 | f.write(out.getvalue())
35 | mod = import_file("py_parser", parser_path)
36 | return getattr(mod, parser_name)
37 | else:
38 | exec(out.getvalue(), ns)
39 | return ns[parser_name]
40 |
41 |
42 | def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
43 | # Run a parser on a file (stream).
44 | tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore # typeshed issue #3515
45 | parser = parser_class(tokenizer, verbose=verbose)
46 | result = parser.start()
47 | if result is None:
48 | raise parser.make_syntax_error("invalid syntax")
49 | return result
50 |
51 |
52 | def parse_string(
53 | source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
54 | ) -> Any:
55 | # Run the parser on a string.
56 | if dedent:
57 | source = textwrap.dedent(source)
58 | file = io.StringIO(source)
59 | return run_parser(file, parser_class, verbose=verbose) # type: ignore # typeshed issue #3515
60 |
61 |
62 | def make_parser(source: str) -> Type[Parser]:
63 | # Combine parse_string() and generate_parser().
64 | grammar = parse_string(source, GrammarParser)
65 | return generate_parser(grammar)
66 |
67 |
68 | def import_file(full_name: str, path: str) -> Any:
69 | """Import a python module from a path"""
70 |
71 | spec = importlib.util.spec_from_file_location(full_name, path)
72 | mod = importlib.util.module_from_spec(spec)
73 |
74 | # We assume this is not None and has an exec_module() method.
75 | # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
76 | loader = cast(Any, spec.loader)
77 | loader.exec_module(mod)
78 | return mod
79 |
80 |
81 | def print_memstats() -> bool:
82 | MiB: Final = 2 ** 20
83 | try:
84 | import psutil # type: ignore
85 | except ImportError:
86 | return False
87 | print("Memory stats:")
88 | process = psutil.Process()
89 | meminfo = process.memory_info()
90 | res = {}
91 | res["rss"] = meminfo.rss / MiB
92 | res["vms"] = meminfo.vms / MiB
93 | if sys.platform == "win32":
94 | res["maxrss"] = meminfo.peak_wset / MiB
95 | else:
96 | # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
97 | import resource # Since it doesn't exist on Windows.
98 |
99 | rusage = resource.getrusage(resource.RUSAGE_SELF)
100 | if sys.platform == "darwin":
101 | factor = 1
102 | else:
103 | factor = 1024 # Linux
104 | res["maxrss"] = rusage.ru_maxrss * factor / MiB
105 | for key, value in res.items():
106 | print(f" {key:12.12s}: {value:10.0f} MiB")
107 | return True
108 |
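
make_parser() and parse_string() together turn a PEG grammar into a runnable parser. A hedged usage sketch with a toy grammar (the grammar text follows pegen's metagrammar, and "start" is the entry rule; without actions, rules return the matched tokens):

    toy_grammar = """
    start: expr NEWLINE
    expr: NUMBER
    """

    parser_class = make_parser(toy_grammar)    # grammar -> GeneratedParser subclass
    node = parse_string("42\n", parser_class)  # run it on a string
    print(node)
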
--------------------------------------------------------------------------------
/tests/test_core.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import inspect
3 |
4 | import pytest
5 |
6 | import mingshe.core
7 |
8 |
9 | @pytest.mark.parametrize(
10 | "raw,result",
11 | [
12 |         # pipeline operator
13 | (
14 | "1 |> print",
15 | "print(1)",
16 | ),
17 | (
18 | "[1] |> max",
19 | "max([1])",
20 | ),
21 | (
22 | "{1} |> max",
23 | "max({1})",
24 | ),
25 | (
26 | "{'a': 1} |> max",
27 | "max({'a': 1})",
28 | ),
29 | (
30 | "range(10) |> sum |> print",
31 | "print(sum(range(10)))",
32 | ),
33 | (
34 | """
35 | "hello" |> print
36 | "world" |> print
37 | """,
38 | """
39 | print("hello")
40 | print("world")
41 | """,
42 | ),
43 | (
44 | "10 |> partial(print, 'num:')",
45 | "partial(print, 'num:')(10)",
46 | ),
47 |         # conditional (ternary) operator
48 | (
49 | "a ? b : c",
50 | "b if a else c",
51 | ),
52 | (
53 | "a ? (b ? d : e) : c",
54 | "(d if b else e) if a else c",
55 | ),
56 |         # partial application
57 | (
58 | "f(?)",
59 | "(lambda f: (lambda _0: f(_0)))(f)",
60 | ),
61 | (
62 | "pow(?, 2)",
63 | "(lambda f: lambda _0: f(_0, 2))(pow)",
64 | ),
65 | (
66 | "f(a, b=?)",
67 | "(lambda _p_0, f: lambda _0: f(_p_0, b=_0))(a, f)",
68 | ),
69 | (
70 | "f(?, b=0)",
71 | "(lambda f: lambda _0: f(_0, b=0))(f)",
72 | ),
73 | (
74 | "f(?, b=t0)",
75 | "(lambda f, b: lambda _0: f(_0, b=b))(f, b=t0)",
76 | ),
77 | (
78 | "f(1, *?)",
79 | "(lambda f: lambda _0: f(1, *_0))(f)",
80 | ),
81 | (
82 | "f(a, **?)",
83 | "(lambda _p_0, f: lambda _0: f(_p_0, **_0))(a, f)",
84 | ),
85 | (
86 | "f(a, *?, **?)",
87 | "(lambda _p_0, f: lambda _0, _1: f(_p_0, *_0, **_1))(a, f)",
88 | ),
89 | (
90 | "json.dumps(?, ensure_ascii=False)",
91 | "(lambda f: lambda _0: f(_0, ensure_ascii=False))(json.dumps)",
92 | ),
93 |         # nullish coalescing
94 | (
95 | "a ?? b",
96 | "(lambda arg0, arg1: _1 if (_1 := arg0()) is not None else arg1())(lambda : a, lambda : b)",
97 | ),
98 | (
99 | "a ?? b ?? c",
100 | "(lambda arg0, arg1, arg2: _2 if (_2 := (_1 if (_1 := arg0()) is not None else arg1())) is not None else arg2())(lambda : a, lambda : b, lambda : c)",
101 | ),
102 |         # optional chaining
103 | (
104 | "a?.b",
105 | "(lambda : None if (_ := a) is None else _.b)()",
106 | ),
107 | (
108 | "a?[b]",
109 | "(lambda : None if (_ := a) is None else _[b])()",
110 | ),
111 | (
112 | "a?.b()",
113 | "(lambda : None if (_ := a) is None else _.b())()",
114 | ),
115 | (
116 | "a?.b?.c",
117 | "(lambda : None if (_ := (None if (_ := a) is None else _.b)) is None else _.c)()"
118 | ),
119 | (
120 | "a?[b]?[c]",
121 | "(lambda : None if (_ := (None if (_ := a) is None else _[b])) is None else _[c])()"
122 | ),
123 | (
124 | "a?.b()?.c()",
125 | "(lambda : None if (_ := (None if (_ := a) is None else _.b())) is None else _.c())()"
126 | ),
127 | (
128 | "(p |> f)?[b]()",
129 | "(lambda : None if (_ := f(p)) is None else _[b])()()",
130 | ),
131 |         # mapping unpacking
132 | (
133 | "{ x } = {'x': '1'}",
134 | "(x,) = (lambda **kwargs: (kwargs.get('x'),))(**{'x': '1'})",
135 | ),
136 | (
137 | "{ x, y } = {'x': '1'}",
138 | "(x, y) = (lambda **kwargs: (kwargs.get('x'), kwargs.get('y')))(**{'x': '1'})",
139 | ),
140 | (
141 | "{ x, y } = name",
142 | "(x, y) = (lambda **kwargs: (kwargs.get('x'), kwargs.get('y')))(**name)",
143 | ),
144 | (
145 | "{ x, y } = d0 + d1",
146 | "(x, y) = (lambda **kwargs: (kwargs.get('x'), kwargs.get('y')))(**(d0 + d1))",
147 | )
148 | ],
149 | )
150 | def test_right_example(raw, result):
151 | assert ast.dump(mingshe.core.compile(inspect.cleandoc(raw))) == ast.dump(
152 | ast.parse(inspect.cleandoc(result))
153 | )
154 |
155 |
156 | @pytest.mark.parametrize(
157 | "string",
158 | [
159 | "1 |> ",
160 | "a ? b",
161 | "a or b ?? c",
162 | "a ?? b or c",
163 | "a?.[b]",
164 | ]
165 | )
166 | def test_wrong_example(string):
167 | with pytest.raises(SyntaxError):
168 | mingshe.core.compile(string)
169 |
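
The table above pins the exact Python each MingShe construct compiles to. A hedged end-to-end sketch, assuming only what this file demonstrates: mingshe.core.compile() returns an ast.Module equivalent to the translated source (and, for the builtin compile() call to work, that the module carries location info):

    import ast

    import mingshe.core

    tree = mingshe.core.compile("range(10) |> sum |> print")
    assert isinstance(tree, ast.Module)       # same shape as ast.parse() output
    exec(compile(tree, "<mingshe>", "exec"))  # prints 45, i.e. print(sum(range(10)))
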
--------------------------------------------------------------------------------
/tests/test_importlib.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from pathlib import Path
3 |
4 | import pytest
5 |
6 | from mingshe.importlib import install_meta, uninstall_meta
7 |
8 |
9 | def test_importlib():
10 | path = str(Path(__file__).absolute().parent / "for_test_importlib")
11 | sys.path.insert(0, path)
12 | try:
13 | install_meta(".she")
14 |
15 | import a
16 | assert a.name == "a"
17 |
18 | import p
19 | assert p.name == "p"
20 |
21 | from t.b import name
22 | assert name == "t.b"
23 |
24 | uninstall_meta(".she")
25 |
26 | del sys.modules["a"]
27 | del sys.modules["p"]
28 | del sys.modules["t.b"]
29 |
30 | with pytest.raises(ImportError):
31 | import a
32 |
33 | finally:
34 | sys.path.remove(path)
35 |
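
The test exercises the hook end to end. A smaller hedged check; the assumption (not verified against the source) is that install_meta registers exactly one finder on sys.meta_path and uninstall_meta removes it:

    import sys

    from mingshe.importlib import install_meta, uninstall_meta

    before = len(sys.meta_path)
    install_meta(".she")
    assert len(sys.meta_path) == before + 1  # assumed: one finder registered
    uninstall_meta(".she")
    assert len(sys.meta_path) == before      # assumed: that finder removed
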
--------------------------------------------------------------------------------
/tests/test_version.py:
--------------------------------------------------------------------------------
1 | from mingshe.__version__ import VERSION, __version__
2 |
3 |
4 | def test_version():
5 |     # VERSION must expose at least (major, minor, patch) components;
6 |     # an IndexError here fails the test.
7 |     VERSION[0], VERSION[1], VERSION[2]
8 |
9 | assert isinstance(__version__, str)
10 |
--------------------------------------------------------------------------------