├── .editorconfig ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE.md └── workflows │ └── ci-linux-ubuntu.yml ├── .gitignore ├── AUTHORS.md ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── PULL_REQUEST_TEMPLATE.md ├── README.rst ├── docs ├── about │ ├── CONTRIBUTING.md │ └── LICENSE.md ├── actions.md ├── common.md ├── css │ └── version-select.css ├── debugging.md ├── disambiguation.md ├── getting_started.md ├── grammar.md ├── grammar_language.md ├── grammar_modularization.md ├── handling_errors.md ├── images │ ├── calc.pg │ ├── calc.pg.dot.png │ ├── calc_forest.dot.png │ ├── calc_trace.dot.png │ ├── glr_infinite_recursion.svg │ ├── parglare-logo.png │ └── parglare-logo.svg ├── index.md ├── js │ └── version-select.js ├── lr_parsing.md ├── parse_forest_trees.md ├── parser.md ├── pglr.md ├── recognizers.md ├── release_notes │ ├── release_0_14.md │ └── release_0_15.md └── style.css ├── examples ├── bibtex │ ├── bibtex.pg │ ├── bibtex.py │ └── test.bib ├── c │ ├── README.md │ ├── c.pg │ ├── c2.pg │ └── c_example.py ├── calc │ └── calc.py ├── csv │ └── csv.py ├── custom_table_caching │ ├── .gitignore │ ├── README.md │ ├── compile.py │ ├── grammar.py │ └── parser.py ├── java │ ├── README.md │ ├── TomcatServletWebServerFactory.java │ ├── java16.pg │ └── java_example.py ├── json │ ├── example1.json │ ├── example2.json │ ├── example3.json │ ├── example4.json │ ├── example5.json │ ├── json.pg │ └── json_example.py ├── molecular_formulas │ ├── README.md │ ├── parglare_mw.py │ ├── ply_mw.py │ └── run_test.py ├── quick_intro.py ├── rhapsody │ ├── LightSwitch.rpy │ ├── rhapsody.pg │ └── rhapsody.py └── robot │ ├── README.md │ ├── program.rbt │ ├── robot.pg │ ├── robot.pg.dot.png │ └── robot.py ├── install-dev.sh ├── install-test.sh ├── mkdocs.yml ├── parglare ├── __init__.py ├── actions.py ├── cli.py ├── closure.py ├── common.py ├── exceptions.py ├── export.py ├── glr.py ├── grammar.py ├── parser.py ├── tables │ ├── __init__.py │ └── persist.py 
├── termui.py └── trees.py ├── pyproject.toml ├── runtests.sh ├── scripts └── parglare_qtree.py └── tests ├── func ├── __init__.py ├── actions │ ├── __init__.py │ ├── collector │ │ ├── grammar.pg │ │ └── test_actions_get_collector.py │ ├── test_actions.py │ └── test_builtin_actions.py ├── grammar │ ├── __init__.py │ ├── calc.pg │ ├── calcactions.py │ ├── expression_grammar.py │ ├── expression_grammar_numbers.py │ ├── test_grammar.py │ ├── test_groups.py │ ├── test_keywords.py │ ├── test_layout.py │ ├── test_load_from_file.py │ ├── test_meta_data.py │ ├── test_repeatable.py │ ├── test_special_grammars.py │ ├── test_terminals.py │ └── test_whitespaces.py ├── import │ ├── basic │ │ ├── first.pg │ │ ├── second.pg │ │ ├── submodule │ │ │ └── third.pg │ │ └── test_import.py │ ├── diamond │ │ ├── base.pg │ │ ├── components.pg │ │ ├── model.pg │ │ ├── modules.pg │ │ ├── packages.pg │ │ └── test_diamond.py │ ├── fqn │ │ ├── A.pg │ │ ├── B.pg │ │ ├── C.pg │ │ ├── D.pg │ │ └── test_fqn.py │ ├── imported_actions │ │ ├── by_action_name │ │ │ ├── base.pg │ │ │ ├── base_actions.py │ │ │ ├── components.pg │ │ │ └── model.pg │ │ ├── by_decorator_action_name │ │ │ ├── base.pg │ │ │ ├── base_actions.py │ │ │ ├── components.pg │ │ │ └── model.pg │ │ ├── by_symbol_name │ │ │ ├── base.pg │ │ │ ├── base_actions.py │ │ │ ├── components.pg │ │ │ └── model.pg │ │ ├── in_grammar_by_action_name │ │ │ ├── base.pg │ │ │ ├── base_actions.py │ │ │ ├── components.pg │ │ │ ├── model.pg │ │ │ └── model_actions.py │ │ ├── in_grammar_by_symbol_name │ │ │ ├── base.pg │ │ │ ├── base_actions.py │ │ │ ├── components.pg │ │ │ ├── model.pg │ │ │ └── model_actions.py │ │ └── test_imported_actions.py │ ├── imported_recognizers │ │ ├── __init__.py │ │ ├── base.pg │ │ ├── base_recognizers.py │ │ ├── components.pg │ │ ├── model.pg │ │ ├── model_override.pg │ │ ├── model_override_recognizers.py │ │ └── test_imported_recognizers.py │ ├── override │ │ ├── base.pg │ │ ├── first.pg │ │ ├── nonexisting.pg │ │ ├── 
second.pg │ │ └── test_override.py │ └── recursion │ │ ├── base.pg │ │ ├── components.pg │ │ ├── model.pg │ │ ├── modules.pg │ │ ├── packages.pg │ │ └── test_recursion.py ├── parsing │ ├── __init__.py │ ├── parsing_errors.txt │ ├── parsing_from_file.txt │ ├── test_build_tree.py │ ├── test_conflicts.py │ ├── test_dynamic_disambiguation_filters.py │ ├── test_error_recovery.py │ ├── test_glr_error_recovery.py │ ├── test_glr_forest.py │ ├── test_glr_forest_disambiguation.py │ ├── test_glr_parsing.py │ ├── test_greedy.py │ ├── test_lexical_disambiguation.py │ ├── test_objects.py │ ├── test_parse_context.py │ ├── test_parser_construction.py │ ├── test_parsing.py │ ├── test_parsing_errors.py │ ├── test_to_dot.py │ └── test_to_str.py ├── persistence │ ├── calc_with_actions │ │ ├── calc.pg │ │ ├── calc_actions.py │ │ ├── test_table_persistance.py │ │ └── variable.pg │ └── compare_table │ │ ├── base.pg │ │ ├── components.pg │ │ ├── model.pg │ │ ├── model.pgt │ │ ├── model_compare.pgt │ │ ├── modules.pg │ │ ├── packages.pg │ │ └── test_compare_table.py ├── pglr │ ├── __init__.py │ ├── grammar.pg │ └── test_pglr.py ├── recognizers │ ├── __init__.py │ ├── collector │ │ ├── grammar.pg │ │ └── test_recognizers_get_collector.py │ ├── test_recognizer_context.py │ └── test_recognizers.py ├── regressions │ ├── issue38 │ │ ├── names.pg │ │ └── test_issue_38_unicode_py2.py │ ├── issue52 │ │ ├── grammar.pg │ │ └── test_issue52_table_caching.py │ ├── issue97 │ │ ├── first.pg │ │ ├── second.pg │ │ └── test_issue97.py │ ├── test_glr_list_building_bug.py │ ├── test_issue147.py │ ├── test_issue31_glr_drop_parses_on_lexical_ambiguity.py │ ├── test_issue32.py │ ├── test_issue64.py │ ├── test_issue73.py │ ├── test_issue_110_wrong_positions_with_glr_and_empty.py │ ├── test_issue_112.py │ ├── test_issue_114.py │ ├── test_issue_22_wrong_associativity_behaviour.py │ ├── test_issue_23_multiple_rules_with_assignments.py │ ├── test_issue_54_collector_unicode.py │ ├── 
test_recognizer_nonexisting_terminal │ │ ├── grammar.pg │ │ ├── grammar_recognizers.py │ │ └── test_recognizer_nonexisting_terminal.py │ ├── test_regex_alternative_match_bug.py │ └── test_terminal_exists_noerror_on_terminal_definition_before.py ├── test_common_errors.py ├── test_examples.py └── test_export.py └── perf ├── profile.sh ├── report_grammar.py ├── reports ├── cpu-report.txt ├── grammar-sizes.txt └── mem-report.txt ├── requirements.txt ├── runall.sh ├── test1 ├── g.pg ├── input1 ├── input2 ├── input3 ├── input4 ├── input5 └── input6 ├── test2 ├── g.pg ├── input1 ├── input2 ├── input3 ├── input4 ├── input5 └── input6 ├── test3 ├── g.pg ├── input1 ├── input2 ├── input3 ├── input4 ├── input5 └── input6 ├── test_cpu.py ├── test_mem.py └── tests.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | tests/perf/test_inputs/* linguist-vendored 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * parglare version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 
14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.github/workflows/ci-linux-ubuntu.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - 'gh-pages' 7 | pull_request: 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-20.04 13 | strategy: 14 | matrix: 15 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | - name: Install test dependencies 24 | run: | 25 | ./install-test.sh 26 | - name: Run unit tests 27 | run: | 28 | ./runtests.sh 29 | - name: Coveralls 30 | if: github.ref == 'refs/heads/master' 31 | env: 32 | COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} 33 | run: | 34 | coveralls 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # pyenv python configuration file 62 | .python-version 63 | 64 | # Other 65 | *.dot 66 | .pytest_cache 67 | site 68 | issues-temp 69 | tests/perf/*.pstats 70 | *.pgt 71 | /.tool-versions 72 | /venv/ 73 | /issues/ 74 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | ## Development Lead 2 | 3 | - Igor R. Dejanovic 4 | 5 | ## Contributors 6 | 7 | For a full list of contributors and their activity 8 | see [here](https://github.com/igordejanovic/parglare/graphs/contributors). 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | See [docs/about/CONTRIBUTING.md](docs/about/CONTRIBUTING.md) 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016-2020, Igor R. 
Dejanovic and contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.md 2 | 3 | include CONTRIBUTING.md 4 | include CHANGELOG.md 5 | include LICENSE 6 | include README.rst 7 | 8 | recursive-include tests * 9 | recursive-exclude * __pycache__ 10 | recursive-exclude * *.py[co] 11 | recursive-exclude * *.log 12 | recursive-exclude * *.dot 13 | recursive-exclude * *.pdf 14 | recursive-exclude * *.pstats 15 | recursive-exclude * *.report 16 | recursive-exclude * *.txt 17 | 18 | recursive-include docs *.md *.png 19 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help lint 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | 32 | clean-build: ## remove build artifacts 33 | rm -fr build/ 34 | rm -fr dist/ 35 | rm -fr .eggs/ 36 | find . -name '*.egg-info' -exec rm -fr {} + 37 | find . -name '*.egg' -exec rm -f {} + 38 | 39 | clean-pyc: ## remove Python file artifacts 40 | find . 
-name '*.pyc' -exec rm -f {} + 41 | find . -name '*.pyo' -exec rm -f {} + 42 | find . -name '*~' -exec rm -f {} + 43 | find . -name '__pycache__' -exec rm -fr {} + 44 | 45 | clean-test: ## remove test and coverage artifacts 46 | rm -fr .tox/ 47 | rm -f .coverage 48 | rm -fr htmlcov/ 49 | 50 | lint: ## check style with flake8 51 | flake8 52 | 53 | test: ## run tests quickly with the default Python 54 | py.test tests/func 55 | 56 | 57 | test-all: ## run tests on every Python version with tox 58 | tox 59 | 60 | coverage: ## check code coverage quickly with the default Python 61 | coverage run --source parglare -m pytest tests/func 62 | 63 | coverage report -m 64 | coverage html 65 | $(BROWSER) htmlcov/index.html 66 | 67 | docs: ## generate MkDocs HTML documentation 68 | mkdocs build 69 | $(BROWSER) docs/_build/html/index.html 70 | 71 | servedocs: ## compile the docs watching for changes 72 | mkdocs serve 73 | $(BROWSER) "http://localhost:8000/" 74 | 75 | release: clean ## package and upload a release 76 | python setup.py sdist upload 77 | python setup.py bdist_wheel upload 78 | 79 | dist: clean ## builds source and wheel package 80 | python setup.py sdist 81 | python setup.py bdist_wheel 82 | ls -l dist 83 | 84 | install: clean ## install the package to the active Python's site-packages 85 | python setup.py install 86 | -------------------------------------------------------------------------------- /PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Code review checklist 4 | 5 | - [ ] Pull request represents a single change (i.e. 
not fixing disparate/unrelated things in a single PR) 6 | - [ ] Title summarizes what is changing 7 | - [ ] Commit messages are meaningful (see [this][commit messages] for details) 8 | - [ ] Tests have been included and/or updated 9 | - [ ] Docstrings have been included and/or updated, as appropriate 10 | - [ ] Standalone docs have been updated accordingly 11 | - [ ] Changelog(s) has/have been updated, as needed (see `CHANGELOG.md`, no need 12 | to update for typo fixes and such). 13 | 14 | 15 | [commit messages]: https://chris.beams.io/posts/git-commit/ 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://raw.githubusercontent.com/igordejanovic/parglare/master/docs/images/parglare-logo.png 2 | 3 | |build-status| |coverage| |docs| |status| |license| |python-versions| 4 | 5 | 6 | A pure Python scannerless LR/GLR parser. 7 | 8 | 9 | For more information see `the docs `_. 10 | 11 | 12 | Quick intro 13 | ----------- 14 | 15 | This is just a small example to get the general idea. This example shows how to 16 | parse and evaluate expressions with 5 operations with different priority and 17 | associativity. Evaluation is done using semantic/reduction actions. 18 | 19 | The whole expression evaluator is done in under 30 lines of code! 20 | 21 | .. 
code:: python 22 | 23 | from parglare import Parser, Grammar 24 | 25 | grammar = r""" 26 | E: E '+' E {left, 1} 27 | | E '-' E {left, 1} 28 | | E '*' E {left, 2} 29 | | E '/' E {left, 2} 30 | | E '^' E {right, 3} 31 | | '(' E ')' 32 | | number; 33 | 34 | terminals 35 | number: /\d+(\.\d+)?/; 36 | """ 37 | 38 | actions = { 39 | "E": [lambda _, n: n[0] + n[2], 40 | lambda _, n: n[0] - n[2], 41 | lambda _, n: n[0] * n[2], 42 | lambda _, n: n[0] / n[2], 43 | lambda _, n: n[0] ** n[2], 44 | lambda _, n: n[1], 45 | lambda _, n: n[0]], 46 | "number": lambda _, value: float(value), 47 | } 48 | 49 | g = Grammar.from_string(grammar) 50 | parser = Parser(g, debug=True, actions=actions) 51 | 52 | result = parser.parse("34 + 4.6 / 2 * 4^2^2 + 78") 53 | 54 | print("Result = ", result) 55 | 56 | # Output 57 | # -- Debugging/tracing output with detailed info about grammar, productions, 58 | # -- terminals and nonterminals, DFA states, parsing progress, 59 | # -- and at the end of the output: 60 | # Result = 700.8 61 | 62 | 63 | Installation 64 | ------------ 65 | 66 | - Stable version: 67 | 68 | .. code:: shell 69 | 70 | $ pip install parglare 71 | 72 | - Development version: 73 | 74 | .. code:: shell 75 | 76 | $ git clone git@github.com:igordejanovic/parglare.git 77 | $ pip install -e parglare 78 | 79 | Citing parglare 80 | --------------- 81 | 82 | If you use parglare in your research please cite this paper: 83 | 84 | .. code:: text 85 | 86 | Igor Dejanović, Parglare: A LR/GLR parser for Python, 87 | Science of Computer Programming, issn:0167-6423, p.102734, 88 | DOI:10.1016/j.scico.2021.102734, 2021. 
89 | 90 | @article{dejanovic2021b, 91 | author = {Igor Dejanović}, 92 | title = {Parglare: A LR/GLR parser for Python}, 93 | doi = {10.1016/j.scico.2021.102734}, 94 | issn = {0167-6423}, 95 | journal = {Science of Computer Programming}, 96 | keywords = {parsing, LR, GLR, Python, visualization}, 97 | pages = {102734}, 98 | url = {https://www.sciencedirect.com/science/article/pii/S0167642321001271}, 99 | year = {2021} 100 | } 101 | 102 | License 103 | ------- 104 | 105 | MIT 106 | 107 | Python versions 108 | --------------- 109 | 110 | Tested with 3.8-3.12 111 | 112 | Credits 113 | ------- 114 | 115 | Initial layout/content of this package was created with `Cookiecutter 116 | `_ and the 117 | `audreyr/cookiecutter-pypackage `_ project template. 118 | 119 | 120 | .. |build-status| image:: https://github.com/igordejanovic/parglare/actions/workflows/ci-linux-ubuntu.yml/badge.svg 121 | :target: https://github.com/igordejanovic/parglare/actions 122 | 123 | .. |coverage| image:: https://coveralls.io/repos/github/igordejanovic/parglare/badge.svg?branch=master 124 | :target: https://coveralls.io/github/igordejanovic/parglare?branch=master 125 | 126 | .. |docs| image:: https://img.shields.io/badge/docs-latest-green.svg 127 | :target: http://www.igordejanovic.net/parglare/latest/ 128 | 129 | .. |status| image:: https://img.shields.io/pypi/status/parglare.svg 130 | 131 | .. |license| image:: https://img.shields.io/badge/License-MIT-blue.svg 132 | :target: https://opensource.org/licenses/MIT 133 | 134 | .. |python-versions| image:: https://img.shields.io/pypi/pyversions/parglare.svg 135 | -------------------------------------------------------------------------------- /docs/about/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! Every little bit 4 | helps, and credit will always be given. 
5 | 6 | You can contribute in many ways: 7 | 8 | 9 | ## Types of Contributions 10 | 11 | ### Report Bugs 12 | 13 | Report bugs at https://github.com/igordejanovic/parglare/issues. 14 | 15 | If you are reporting a bug, please include: 16 | 17 | - Your operating system name and version. 18 | - Any details about your local setup that might be helpful in troubleshooting. 19 | - Detailed steps to reproduce the bug. 20 | 21 | 22 | ### Fix Bugs 23 | 24 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 25 | wanted" is open to whoever wants to implement it. 26 | 27 | 28 | ### Implement Features 29 | 30 | Look through the GitHub issues for features. Anything tagged with "enhancement" 31 | and "help wanted" is open to whoever wants to implement it. 32 | 33 | 34 | ### Write Documentation 35 | 36 | parglare could always use more documentation, whether as part of the official 37 | parglare docs, in docstrings, or even on the web in blog posts, articles, and 38 | such. 39 | 40 | 41 | ### Submit Feedback 42 | 43 | The best way to send feedback is to file an issue at 44 | https://github.com/igordejanovic/parglare/issues. 45 | 46 | If you are proposing a feature: 47 | 48 | - Explain in detail how it would work. 49 | - Keep the scope as narrow as possible, to make it easier to implement. 50 | - Remember that this is a volunteer-driven project, and that contributions are 51 | welcome :) 52 | 53 | 54 | ## Get Started! 55 | 56 | Ready to contribute? Here's how to set up `parglare` for local development. 57 | 58 | 1. Fork the `parglare` repo on GitHub. 59 | 2. Clone your fork locally: 60 | 61 | $ git clone git@github.com:your_name_here/parglare.git 62 | 63 | 3. Install your local copy into a virtual environment. This is how you set up 64 | your fork for local development: 65 | 66 | $ cd parglare/ 67 | $ python -m venv venv 68 | $ source venv/bin/activate 69 | $ ./install-dev.sh 70 | 71 | This is needed just the first time. 
To work on parglare later you just need 72 | to activate the virtual environment for each new terminal session: 73 | 74 | $ cd parglare/ 75 | $ source venv/bin/activate 76 | 77 | 4. Create a branch for local development:: 78 | 79 | $ git checkout -b name-of-your-bugfix-or-feature 80 | 81 | Now you can make your changes locally. 82 | 83 | 5. When you're done making changes, run tests: 84 | 85 | $ ./runtests.sh 86 | 87 | and verify that all tests pass. 88 | 89 | 6. Commit your changes and push your branch to GitHub: 90 | 91 | $ git add . 92 | $ git commit -m "Your detailed description of your changes." 93 | $ git push origin name-of-your-bugfix-or-feature 94 | 95 | Check [this](https://chris.beams.io/posts/git-commit/) on how to write nice 96 | git log messages. 97 | 98 | 7. Submit a pull request through the GitHub website. CI will run the tests for 99 | all supported Python versions. Check in the GitHub UI that all pipelines pass. 100 | 101 | 102 | ## Pull Request Guidelines 103 | 104 | Before you submit a pull request, check that it meets these guidelines: 105 | 106 | 1. The pull request should include tests. 107 | 2. If the pull request adds/changes functionality, the docs should be updated. 108 | 3. The pull request should work for Python 3.8-3.12. Check 109 | https://travis-ci.org/igordejanovic/parglare/pull_requests and make sure that 110 | the tests pass for all supported Python versions. 111 | 112 | 113 | ## Tips 114 | 115 | To run a subset of tests: 116 | 117 | ``` 118 | $ py.test tests/func/mytest.py 119 | ``` 120 | 121 | or a single test: 122 | 123 | ``` 124 | $ py.test tests/func/mytest.py::some_test 125 | ``` 126 | -------------------------------------------------------------------------------- /docs/about/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016-2018, Igor R. 
Dejanović and contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /docs/common.md: -------------------------------------------------------------------------------- 1 | # Common classes and functions 2 | 3 | ## The Context object 4 | 5 | An object of this kind is passed to various callback functions (actions, 6 | recognizers, error recovery etc.). It is not always an instance of the same 7 | class, but all context objects have the following properties: 8 | 9 | - **start_position/end_position** - the beginning and the end in the input 10 | stream where the match occurred. `start_position` is the location of the first 11 | element/character in the input while the `end_position` is one past the last 12 | element/character of the match. Thus `end_position - start_position` will give 13 | the length of the match including the layout. 
You can use 14 | `parglare.pos_to_line_col(input, position)` function to get line and column of 15 | the position. This function returns a tuple `(line, column)`. 16 | 17 | - **file_name** - the name/path of the file being parsed. `None` if Python 18 | string is parsed. 19 | 20 | - **input_str** - the input string (or list of objects) that is being parsed. 21 | 22 | - **layout_content** - is the layout (whitespaces, comments etc.) that are 23 | collected from the previous non-layout match. 24 | 25 | - **layout_content_ahead** - layout content before `token_ahead`. 26 | 27 | - **token**- the token shifted during SHIFT operation. Instance of 28 | `parglare.parser.Token`. 29 | 30 | - **token_ahead** - the token recognized as a lookahead. 31 | 32 | - **production** - an instance of `parglare.grammar.Production` class available 33 | only on reduction actions (not on shifts). Represents the grammar production. 34 | 35 | - **state** - An instance of `parglare.tables.LRState`. The LR state of the 36 | parser automata. This object contains information of the possible actions in 37 | this state. 38 | 39 | - **node** - this is available only if the actions are called over the parse tree 40 | using `call_actions`. It represents the instance of `NodeNonTerm` or `NodeTerm` 41 | classes from the parse tree where the actions is executed. 42 | 43 | - **parser** - is the reference to the parser instance. You should use this only 44 | to investigate parser configuration not to alter its state. 45 | 46 | - **head** - is a reference to the Graph-structured stack node (`GSSNode`). Only 47 | used for GLR parsing. 48 | 49 | - **extra** - this attribute can store arbitrary user information for state 50 | tracking. If not given as a parameter to `parse` call a `dict` is used. 51 | 52 | 53 | ## Location class 54 | 55 | Used at various places in parglare to define location and span in the files 56 | (e.g. for error reporting). 
57 | 58 | ### Attributes 59 | 60 | - **input_str** - the input string being parsed. 61 | 62 | - **file_name** (property) - the name of the file being parsed (`None` if string 63 | is parsed), 64 | 65 | - **start_position/end_position** - an absolute position in the input where the 66 | span starts/ends, 67 | 68 | - **line**/**column** (properties) - line and column where the span starts. 69 | 70 | - **line_end**/**column_end** (properties) - line and column where the span 71 | ends. 72 | 73 | 74 | If there is an error in the grammar itself parglare will raise 75 | `parglare.GrammarError` exception. 76 | -------------------------------------------------------------------------------- /docs/css/version-select.css: -------------------------------------------------------------------------------- 1 | #version-selector { 2 | display: block; 3 | margin: -10px auto 0.809em; 4 | padding: 2px; 5 | } 6 | -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | The first thing to do is to write your language grammar using 4 | the [parglare grammar language](./grammar_language.md). You write the grammar 5 | either as a Python string in your source code or as a separate file. In case you 6 | are writing a grammar of a complex language I would suggest the separate file 7 | approach. Although not mandatory, the convention is that parglare grammar files 8 | have `.pg` extension. 9 | 10 | The next step is to create the instance of the `Grammar` class. This is achieved 11 | by importing the `Grammar` class and calling either `from_file` or `from_str` 12 | methods supplying the file name for the former and the Python string for the 13 | later call. 14 | 15 | ```python 16 | from parglare import Grammar 17 | 18 | file_name = ..... 
19 | grammar = Grammar.from_file(file_name) 20 | ``` 21 | 22 | If there is no errors in the grammar you now have the grammar instance. For more 23 | information see the [section about `Grammar` class](./grammar.md). 24 | 25 | 26 | !!! tip 27 | 28 | There is also a handy [pglr command line tool](./pglr.md) that can be 29 | used for grammar checking, visualization and debugging. 30 | 31 | The next step is to create an instance of the parser. There are two options. If 32 | you want to use LR parser instantiate `Parser` class. For GLR instantiate 33 | `GLRParser` class. 34 | 35 | 36 | ```python 37 | from parglare import Parser 38 | parser = Parser(grammar) 39 | ``` 40 | 41 | or 42 | 43 | ```python 44 | from parglare import GLRParser 45 | parser = GLRParser(grammar) 46 | ``` 47 | 48 | You can provide additional [parser parameters](./parser.md) during instantiation. 49 | 50 | !!! note 51 | 52 | LR parser is faster as the GLR machinery brings a significant overhead. So, 53 | the general advice is to stick to the LR parsing until you are sure that you 54 | need additional power of GLR, i.e. either you need more than one token of 55 | lookahead or your language is inherently ambiguous. pglr tool will help you in 56 | investigating why you have LR conflicts in your grammar and there are some 57 | nice [disambiguation features](./lr_parsing.md#resolving-conflicts) in parglare 58 | that will help you resolve some of those conflicts. 59 | 60 | Now parse your input calling `parse` method on the parser instance. 61 | 62 | ```python 63 | result = parser.parse(input_str) 64 | ``` 65 | 66 | Depending on whether you have configured [actions](./actions.md) and what 67 | parameters you used for parser instance you will 68 | get either: 69 | 70 | - a nested lists if no actions are used, 71 | - a parse tree if [`build_tree` parser param](./parser.md#build_tree) is set to 72 | `True`, 73 | - some other representation of your input if custom actions are used. 
74 | 75 | In case of the GLR parser you will get a list of all possible results (a.k.a. 76 | _the parse forest_). 77 | 78 | ## Where to go next? 79 | 80 | You can investigate various topics in the docs. 81 | The [examples](https://github.com/igordejanovic/parglare/tree/master/examples) 82 | and 83 | the [tests](https://github.com/igordejanovic/parglare/tree/master/tests/func) 84 | are also a good source of information. 85 | -------------------------------------------------------------------------------- /docs/handling_errors.md: -------------------------------------------------------------------------------- 1 | # Handling errors 2 | 3 | When parglare encounters a situation in which no SHIFT or REDUCE operation could 4 | be performed it will report an error by raising an instance of 5 | `parglare.ParseError` class. 6 | 7 | `ParseError` has the following attributes: 8 | 9 | - **location** - an instance of the [Location class](./common.md#location-class) 10 | with information of the span of the error. 11 | 12 | - **symbols_expected (list)** - a list of expected symbol at the location. 13 | 14 | - **tokens_ahead (list)** - a list of tokens recognized at the position by 15 | trying all terminal symbols recognizers from the grammar. Note that this list 16 | might be empty in case nothing can be recognized at the position or it might 17 | have more than one element if more recognizers succeeds (lexical ambiguity). 18 | 19 | - **symbols_before (list)** - a list of last seen symbols. In the case of LR 20 | parser it will always be a single element list. In the case of GLR there might 21 | be more symbols if there were multiple parser heads. 22 | 23 | - **last_heads (list)** - A list of last GLR parser heads. Available only for 24 | GLR parsing. 25 | 26 | - **grammar (Grammar)** - An instance of `parglare.Grammar` class used for 27 | parsing. 
28 | 29 | 30 | # Error recovery 31 | 32 | There are a lot of situations where you would want parser to report all the 33 | errors in one go. To do this, parser has to recover from errors, i.e. get to 34 | the valid state and continue. 35 | 36 | To enable error recovery set `error_recovery` [parameter of parser 37 | construction](./parser.md#error_recovery) to `True`. This will enable implicit 38 | error recovery strategy that will try to search for expected tokens in the input 39 | ahead and when the first is found the parsing will continue. All errors will be 40 | collected as an `errors` list on the parser instance. 41 | 42 | Each error is an instance of [`ParseError` class](#handling-errors). In case no 43 | recovery is possible last `ParseError` will be raised. `ParserError` has a 44 | location which represents the span of the error in the input (e.g. 45 | `error.location.start_position` and `error.location.end_position`). 46 | 47 | 48 | ## Custom recovery strategy 49 | 50 | To provide a custom strategy for error recovery set `error_recovery` parser 51 | constructor parameter to a Python function. This function should have the 52 | following signature: 53 | 54 | def error_recovery_strategy(context, error): 55 | ... 56 | 57 | 58 | - **context***- context like object (usually the parser head). 59 | - **error** - [`ParseError` instance](#handling-errors). 60 | 61 | Using the head object you can query the state of the parser. E.g. to get the 62 | position use `context.position`, to get the parser state use `context.state`, to 63 | get expected symbols in this state use `context.state.actions.keys()`. 64 | 65 | To get information about the error use `error` object. E.g. to get expected 66 | symbols at this position for which parser can successfully continue use 67 | `error.symbols_expected`. 68 | 69 | The recovery function should modify the head (e.g. its position and/or 70 | `token_ahead`) and bring it to a state which can continue. 
If the recovery is 71 | successful the function should return `True`, otherwise `False`. 72 | 73 | You can call a default error recovery from your custom recovery by 74 | `context.parser.default_error_recovery(context)` 75 | -------------------------------------------------------------------------------- /docs/images/calc.pg: -------------------------------------------------------------------------------- 1 | E: E "+" E | E "*" E | number; 2 | terminals 3 | number: /\d+/; 4 | -------------------------------------------------------------------------------- /docs/images/calc.pg.dot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/docs/images/calc.pg.dot.png -------------------------------------------------------------------------------- /docs/images/calc_forest.dot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/docs/images/calc_forest.dot.png -------------------------------------------------------------------------------- /docs/images/calc_trace.dot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/docs/images/calc_trace.dot.png -------------------------------------------------------------------------------- /docs/images/parglare-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/docs/images/parglare-logo.png -------------------------------------------------------------------------------- /docs/js/version-select.js: -------------------------------------------------------------------------------- 1 | 
window.addEventListener("DOMContentLoaded", function() { 2 | function normalizePath(path) { 3 | var normalized = []; 4 | path.split("/").forEach(function(bit, i) { 5 | if (bit === "." || (bit === "" && i !== 0)) { 6 | return; 7 | } else if (bit === "..") { 8 | if (normalized.length === 1 && normalized[0] === "") { 9 | // We must be trying to .. past the root! 10 | throw new Error("invalid path"); 11 | } else if (normalized.length === 0 || 12 | normalized[normalized.length - 1] === "..") { 13 | normalized.push(".."); 14 | } else { 15 | normalized.pop(); 16 | } 17 | } else { 18 | normalized.push(bit); 19 | } 20 | }); 21 | return normalized.join("/"); 22 | } 23 | 24 | // `base_url` comes from the base.html template for this theme. 25 | var REL_BASE_URL = base_url; 26 | var ABS_BASE_URL = normalizePath(window.location.pathname + "/" + 27 | REL_BASE_URL); 28 | var CURRENT_VERSION = ABS_BASE_URL.split("/").pop(); 29 | 30 | function makeSelect(options, selected) { 31 | var select = document.createElement("select"); 32 | 33 | options.forEach(function(i) { 34 | var option = new Option(i.text, i.value, undefined, 35 | i.value === selected); 36 | select.add(option); 37 | }); 38 | 39 | return select; 40 | } 41 | 42 | var xhr = new XMLHttpRequest(); 43 | xhr.open("GET", REL_BASE_URL + "/../versions.json"); 44 | xhr.onload = function() { 45 | var versions = JSON.parse(this.responseText); 46 | 47 | var realVersion = versions.find(function(i) { 48 | return i.version === CURRENT_VERSION || 49 | i.aliases.includes(CURRENT_VERSION); 50 | }).version; 51 | 52 | var select = makeSelect(versions.map(function(i) { 53 | return {text: i.title, value: i.version}; 54 | }), realVersion); 55 | select.id = "version-selector"; 56 | select.addEventListener("change", function(event) { 57 | window.location.href = REL_BASE_URL + "/../" + this.value; 58 | }); 59 | 60 | var title = document.querySelector("div.wy-side-nav-search"); 61 | title.insertBefore(select, 
title.querySelector(".icon-home").nextSibling); 62 | }; 63 | xhr.send(); 64 | }); 65 | -------------------------------------------------------------------------------- /docs/release_notes/release_0_14.md: -------------------------------------------------------------------------------- 1 | # Release notes for 0.14 2 | 3 | ## Parenthesized groups 4 | 5 | RHS in grammar rules now can used parentheses to group element. These groups 6 | behave similar to any other rule reference. E.g. repetitions and assignments can 7 | be applied. 8 | 9 | Previously: 10 | 11 | ```nohiglight 12 | S: a_then_b*[comma] c; 13 | a_then_b: a b; 14 | ... 15 | 16 | ``` 17 | 18 | Now you can write: 19 | 20 | ```nohiglight 21 | S: (a b)*[comma] c; 22 | ... 23 | 24 | ``` 25 | 26 | You can nest groups, combine with choice operator etc. 27 | 28 | ```nohiglight 29 | S: ( (a c)+ | b)*[comma] c; 30 | 31 | ``` 32 | For more info see a [new section in the docs](../grammar_language.md#parenthesized-groups). 33 | 34 | 35 | ## GLR forest 36 | 37 | GLR now returns `Forest` object. This object represents all the possible solutions. 38 | Forest can be iterated, indexed, yielding lazy parse trees. 39 | 40 | See [more info in the docs](../parse_forest_trees.md). 41 | 42 | 43 | ## Extensions to `pglr` command 44 | 45 | `pglr trace` now provides `--frontier` flag to organize GSS nodes into 46 | frontiers. See [the docs](../pglr.md#tracing-glr-parsing). 47 | 48 | `pglr parse` is added for parsing input strings and files and producing forests 49 | and trees as either string or graphical dot representation. See [the 50 | docs](../pglr.md#parsing-inputs). 51 | 52 | 53 | ## Support for visitor 54 | 55 | *Visitor pattern* is supported as a `visitor` function enabling depth-first 56 | processing of tree-like structures. See [the 57 | docs](../parse_forest_trees.md#visitor). 
58 | 59 | 60 | ## New examples 61 | 62 | Several new examples are added: 63 | 64 | - [JSON](https://github.com/igordejanovic/parglare/tree/master/examples/json) 65 | - [BibTeX](https://github.com/igordejanovic/parglare/tree/master/examples/bibtex) 66 | - [Java](https://github.com/igordejanovic/parglare/tree/master/examples/java) (based on Java SE 16 version) 67 | 68 | 69 | ## Performance tests 70 | 71 | New performance tests based on new example grammars are provided in 72 | [tests/perf](https://github.com/igordejanovic/parglare/tree/master/tests/perf). 73 | Run `runall.sh` and read the reports in 74 | [tests/perf/reports](https://github.com/igordejanovic/parglare/tree/master/tests/perf/reports). 75 | -------------------------------------------------------------------------------- /docs/release_notes/release_0_15.md: -------------------------------------------------------------------------------- 1 | # Release notes for 0.15 2 | 3 | This release should be fully backward compatible so the upgrade should require 4 | no changes. 5 | 6 | ## Greedy repetitions 7 | 8 | The most important new feature in this release is a support for greedy 9 | repetition. Read more in [the docs](../../grammar_language/#greedy-repetitions). 10 | 11 | ## New way to disambiguate the GLR forest 12 | 13 | A new and recommended way for dynamic disambiguation is by using 14 | `forest.disambiguate`. Read more in [the docs](../../disambiguation/#disambiguation-of-a-glr-forest). 15 | 16 | ## Optimized getting of the first tree from the GLR forest 17 | 18 | If you are not interested into analyzing the forest and comparing trees but just 19 | want to get any valid tree you can use `forest.get_first_tree()` which is 20 | optimized to avoid tree enumeration that might be costly. The returned tree is 21 | fully unpacked and doesn't use proxies, i.e. it contains only `NodeTerm` and 22 | `NodeNonTerm` instances. 
23 | -------------------------------------------------------------------------------- /docs/style.css: -------------------------------------------------------------------------------- 1 | .rst-content .section ol p, .rst-content .section ul p { margin-bottom: 6px; margin-top: 12px;} 2 | -------------------------------------------------------------------------------- /examples/bibtex/bibtex.pg: -------------------------------------------------------------------------------- 1 | BibFile: 2 | entries=BibEntry+ 3 | ; 4 | 5 | BibEntry: BibLineComment | BibComment | BibPreamble | BibString | BibRefEntry; 6 | 7 | BibLineComment: 8 | text=BibCommentLine 9 | ; 10 | 11 | BibComment: '@' 'comment' '{' 12 | text=BlockCommentBody 13 | '}' 14 | ; 15 | 16 | BibPreamble: '@' 'preamble' '{' 17 | value=Value 18 | '}' 19 | ; 20 | 21 | BibString: '@' type='string' '{' 22 | fields=BibField*[Comma] 23 | '}' 24 | ; 25 | 26 | BibRefEntry: '@' type=BibType '{' key=BibKey Comma 27 | fields=BibField*[Comma] 28 | Comma? 
29 | '}' 30 | ; 31 | 32 | BibField: name=Ident '=' value=Value; 33 | 34 | 35 | Value: '"' Piece+[Hash] '"' | Piece+[Hash]; 36 | Piece: '{' Piece* '}' | InBraces; 37 | 38 | 39 | terminals 40 | 41 | Comma: ','; 42 | Hash: '#'; 43 | //BibType: /(?!(string|comment|preamble))\w+/; 44 | BibType: /\w+/; 45 | BibKey: /[^, =\t\}\n]+/; 46 | Ident: /[^\d]([^ \t\"#%\'\(\),={}])+/; 47 | InBraces: /[^{}]+/; 48 | 49 | BibCommentLine: /[^@][^\n]+/; 50 | BlockCommentBody: /[^\}]*/; 51 | -------------------------------------------------------------------------------- /examples/bibtex/bibtex.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, join 2 | 3 | from parglare import GLRParser, Grammar 4 | 5 | 6 | def main(debug=False): 7 | THIS_DIR = dirname(__file__) 8 | g = Grammar.from_file(join(THIS_DIR, 'bibtex.pg')) 9 | 10 | parser = GLRParser(g) 11 | 12 | forest = parser.parse_file(join(THIS_DIR, 'test.bib')) 13 | print(f'Solutions: {len(forest)}') 14 | print(f'Ambiguities: {forest.ambiguities}') 15 | if debug: 16 | with open('forest.txt', 'w') as f: 17 | f.write(forest.to_str()) 18 | print('See forest.txt') 19 | 20 | 21 | if __name__ == "__main__": 22 | main(debug=True) 23 | -------------------------------------------------------------------------------- /examples/bibtex/test.bib: -------------------------------------------------------------------------------- 1 | @comment{x-kbibtex-encoding=utf-8} 2 | 3 | @article{knuth1965, 4 | title = {On the translation of languages from left to right}, 5 | author = {Knuth, Donald E}, 6 | year = {1965}, 7 | journal = {Information and control}, 8 | number = {6}, 9 | pages = {607–639}, 10 | publisher = {Elsevier}, 11 | volume = {8}, 12 | x-stars = {94.89}, 13 | keywords = {parsing, LR, dslbook} 14 | } 15 | 16 | @inproceedings{tomita1984, 17 | title = {LR parsers for natural languages}, 18 | author = {Tomita, Masaru}, 19 | booktitle = {10th International Conference on Computational 
Linguistics and 22nd Annual Meeting of the Association for Computational Linguistics}, 20 | pages = {354--357}, 21 | year = {1984} 22 | } 23 | 24 | @inproceedings{tomita1985, 25 | title = {{An Efficient Context-Free Parsing Algorithm for Natural Languages.}}, 26 | author = {Tomita, Masaru}, 27 | year = {1985}, 28 | booktitle = {{IJCAI}}, 29 | pages = {756–764}, 30 | volume = {2} 31 | } 32 | 33 | @incollection{nozohoor1991, 34 | title = {GLR Parsing for $\varepsilon$-Grammers}, 35 | author = {Nozohoor-Farshi, Rahman}, 36 | booktitle = {Generalized LR parsing}, 37 | file = {docs/tomita2012.pdf}, 38 | pages = {61--75}, 39 | year = {1991}, 40 | publisher = {Springer} 41 | } 42 | 43 | @article{scott2007, 44 | title = {BRNGLR: a cubic Tomita-style GLR parsing algorithm}, 45 | author = {Scott, Elizabeth and Johnstone, Adrian and Economopoulos, Rob}, 46 | journal = {Acta informatica}, 47 | volume = {44}, 48 | number = {6}, 49 | pages = {427--461}, 50 | year = {2007}, 51 | publisher = {Springer} 52 | } 53 | 54 | @inproceedings{mcpeak2004, 55 | title = {Elkhound: A fast, practical GLR parser generator}, 56 | author = {McPeak, Scott and Necula, George C}, 57 | booktitle = {International Conference on Compiler Construction}, 58 | pages = {73--88}, 59 | year = {2004}, 60 | organization = {Springer} 61 | } 62 | -------------------------------------------------------------------------------- /examples/c/README.md: -------------------------------------------------------------------------------- 1 | This is an example of C parsing. Still WIP. 
2 | -------------------------------------------------------------------------------- /examples/c/c_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is work in progress 3 | """ 4 | import os 5 | import re 6 | 7 | from parglare import GLRParser, Grammar 8 | 9 | 10 | def main(debug=False): 11 | this_folder = os.path.dirname(__file__) 12 | g = Grammar.from_file(os.path.join(this_folder, 'c.pg'), 13 | re_flags=re.MULTILINE | re.VERBOSE) 14 | parser = GLRParser(g, debug=debug, debug_colors=True) 15 | 16 | # The input is C code after preprocessing 17 | forest = parser.parse_file(os.path.join(this_folder, 'example.c')) 18 | 19 | print('Solutions: ', len(forest)) 20 | print('Ambiguities: ', forest.ambiguities) 21 | 22 | 23 | if __name__ == "__main__": 24 | main(debug=False) 25 | -------------------------------------------------------------------------------- /examples/calc/calc.py: -------------------------------------------------------------------------------- 1 | from parglare import Grammar, Parser 2 | from parglare.actions import pass_inner, pass_single 3 | 4 | grammar = r""" 5 | Calc: Assignments E; 6 | Assignments: Assignment | Assignments Assignment | EMPTY; 7 | Assignment: VariableName "=" Number; 8 | 9 | E: E "+" E {left, 1} 10 | | E "-" E {left, 1} 11 | | E "*" E {left, 2} 12 | | E "/" E {left, 2} 13 | | "(" E ")" 14 | | VariableRef 15 | | Number 16 | ; 17 | 18 | VariableRef: VariableName; 19 | 20 | terminals 21 | VariableName: /[a-zA-Z_][_a-zA-Z0-9]*/; 22 | Number: /\d+(\.\d+)?/; 23 | """ 24 | 25 | 26 | # Semantic Actions 27 | def act_assignment(context, nodes): 28 | """Semantic action for variable assignment.""" 29 | 30 | name = nodes[0] 31 | number = nodes[2] 32 | 33 | if context.extra is None: 34 | context.extra = {} 35 | 36 | context.extra[name] = number 37 | 38 | 39 | actions = { 40 | "Calc": lambda _, nodes: nodes[1], 41 | "Assignment": act_assignment, 42 | "E": [lambda _, nodes: nodes[0] + 
nodes[2], 43 | lambda _, nodes: nodes[0] - nodes[2], 44 | lambda _, nodes: nodes[0] * nodes[2], 45 | lambda _, nodes: nodes[0] / nodes[2], 46 | pass_inner, 47 | pass_single, 48 | pass_single], 49 | "Number": lambda _, value: float(value), 50 | "VariableRef": lambda context, nodes: context.extra[nodes[0]], 51 | } 52 | 53 | 54 | def main(debug=False): 55 | g = Grammar.from_string(grammar, debug=debug, debug_colors=True) 56 | parser = Parser(g, actions=actions, debug=debug, debug_colors=True) 57 | 58 | input_str = """ 59 | a = 5 60 | b = 10 61 | 62 | a + 56.4 / 3 * 5 - b + 8 * 3 63 | """ 64 | 65 | res = parser.parse(input_str) 66 | 67 | assert res == 5. + 56.4 / 3 * 5 - 10 + 8 * 3 68 | print("Input:\n", input_str) 69 | print("Result = ", res) 70 | 71 | 72 | if __name__ == "__main__": 73 | main(debug=True) 74 | -------------------------------------------------------------------------------- /examples/csv/csv.py: -------------------------------------------------------------------------------- 1 | from parglare import Grammar, Parser 2 | 3 | grammar = r""" 4 | @pass_inner 5 | CSVFile: OptionalNewLines Records OptionalNewLines; 6 | @collect_sep 7 | Records: Records OptionalNewLines Record| Record; 8 | @pass_single 9 | Record: Fields NewLine; 10 | @collect_sep 11 | Fields: Fields "," Field | Field; 12 | Field: QuotedField | FieldContent; 13 | NewLines: NewLine | NewLines NewLine; 14 | OptionalNewLines: NewLines | EMPTY; 15 | @pass_inner 16 | QuotedField: "\"" FieldContentQuoted "\""; 17 | 18 | terminals 19 | FieldContent: /[^,\n]+/; 20 | FieldContentQuoted: /(("")|([^"]))+/; 21 | NewLine: "\n"; 22 | """ 23 | 24 | 25 | def main(debug=False): 26 | g = Grammar.from_string(grammar) 27 | parser = Parser(g, ws='\t ', debug=debug, debug_colors=True) 28 | 29 | input_str = """ 30 | First, Second with multiple words, "Third, quoted with comma" 31 | 32 | 33 | Next line, Previous line has newlines, 2 34 | Another Line, 34.45, "Quoted", field 35 | 36 | 37 | """ 38 | 39 | res = 
parser.parse(input_str) 40 | 41 | print("Input:\n", input_str) 42 | print("Result = ", res) 43 | 44 | 45 | if __name__ == "__main__": 46 | main(debug=True) 47 | -------------------------------------------------------------------------------- /examples/custom_table_caching/.gitignore: -------------------------------------------------------------------------------- 1 | _table.py 2 | -------------------------------------------------------------------------------- /examples/custom_table_caching/README.md: -------------------------------------------------------------------------------- 1 | Custom parse table caching example 2 | ================================== 3 | 4 | Parse table is stored as a python module (`_table.py`). To generate it, in this directory run: 5 | 6 | python compile.py 7 | 8 | Then precomputed parse table is used in parser script 9 | 10 | python parser.py 11 | -------------------------------------------------------------------------------- /examples/custom_table_caching/compile.py: -------------------------------------------------------------------------------- 1 | from grammar import grammar, start_symbol 2 | 3 | from parglare.tables import LALR, create_table 4 | from parglare.tables.persist import table_to_serializable 5 | 6 | table = create_table( 7 | grammar, 8 | start_production=grammar.get_production_id(start_symbol), 9 | itemset_type=LALR, 10 | prefer_shifts=False, 11 | prefer_shifts_over_empty=False, 12 | ) 13 | serializable_table = table_to_serializable(table) 14 | 15 | with open('_table.py', 'w') as f: 16 | f.write('table = ') 17 | f.write(repr(serializable_table)) 18 | -------------------------------------------------------------------------------- /examples/custom_table_caching/grammar.py: -------------------------------------------------------------------------------- 1 | from parglare import Grammar 2 | 3 | grammar = Grammar.from_string(""" 4 | start: ab; 5 | ab: "a" ab "b" | EMPTY; 6 | """) 7 | 8 | start_symbol = 'start' 9 | 
-------------------------------------------------------------------------------- /examples/custom_table_caching/parser.py: -------------------------------------------------------------------------------- 1 | from _table import table 2 | from grammar import grammar 3 | 4 | from parglare import GLRParser 5 | from parglare.tables.persist import table_from_serializable 6 | 7 | table = table_from_serializable(table, grammar) 8 | parser = GLRParser(grammar, table=table) 9 | 10 | print(parser.parse('aaabbb')) 11 | -------------------------------------------------------------------------------- /examples/java/README.md: -------------------------------------------------------------------------------- 1 | This is an implementation of Java SE 16 Edition based on the spec defined in 2 | https://docs.oracle.com/javase/specs/jls/se16/html/jls-19.html 3 | 4 | Since the grammar is derived from the official specification, please see the 5 | legal terms from the specification if you plan to use it in your project. 6 | https://docs.oracle.com/javase/specs/jls/se16/html/index.html 7 | 8 | The test input file is from the [Spring Boot 9 | project](https://github.com/spring-projects/spring-boot). 
10 | -------------------------------------------------------------------------------- /examples/java/java_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | from parglare import GLRParser, Grammar 5 | 6 | 7 | def main(debug=False): 8 | this_folder = os.path.dirname(__file__) 9 | 10 | g = Grammar.from_file(os.path.join(this_folder, 'java16.pg')) 11 | parser = GLRParser(g, debug=debug, debug_colors=True) 12 | 13 | file_name = os.path.join(this_folder, 'TomcatServletWebServerFactory.java') 14 | file_size = os.path.getsize(file_name) 15 | 16 | t_start = time.time() 17 | forest = parser.parse_file(file_name) 18 | t_end = time.time() 19 | 20 | print(f'Elapsed time: {t_end - t_start:.2f}', 'sec') 21 | print(f'Speed = {file_size/1000/(t_end - t_start):.2f}', 22 | 'KB/sec\n') 23 | print('Solutions: ', forest.solutions) 24 | print('Ambiguities: ', forest.ambiguities) 25 | 26 | 27 | if __name__ == "__main__": 28 | main(debug=False) 29 | -------------------------------------------------------------------------------- /examples/json/example1.json: -------------------------------------------------------------------------------- 1 | { 2 | "glossary": { 3 | "title": "example glossary", 4 | "GlossDiv": { 5 | "title": "S", 6 | "GlossList": { 7 | "GlossEntry": { 8 | "ID": "SGML", 9 | "SortAs": "SGML", 10 | "GlossTerm": "Standard Generalized Markup Language", 11 | "Acronym": "SGML", 12 | "Abbrev": "ISO 8879:1986", 13 | "GlossDef": { 14 | "para": "A meta-markup language, used to create markup languages such as DocBook.", 15 | "GlossSeeAlso": ["GML", "XML"] 16 | }, 17 | "GlossSee": "markup" 18 | } 19 | } 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/json/example2.json: -------------------------------------------------------------------------------- 1 | {"menu": { 2 | "id": "file", 3 | "value": "File", 4 | "popup": { 5 | "menuitem": [ 6 | 
{"value": "New", "onclick": "CreateNewDoc()"}, 7 | {"value": "Open", "onclick": "OpenDoc()"}, 8 | {"value": "Close", "onclick": "CloseDoc()"} 9 | ] 10 | } 11 | }} 12 | -------------------------------------------------------------------------------- /examples/json/example3.json: -------------------------------------------------------------------------------- 1 | {"widget": { 2 | "debug": "on", 3 | "window": { 4 | "title": "Sample Konfabulator Widget", 5 | "name": "main_window", 6 | "width": 500, 7 | "height": 500 8 | }, 9 | "image": { 10 | "src": "Images/Sun.png", 11 | "name": "sun1", 12 | "hOffset": 250, 13 | "vOffset": 250, 14 | "alignment": "center" 15 | }, 16 | "text": { 17 | "data": "Click Here", 18 | "size": 36, 19 | "style": "bold", 20 | "name": "text1", 21 | "hOffset": 250, 22 | "vOffset": 100, 23 | "alignment": "center", 24 | "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;" 25 | } 26 | }} 27 | 28 | -------------------------------------------------------------------------------- /examples/json/example4.json: -------------------------------------------------------------------------------- 1 | {"web-app": { 2 | "servlet": [ 3 | { 4 | "servlet-name": "cofaxCDS", 5 | "servlet-class": "org.cofax.cds.CDSServlet", 6 | "init-param": { 7 | "configGlossary:installationAt": "Philadelphia, PA", 8 | "configGlossary:adminEmail": "ksm@pobox.com", 9 | "configGlossary:poweredBy": "Cofax", 10 | "configGlossary:poweredByIcon": "/images/cofax.gif", 11 | "configGlossary:staticPath": "/content/static", 12 | "templateProcessorClass": "org.cofax.WysiwygTemplate", 13 | "templateLoaderClass": "org.cofax.FilesTemplateLoader", 14 | "templatePath": "templates", 15 | "templateOverridePath": "", 16 | "defaultListTemplate": "listTemplate.htm", 17 | "defaultFileTemplate": "articleTemplate.htm", 18 | "useJSP": false, 19 | "jspListTemplate": "listTemplate.jsp", 20 | "jspFileTemplate": "articleTemplate.jsp", 21 | "cachePackageTagsTrack": 200, 22 | "cachePackageTagsStore": 200, 23 | 
"cachePackageTagsRefresh": 60, 24 | "cacheTemplatesTrack": 100, 25 | "cacheTemplatesStore": 50, 26 | "cacheTemplatesRefresh": 15, 27 | "cachePagesTrack": 200, 28 | "cachePagesStore": 100, 29 | "cachePagesRefresh": 10, 30 | "cachePagesDirtyRead": 10, 31 | "searchEngineListTemplate": "forSearchEnginesList.htm", 32 | "searchEngineFileTemplate": "forSearchEngines.htm", 33 | "searchEngineRobotsDb": "WEB-INF/robots.db", 34 | "useDataStore": true, 35 | "dataStoreClass": "org.cofax.SqlDataStore", 36 | "redirectionClass": "org.cofax.SqlRedirection", 37 | "dataStoreName": "cofax", 38 | "dataStoreDriver": "com.microsoft.jdbc.sqlserver.SQLServerDriver", 39 | "dataStoreUrl": "jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon", 40 | "dataStoreUser": "sa", 41 | "dataStorePassword": "dataStoreTestQuery", 42 | "dataStoreTestQuery": "SET NOCOUNT ON;select test='test';", 43 | "dataStoreLogFile": "/usr/local/tomcat/logs/datastore.log", 44 | "dataStoreInitConns": 10, 45 | "dataStoreMaxConns": 100, 46 | "dataStoreConnUsageLimit": 100, 47 | "dataStoreLogLevel": "debug", 48 | "maxUrlLength": 500}}, 49 | { 50 | "servlet-name": "cofaxEmail", 51 | "servlet-class": "org.cofax.cds.EmailServlet", 52 | "init-param": { 53 | "mailHost": "mail1", 54 | "mailHostOverride": "mail2"}}, 55 | { 56 | "servlet-name": "cofaxAdmin", 57 | "servlet-class": "org.cofax.cds.AdminServlet"}, 58 | 59 | { 60 | "servlet-name": "fileServlet", 61 | "servlet-class": "org.cofax.cds.FileServlet"}, 62 | { 63 | "servlet-name": "cofaxTools", 64 | "servlet-class": "org.cofax.cms.CofaxToolsServlet", 65 | "init-param": { 66 | "templatePath": "toolstemplates/", 67 | "log": 1, 68 | "logLocation": "/usr/local/tomcat/logs/CofaxTools.log", 69 | "logMaxSize": "", 70 | "dataLog": 1, 71 | "dataLogLocation": "/usr/local/tomcat/logs/dataLog.log", 72 | "dataLogMaxSize": "", 73 | "removePageCache": "/content/admin/remove?cache=pages&id=", 74 | "removeTemplateCache": "/content/admin/remove?cache=templates&id=", 75 | 
"fileTransferFolder": "/usr/local/tomcat/webapps/content/fileTransferFolder", 76 | "lookInContext": 1, 77 | "adminGroupID": 4, 78 | "betaServer": true}}], 79 | "servlet-mapping": { 80 | "cofaxCDS": "/", 81 | "cofaxEmail": "/cofaxutil/aemail/*", 82 | "cofaxAdmin": "/admin/*", 83 | "fileServlet": "/static/*", 84 | "cofaxTools": "/tools/*"}, 85 | 86 | "taglib": { 87 | "taglib-uri": "cofax.tld", 88 | "taglib-location": "/WEB-INF/tlds/cofax.tld"}}} 89 | -------------------------------------------------------------------------------- /examples/json/example5.json: -------------------------------------------------------------------------------- 1 | {"menu": { 2 | "header": "SVG Viewer", 3 | "items": [ 4 | {"id": "Open"}, 5 | {"id": "OpenNew", "label": "Open New"}, 6 | null, 7 | {"id": "ZoomIn", "label": "Zoom In"}, 8 | {"id": "ZoomOut", "label": "Zoom Out"}, 9 | {"id": "OriginalView", "label": "Original View"}, 10 | null, 11 | {"id": "Quality"}, 12 | {"id": "Pause"}, 13 | {"id": "Mute"}, 14 | null, 15 | {"id": "Find", "label": "Find..."}, 16 | {"id": "FindAgain", "label": "Find Again"}, 17 | {"id": "Copy"}, 18 | {"id": "CopyAgain", "label": "Copy Again"}, 19 | {"id": "CopySVG", "label": "Copy SVG"}, 20 | {"id": "ViewSVG", "label": "View SVG"}, 21 | {"id": "ViewSource", "label": "View Source"}, 22 | {"id": "SaveAs", "label": "Save As"}, 23 | null, 24 | {"id": "Help"}, 25 | {"id": "About", "label": "About Adobe CVG Viewer..."} 26 | ] 27 | }} 28 | -------------------------------------------------------------------------------- /examples/json/json.pg: -------------------------------------------------------------------------------- 1 | value: FALSE | TRUE | NULL | object | array | number | string; 2 | object: "{" member*[COMMA] "}"; 3 | member: string ":" value; 4 | array: "[" value*[COMMA] "]"; 5 | 6 | terminals 7 | FALSE: 'false'; 8 | TRUE: 'true'; 9 | NULL: 'null'; 10 | COMMA: ','; 11 | number: /-?\d+(\.\d+)?(e|E[-+]?\d+)?/; 12 | string: /"((\\")|[^"])*"/; 13 | 
-------------------------------------------------------------------------------- /examples/json/json_example.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar, Parser 4 | 5 | 6 | def main(debug=False): 7 | this_folder = os.path.dirname(__file__) 8 | g = Grammar.from_file(os.path.join(this_folder, 'json.pg')) 9 | parser = Parser(g, debug=debug, debug_colors=True) 10 | 11 | for i in range(5): 12 | result = parser.parse_file(os.path.join(this_folder, f'example{i+1}.json')) 13 | print(result) 14 | 15 | 16 | if __name__ == "__main__": 17 | main(debug=True) 18 | -------------------------------------------------------------------------------- /examples/molecular_formulas/README.md: -------------------------------------------------------------------------------- 1 | This example is based 2 | on 3 | [article by Andrew Dalke](http://www.dalkescientific.com/writings/diary/archive/2007/11/03/antlr_java.html) comparing 4 | ANTLR and PLY performance in parsing molecular formulas. 5 | 6 | An example is modified to compare PLY and parglare. You can see the difference 7 | in styles of grammar/actions definition and parser construction. 8 | 9 | By running `python run_test.py` you will see the speed difference. PLY is faster 10 | in this tests. 11 | -------------------------------------------------------------------------------- /examples/molecular_formulas/parglare_mw.py: -------------------------------------------------------------------------------- 1 | """Calculate the molecular weight given a molecular formula 2 | 3 | Parse the formula using parglare. 
4 | This example is based on the example from 5 | PLY compared with pyparsing and ANTLR by Andrew Dalke 6 | http://www.dalkescientific.com/writings/diary/archive/2007/11/03/antlr_java.html 7 | """ 8 | from parglare import Grammar, Parser 9 | 10 | grammar = r""" 11 | mw: EMPTY | formula; 12 | formula: species | formula species; 13 | species: ATOM DIGITS | ATOM; 14 | 15 | terminals 16 | DIGITS: /\d+/; 17 | """ 18 | 19 | mw_table = { 20 | 'H': 1.00794, 21 | 'C': 12.001, 22 | 'Cl': 35.453, 23 | 'O': 15.999, 24 | 'S': 32.06, 25 | } 26 | 27 | atom_names = sorted( 28 | mw_table.keys(), 29 | key=lambda symbol: (symbol[0], -len(symbol), symbol)) 30 | 31 | # Creates a pattern like: Cl|C|H|O|S 32 | atom_pattern = "|".join(atom_names) 33 | 34 | # Extend grammar definition with the ATOM rule 35 | grammar += f'\nATOM: /{atom_pattern}/;' 36 | 37 | actions = { 38 | 'mw': [lambda _, __: 0.0, 39 | lambda _, nodes: nodes[0]], 40 | 'formula': [lambda _, nodes: nodes[0], 41 | lambda _, nodes: nodes[0] + nodes[1]], 42 | 'species': [lambda _, nodes: nodes[0] * nodes[1], 43 | lambda _, nodes: nodes[0]], 44 | 'ATOM': lambda _, value: mw_table[value], 45 | 'DIGITS': lambda _, value: int(value) 46 | } 47 | 48 | parser = Parser(Grammar.from_string(grammar), actions=actions) 49 | 50 | 51 | def calculate_mw(formula): 52 | return parser.parse(formula) 53 | -------------------------------------------------------------------------------- /examples/molecular_formulas/ply_mw.py: -------------------------------------------------------------------------------- 1 | """Calculate the molecular weight given a molecular formula 2 | 3 | Parse the formula using PLY. 
4 | """ 5 | # ply_mw.py 6 | 7 | import ply.yacc as yacc 8 | from ply import lex 9 | from ply.lex import TOKEN 10 | 11 | 12 | class ParseError(Exception): 13 | def __init__(self, msg, offset): 14 | self.msg = msg 15 | self.offset = offset 16 | 17 | def __repr__(self): 18 | return f"ParseError({self.msg!r}, {self.offset!r})" 19 | 20 | def __str__(self): 21 | return f"{self.msg} at position {self.offset + 1}" 22 | 23 | 24 | # Define the lexer 25 | 26 | tokens = ( 27 | "ATOM", 28 | "DIGITS", 29 | ) 30 | 31 | mw_table = { 32 | 'H': 1.00794, 33 | 'C': 12.001, 34 | 'Cl': 35.453, 35 | 'O': 15.999, 36 | 'S': 32.06, 37 | } 38 | 39 | 40 | # I don't want to duplicate the atom names so extract the 41 | # keys to make the lexer pattern. 42 | 43 | # Sort order is: 44 | # - alphabetically on first character, to make it easier 45 | # for a human to look at and debug any problems 46 | # 47 | # - then by the length of the symbol; two letters before 1 48 | # Needed because Python's regular expression matcher 49 | # uses "first match" not "longest match" rules. 50 | # For example, "C|Cl" matches only the "C" in "Cl" 51 | # The "-" in "-len(symbol)" is a trick to reverse the sort order. 52 | # 53 | # - then by the full symbol, to make it easier for people 54 | 55 | # (This is more complicated than needed; it's to show how 56 | # this approach can scale to all 100+ known and named elements) 57 | 58 | atom_names = sorted( 59 | mw_table.keys(), 60 | key=lambda symbol: (symbol[0], -len(symbol), symbol)) 61 | 62 | # Creates a pattern like: Cl|C|H|O|S 63 | atom_pattern = "|".join(atom_names) 64 | 65 | 66 | # Use a relatively new PLY feature to set the __doc__ 67 | # string based on a Python variable. 
68 | @TOKEN(atom_pattern) 69 | def t_ATOM(t): 70 | t.value = mw_table[t.value] 71 | return t 72 | 73 | 74 | def t_DIGITS(t): 75 | r"\d+" 76 | t.value = int(t.value) 77 | return t 78 | 79 | 80 | def t_error(t): 81 | raise ParseError("unknown character", t.lexpos) 82 | 83 | 84 | lexer = lex.lex() 85 | 86 | # Here's an example of using the lexer 87 | 88 | # data = "H2SO4" 89 | # 90 | # lex.input(data) 91 | # 92 | # for tok in iter(lex.token, None): 93 | # print tok 94 | 95 | # Define the grammar 96 | 97 | 98 | # The molecular weight of "" is 0.0 99 | def p_mw_empty(p): 100 | "mw : " 101 | p[0] = 0.0 102 | 103 | 104 | def p_mw_formula(p): 105 | "mw : formula" 106 | p[0] = p[1] 107 | 108 | 109 | def p_first_species_term(p): 110 | "formula : species" 111 | p[0] = p[1] 112 | 113 | 114 | def p_species_list(p): 115 | "formula : formula species" 116 | p[0] = p[1] + p[2] 117 | 118 | 119 | def p_species(p): 120 | "species : ATOM DIGITS" 121 | p[0] = p[1] * p[2] 122 | 123 | 124 | def p_species_default(p): 125 | "species : ATOM" 126 | p[0] = p[1] 127 | 128 | 129 | def p_error(p): 130 | raise ParseError("unexpected character", p.lexpos) 131 | 132 | 133 | parser = yacc.yacc() 134 | 135 | # Work around a problem in PLY 2.3 where the first parse does not 136 | # allow a "". I reported it to the ply mailing list on 2 November. 
# time.clock was removed in Python 3.8, so the old Windows-specific
# branch (timer = time.clock on sys.platform == "win32") now raises
# AttributeError. time.perf_counter is a monotonic, high-resolution
# timer on every platform, so no platform switch is needed.
timer = time.perf_counter
start_time = timer() 66 | for expected_mw, formula in good_test_data: 67 | got_mw = calculate_mw(formula) 68 | if expected_mw != got_mw: 69 | raise AssertionError(f"{formula!r} expected {expected_mw!r} got {got_mw!r}") 70 | return timer() - start_time 71 | 72 | 73 | print("Testing", len(good_test_data), "formulas") 74 | 75 | # Evaluate everything with parglare 76 | parglare_time = do_tests(parglare_mw.calculate_mw) 77 | print("parglare", parglare_time) 78 | 79 | # Evaluate everything with PLY 80 | ply_time = do_tests(ply_mw.calculate_mw) 81 | print("PLY", ply_time) 82 | 83 | print("ratio = %.02f" % (parglare_time / ply_time)) 84 | 85 | # I really should test that they handle invalid formulas ... 86 | -------------------------------------------------------------------------------- /examples/quick_intro.py: -------------------------------------------------------------------------------- 1 | from parglare import Grammar, Parser 2 | 3 | grammar = r""" 4 | E: E '+' E {left, 1} 5 | | E '-' E {left, 1} 6 | | E '*' E {left, 2} 7 | | E '/' E {left, 2} 8 | | E '^' E {right, 3} 9 | | '(' E ')' 10 | | number; 11 | 12 | terminals 13 | number: /\d+(\.\d+)?/; 14 | """ 15 | 16 | actions = { 17 | "E": [lambda _, n: n[0] + n[2], 18 | lambda _, n: n[0] - n[2], 19 | lambda _, n: n[0] * n[2], 20 | lambda _, n: n[0] / n[2], 21 | lambda _, n: n[0] ** n[2], 22 | lambda _, n: n[1], 23 | lambda _, n: n[0]], 24 | "number": lambda _, value: float(value), 25 | } 26 | 27 | g = Grammar.from_string(grammar) 28 | parser = Parser(g, debug=True, actions=actions) 29 | 30 | result = parser.parse("34 + 4.6 / 2 * 4^2^2 + 78") 31 | 32 | print("Result = ", result) 33 | 34 | # Output 35 | # -- Debugging/tracing output with detailed info about grammar, productions, 36 | # -- terminals and nonterminals, DFA states, parsing progress, 37 | # -- and at the end of the output: 38 | # Result = 700.8 39 | -------------------------------------------------------------------------------- 
/examples/rhapsody/rhapsody.pg: -------------------------------------------------------------------------------- 1 | Model: Header Object; 2 | Object: '{' ID Property+ '}'; 3 | Property: '-' ID '=' Values SemiColon?; 4 | Property: '-' ID '=' SemiColon; 5 | Values: Value | Values SemiColon? Value; 6 | Value: STRING | INT | FLOAT | GUID | Object | ID; 7 | 8 | 9 | terminals 10 | 11 | STRING: /("(\\"|[^"])*")|(\'(\\\'|[^\'])*\')/; 12 | 13 | // INT and FLOAT are ambiguous. Prefer INT if both match are of same length. 14 | INT: /[-+]?[0-9]+\b/ {prefer}; 15 | FLOAT: /[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b/; 16 | 17 | GUID: /[a-f0-9]*-[a-f0-9]*-[a-f0-9]*-[a-f0-9]*-[a-f0-9]*/; 18 | SemiColon: ';'; 19 | ID: /[a-zA-Z_][a-zA-Z_0-9]*/; 20 | Header: /[^\n]*/; 21 | -------------------------------------------------------------------------------- /examples/rhapsody/rhapsody.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar, Parser 4 | 5 | 6 | def main(debug=False): 7 | 8 | this_folder = os.path.dirname(__file__) 9 | grammar_file = os.path.join(this_folder, 'rhapsody.pg') 10 | g = Grammar.from_file(grammar_file, debug=debug, debug_colors=True) 11 | parser = Parser(g, build_tree=True, debug=debug, debug_colors=True) 12 | 13 | with open(os.path.join(this_folder, 'LightSwitch.rpy')) as f: 14 | result = parser.parse(f.read()) 15 | print(result.to_str()) 16 | 17 | 18 | if __name__ == '__main__': 19 | main(debug=True) 20 | -------------------------------------------------------------------------------- /examples/robot/README.md: -------------------------------------------------------------------------------- 1 | # Robot example 2 | 3 | In this example we have a simple language for moving a robot on a discrete grid. 4 | There are two type of commands: (1) setting initial position (2) moving in a 5 | given direction for given steps. If no steps are given 1 is assumed. 
6 | 7 | - `robot.pg` - is the grammar of the language. Language supports C-like comments. 8 | - `program.rbt` - is the "program" executed in this example 9 | - `robot.py` - is a script that defines semantic actions, constructs and 10 | executes parser. 11 | - `robot.pg.dot.png` - is a PNG file representing LR automata. This file is 12 | produced by: 13 | 14 | ``` 15 | pglr viz robot.pg 16 | dot -Tpng -O robot.pg.dot 17 | ``` 18 | 19 | `dot` is a part of [GraphViz](http://graphviz.org/) software package. 20 | -------------------------------------------------------------------------------- /examples/robot/program.rbt: -------------------------------------------------------------------------------- 1 | begin 2 | initial 3, 1 3 | up 4 // go up 4 steps 4 | left 9 5 | down // step is optional 6 | right 1 7 | end 8 | -------------------------------------------------------------------------------- /examples/robot/robot.pg: -------------------------------------------------------------------------------- 1 | program: "begin" commands=command* "end"; 2 | command: initial | move; 3 | initial: INITIAL x=INT "," y=INT; 4 | move: direction=direction steps=INT?; 5 | direction: "up" | "down" | "left" | "right"; 6 | 7 | // Support for C-like comments 8 | LAYOUT: LayoutItem | LAYOUT LayoutItem | EMPTY; 9 | LayoutItem: WS | Comment; 10 | 11 | terminals 12 | INT: /\d+/; 13 | INITIAL: "initial"; 14 | WS: /\s+/; 15 | Comment: /\/\/.*/; 16 | -------------------------------------------------------------------------------- /examples/robot/robot.pg.dot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/examples/robot/robot.pg.dot.png -------------------------------------------------------------------------------- /examples/robot/robot.py: -------------------------------------------------------------------------------- 1 | """This example is inspired by an 
@action
def move(context, nodes, direction, steps):
    """Advance the robot ``steps`` cells in ``direction``.

    When no step count was given in the program, a single step is
    assumed. The current position is read from and written back to
    ``context.extra`` as an ``(x, y)`` tuple.
    """
    if steps is None:
        steps = 1
    print(f"Moving robot {direction} for {steps} steps.")

    # Unit displacement for each direction keyword.
    deltas = {
        "up": (0, 1),
        "down": (0, -1),
        "left": (-1, 0),
        "right": (1, 0),
    }
    dx, dy = deltas[direction]

    # Update the robot position kept in context.extra.
    x, y = context.extra
    context.extra = (x + steps * dx, y + steps * dy)
./install-test.sh 6 | -------------------------------------------------------------------------------- /install-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pip install --upgrade pip || exit 1 4 | pip install -e .[test] || exit 1 5 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: parglare 2 | 3 | site_description: A pure Python (G)LR parser with integrated scanner 4 | site_author: Igor Dejanović 5 | 6 | repo_url: https://github.com/igordejanovic/parglare 7 | theme: 8 | name: readthedocs 9 | analytics: 10 | gtag: UA-68681917-1 11 | extra_css: [style.css] 12 | strict: true 13 | 14 | nav: 15 | - Home: index.md 16 | - User Guide: 17 | - Getting started: getting_started.md 18 | - Grammar language: grammar_language.md 19 | - Grammar class: grammar.md 20 | - Parser: parser.md 21 | - Actions: actions.md 22 | - Common API: common.md 23 | - Recognizers: recognizers.md 24 | - Modularization: grammar_modularization.md 25 | - LR parsing and conflicts: lr_parsing.md 26 | - Disambiguation: disambiguation.md 27 | - Parse forest/trees: parse_forest_trees.md 28 | - Handling errors: handling_errors.md 29 | - pglr command: pglr.md 30 | - Debugging: debugging.md 31 | - Release Notes: 32 | - 0.15: release_notes/release_0_15.md 33 | - 0.14: release_notes/release_0_14.md 34 | - About: 35 | - Contributing: about/CONTRIBUTING.md 36 | - License: about/LICENSE.md 37 | 38 | markdown_extensions: 39 | - admonition: 40 | - toc: 41 | permalink: true 42 | 43 | plugins: 44 | - mike 45 | - search 46 | 47 | copyright: Copyright © Igor Dejanović. 
48 | -------------------------------------------------------------------------------- /parglare/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # flake8: NOQA 3 | from parglare.parser import Parser, Token, pos_to_line_col 4 | from parglare.tables import LALR, SLR, SHIFT, REDUCE, ACCEPT 5 | from parglare.glr import GLRParser 6 | from parglare.grammar import Grammar, NonTerminal, Terminal, \ 7 | RegExRecognizer, StringRecognizer, EMPTY, STOP 8 | from parglare.common import get_collector 9 | from parglare.trees import Node, NodeTerm, NodeNonTerm, visitor 10 | from parglare.exceptions import ParserInitError, ParseError, GrammarError, \ 11 | DisambiguationError, LoopError 12 | 13 | try: 14 | from importlib.metadata import version 15 | except ModuleNotFoundError: 16 | from importlib_metadata import version 17 | 18 | __version__ = version("parglare") 19 | -------------------------------------------------------------------------------- /parglare/actions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common parsing actions. 3 | """ 4 | import contextlib 5 | 6 | 7 | def pass_none(_, value, *args): 8 | return None 9 | 10 | 11 | def pass_nochange(_, value, *args): 12 | return value 13 | 14 | 15 | def pass_empty(_, value, *args): 16 | """ 17 | Used for EMPTY production alternative in collect. 18 | """ 19 | return [] 20 | 21 | 22 | def pass_single(_, nodes): 23 | """ 24 | Unpack single value and pass up. 25 | """ 26 | return nodes[0] 27 | 28 | 29 | def pass_inner(_, nodes): 30 | """ 31 | Pass inner value up, e.g. for stripping parentheses as in 32 | `( )`. 
33 | """ 34 | n = nodes[1:-1] 35 | with contextlib.suppress(ValueError): 36 | n, = n 37 | return n 38 | 39 | 40 | def collect_first(_, nodes): 41 | """ 42 | Used for: 43 | Elements = Elements Element; 44 | """ 45 | e1, e2 = nodes 46 | if e2 is not None: 47 | e1 = list(e1) 48 | e1.append(e2) 49 | return e1 50 | 51 | 52 | def collect_first_sep(_, nodes): 53 | """ 54 | Used for: 55 | Elements = Elements "," Element; 56 | """ 57 | e1, _, e2 = nodes 58 | if e2 is not None: 59 | e1 = list(e1) 60 | e1.append(e2) 61 | return e1 62 | 63 | 64 | def collect_right_first(_, nodes): 65 | """ 66 | Used for: 67 | Elements = Element Elements; 68 | """ 69 | e1, e2 = [nodes[0]], nodes[1] 70 | e1.extend(e2) 71 | return e1 72 | 73 | 74 | def collect_right_first_sep(_, nodes): 75 | """ 76 | Used for: 77 | Elements = Element "," Elements; 78 | """ 79 | e1, e2 = [nodes[0]], nodes[2] 80 | e1.extend(e2) 81 | return e1 82 | 83 | 84 | # Used for productions of the form - one or more elements: 85 | # Elements: Elements Element | Element; 86 | collect = [ 87 | collect_first, 88 | pass_nochange 89 | ] 90 | 91 | # Used for productions of the form - one or more elements: 92 | # Elements: Elements "," Element | Element; 93 | collect_sep = [ 94 | collect_first_sep, 95 | pass_nochange 96 | ] 97 | 98 | # Used for productions of the form - zero or more elements: 99 | # Elements: Elements Element | Element | EMPTY; 100 | collect_optional = [ 101 | collect_first, 102 | pass_nochange, 103 | pass_empty 104 | ] 105 | 106 | # Used for productions of the form - zero or more elements: 107 | # Elements: Elements "," Element | Element | EMPTY; 108 | collect_sep_optional = [ 109 | collect_first_sep, 110 | pass_nochange, 111 | pass_empty 112 | ] 113 | 114 | # Used for productions of the form - one or more elements: 115 | # Elements: Element Elements | Element; 116 | collect_right = [ 117 | collect_right_first, 118 | pass_nochange 119 | ] 120 | 121 | # Used for productions of the form - one or more elements: 122 | 
# Elements: Element "," Elements | Element; 123 | collect_right_sep = [ 124 | collect_right_first_sep, 125 | pass_nochange 126 | ] 127 | 128 | # Used for productions of the form - zero or more elements: 129 | # Elements: Element Elements | Element | EMPTY; 130 | collect_right_optional = [ 131 | collect_right_first, 132 | pass_nochange, 133 | pass_empty 134 | ] 135 | 136 | # Used for productions of the form - zero or more elements: 137 | # Elements: Element "," Elements | Element | EMPTY; 138 | collect_right_sep_optional = [ 139 | collect_right_first_sep, 140 | pass_nochange, 141 | pass_empty 142 | ] 143 | 144 | # Used for the production of the form: 145 | # OptionalElement: Element | EMPTY; 146 | optional = [ 147 | pass_single, 148 | pass_none 149 | ] 150 | 151 | 152 | def obj(context, nodes, **attrs): 153 | """ 154 | Creates Python object with the attributes created from named matches. 155 | This action is used as a default action for rules with named matches. 156 | """ 157 | grammar = context.parser.grammar 158 | rule_name = context.production.symbol.fqn 159 | 160 | cls = grammar.classes[rule_name] 161 | instance = cls(**attrs) 162 | 163 | instance._pg_start_position = context.start_position 164 | instance._pg_end_position = context.end_position 165 | 166 | return instance 167 | -------------------------------------------------------------------------------- /parglare/closure.py: -------------------------------------------------------------------------------- 1 | from parglare.grammar import EMPTY, NonTerminal 2 | 3 | LR_0 = 0 4 | LR_1 = 1 5 | 6 | 7 | def closure(state, itemset_type, first_sets=None): 8 | """ 9 | For the given LRState calculates its LR(0)/LR(1) itemset closure. 10 | 11 | Args: 12 | state(LRState): 13 | itemset_type(int): LR_0 or LR_1 14 | first_sets(dict of sets): Used in LR_1 itemsets calculation. 
15 | """ 16 | from parglare.tables import LRItem 17 | 18 | items_to_process = list(state.items) 19 | while items_to_process: 20 | item = items_to_process.pop() 21 | symbol = item.symbol_at_position 22 | if not isinstance(symbol, NonTerminal): 23 | continue 24 | 25 | # Calculate follow set that is possible after the 26 | # non-terminal at the given position of the current 27 | # item. 28 | if itemset_type is LR_1: 29 | follow = _new_item_follow(item, first_sets) 30 | for prod in [p for p in state.grammar.productions 31 | if p.symbol == symbol]: 32 | new_item = LRItem(prod, 0, 33 | set(follow) if itemset_type is LR_1 else None) 34 | if new_item not in state.items: 35 | # If the item doesn't exists yet add it and reprocess it. 36 | state.items.append(new_item) 37 | items_to_process.append(new_item) 38 | elif itemset_type is LR_1: 39 | # If the item already exists, this newly created item might 40 | # still have a wider follows set. If so, update with the 41 | # current new item follows set if we are building LR_1 items 42 | # set. 43 | existing_item = next(i for i in state.items if i == new_item) 44 | if not follow.issubset(existing_item.follow): 45 | existing_item.follow.update(follow) 46 | # If there was an update in the follow set of the existing 47 | # item we have to process it again as we have to update 48 | # follows of all items that were created from it. 49 | items_to_process.append(existing_item) 50 | 51 | 52 | def _new_item_follow(item, first_sets): 53 | """ 54 | Returns follow set of possible terminals after the item's current 55 | non-terminal. 56 | 57 | Args: 58 | item (LRItem): The source item which is causing the creation of the 59 | new item. 60 | first_sets(dict of sets): The dict of set of first items keyed by 61 | a grammar symbol. 
62 | """ 63 | 64 | new_follow = set() 65 | for s in item.production.rhs[item.position + 1:]: 66 | new_follow.update(first_sets[s]) 67 | if EMPTY not in new_follow: 68 | # If EMPTY can't be derived at current position then we have found 69 | # the whole follow set. 70 | break 71 | else: 72 | # If the EMPTY is possible at current position in this loop we must 73 | # continue to include firsts of the next grammar symbol. EMPTY 74 | # can't be a member of the follow set. 75 | new_follow.remove(EMPTY) 76 | else: 77 | # If the rest of production can be EMPTY we shall inherit all elements 78 | # of the source item follow set. 79 | new_follow.update(item.follow) 80 | 81 | return new_follow 82 | -------------------------------------------------------------------------------- /parglare/export.py: -------------------------------------------------------------------------------- 1 | 2 | from parglare.common import dot_escape 3 | from parglare.parser import REDUCE, SHIFT 4 | 5 | HEADER = ''' 6 | digraph grammar { 7 | rankdir=LR 8 | fontname = "Bitstream Vera Sans" 9 | fontsize = 8 10 | node[ 11 | shape=record, 12 | style=filled, 13 | fillcolor=aliceblue 14 | ] 15 | nodesep = 0.3 16 | edge[dir=black,arrowtail=empty] 17 | 18 | 19 | ''' 20 | 21 | 22 | def grammar_pda_export(table, file_name): 23 | 24 | with open(file_name, 'w', encoding="utf-8") as f: 25 | f.write(HEADER) 26 | 27 | for state in table.states: 28 | kernel_items = "" 29 | for item in state.kernel_items: 30 | kernel_items += f"{dot_escape(str(item))}\\l" 31 | 32 | nonkernel_items = "|" if state.nonkernel_items else "" 33 | for item in state.nonkernel_items: 34 | nonkernel_items += f"{dot_escape(str(item))}\\l" 35 | 36 | # SHIFT actions and GOTOs will be encoded in links. 37 | # REDUCE actions will be presented inside each node. 
38 | reduce_actions = [] 39 | for term, actions in state.actions.items(): 40 | r_actions = [a for a in actions if a.action is REDUCE] 41 | if r_actions: 42 | reduce_actions.append((term, r_actions)) 43 | 44 | reductions = "" 45 | if reduce_actions: 46 | reductions = "|Reductions:\\l{}".format( 47 | ", ".join(["{}:{}".format( 48 | dot_escape(x[0].name), x[1][0].prod.prod_id 49 | if len(x[1]) == 1 else "[{}]".format( 50 | ",".join([str(i.prod.prod_id) for i in x[1]]))) 51 | for x in reduce_actions])) 52 | 53 | # States 54 | f.write('{}[label="{}|{}{}{}"]\n' 55 | .format( 56 | state.state_id, 57 | dot_escape(f"{state.state_id}:{state.symbol}"), 58 | kernel_items, nonkernel_items, reductions)) 59 | 60 | f.write("\n") 61 | 62 | # SHIFT and GOTOs as links 63 | shacc = [] 64 | for term, actions in state.actions.items(): 65 | for a in [a for a in actions if a.action is SHIFT]: 66 | shacc.append((term, a)) 67 | for term, action in shacc: 68 | f.write('{} -> {} [label="{}:{}"]'.format( 69 | state.state_id, 70 | action.state.state_id, 71 | "SHIFT" if action.action is SHIFT else "ACCEPT", term)) 72 | 73 | for symb, goto_state in ((symb, goto) for symb, goto 74 | in state.gotos.items()): 75 | f.write(f'{state.state_id} -> {goto_state.state_id}' 76 | f' [label="GOTO:{symb}"]') 77 | 78 | f.write("\n}\n") 79 | -------------------------------------------------------------------------------- /parglare/tables/persist.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import OrderedDict 3 | 4 | 5 | def table_to_serializable(table): 6 | """Convert table object to serializable representation composed of 7 | lists and dicts.""" 8 | # states 9 | states = [] 10 | for state in table.states: 11 | states.append(_dump_state(state)) 12 | 13 | return states 14 | 15 | 16 | def save_table(file_name, table): 17 | with open(file_name, 'w') as f: 18 | json.dump(table_to_serializable(table), f, sort_keys=True) 19 | 20 | 21 | def 
table_from_serializable(serialized_states, grammar): 22 | """Convert serializable representation of a parsing table into 23 | LRTable object.""" 24 | from parglare.tables import Action, LRState, LRTable 25 | 26 | states = [] 27 | states_dict = {} 28 | for json_state in serialized_states: 29 | state = LRState(grammar, json_state['state_id'], 30 | grammar.get_symbol(json_state['symbol'])) 31 | states_dict[state.state_id] = state 32 | state.finish_flags = json_state['finish_flags'] 33 | state.actions = json_state['actions'] 34 | state.gotos = json_state['gotos'] 35 | states.append(state) 36 | 37 | # Unpack actions and gotos 38 | for state in states: 39 | 40 | actions = OrderedDict() 41 | for json_action_fqn in state.actions: 42 | terminal_fqn, json_actions = json_action_fqn 43 | term_acts = [] 44 | for json_action in json_actions: 45 | if 'state_id' in json_action: 46 | act_state = states_dict[json_action['state_id']] 47 | else: 48 | act_state = None 49 | if 'prod_id' in json_action: 50 | act_prod = grammar.productions[json_action['prod_id']] 51 | else: 52 | act_prod = None 53 | term_acts.append(Action(json_action['action'], 54 | act_state, act_prod)) 55 | 56 | actions[grammar.get_terminal(terminal_fqn)] = term_acts 57 | state.actions = actions 58 | 59 | gotos = OrderedDict() 60 | for json_goto_fqn in state.gotos: 61 | nonterm_fqn, goto_state = json_goto_fqn 62 | gotos[grammar.get_nonterminal(nonterm_fqn)] = \ 63 | states_dict[goto_state] 64 | state.gotos = gotos 65 | 66 | table = LRTable(states, calc_finish_flags=False) 67 | 68 | return table 69 | 70 | 71 | def load_table(file_name, grammar): 72 | with open(file_name) as f: 73 | return table_from_serializable(json.load(f), grammar) 74 | 75 | 76 | def _dump_state(state): 77 | s = {} 78 | s['state_id'] = state.state_id 79 | s['symbol'] = state.symbol.fqn 80 | action_items = list(state.actions.items()) 81 | s['actions'] = [[terminal.fqn, _dump_actions(actions)] 82 | for terminal, actions in action_items] 83 | goto_items 
= list(state.gotos.items()) 84 | s['gotos'] = [[nonterminal.fqn, st.state_id] 85 | for nonterminal, st in goto_items] 86 | s['finish_flags'] = state.finish_flags 87 | 88 | return s 89 | 90 | 91 | def _dump_actions(actions): 92 | alist = [] 93 | for action in actions: 94 | a = {} 95 | a['action'] = action.action 96 | if action.state is not None: 97 | a['state_id'] = action.state.state_id 98 | if action.prod is not None: 99 | a['prod_id'] = action.prod.prod_id 100 | alist.append(a) 101 | 102 | return alist 103 | -------------------------------------------------------------------------------- /parglare/termui.py: -------------------------------------------------------------------------------- 1 | 2 | import click 3 | 4 | colors = False 5 | 6 | S_ATTENTION = {'fg': 'red', 'bold': True} 7 | S_HEADER = {'fg': 'green'} 8 | S_EMPH = {'fg': 'yellow'} 9 | 10 | 11 | def prints(message, s=None): 12 | if s is None: 13 | s = {} 14 | click.echo(style(message, s), color=colors) 15 | 16 | 17 | def style_message(message, style): 18 | if colors: 19 | return click.style(message, **style) 20 | else: 21 | return message 22 | 23 | 24 | def s_header(message): 25 | return style_message(message, S_HEADER) 26 | 27 | 28 | def s_attention(message): 29 | return style_message(message, S_ATTENTION) 30 | 31 | 32 | def s_emph(message): 33 | return style_message(message, S_EMPH) 34 | 35 | 36 | def style(header, content, level=0, new_line=False, header_style=S_HEADER, 37 | width=120): 38 | if content: 39 | content_start = level * 8 + len(header) + 1 40 | content_width = width - content_start 41 | content = str(content) 42 | content = [content[start:start+content_width] 43 | for start in range(0, len(content), content_width)] 44 | content = ('\n' + ' ' * content_start).join(content) 45 | new_line = "\n" if new_line else "" 46 | level = ("\t" * level) if level else "" 47 | return new_line + level + style_message(str(header), header_style) \ 48 | + ((" " + str(content)) if content else "") 49 | 50 | 51 
| def styled_print(header, content, level=0, new_line=False, 52 | header_style=S_HEADER, width=120): 53 | prints(style(header, content, level, new_line, header_style, width)) 54 | 55 | 56 | def h_print(header, content="", level=0, new_line=False): 57 | styled_print(header, content, level, new_line, S_HEADER) 58 | 59 | 60 | def a_print(header, content="", level=0, new_line=False): 61 | styled_print(header, content, level, new_line, S_ATTENTION) 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "parglare" 3 | version = "0.18.0" 4 | description = "A pure Python LR/GLR parser" 5 | authors = [ 6 | {name = "Igor R. Dejanović", email = "igor.dejanovic@gmail.com"}, 7 | ] 8 | maintainers = [ 9 | {name = "Igor R. Dejanović", email = "igor.dejanovic@gmail.com"}, 10 | ] 11 | readme = "README.rst" 12 | license = {text = "MIT"} 13 | keywords = ["parser", "lr", "glr"] 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Intended Audience :: Developers", 17 | "Intended Audience :: Information Technology", 18 | "Intended Audience :: Science/Research", 19 | "Topic :: Software Development :: Interpreters", 20 | "Topic :: Software Development :: Compilers", 21 | "Topic :: Software Development :: Libraries :: Python Modules", 22 | "License :: OSI Approved :: MIT License", 23 | "Natural Language :: English", 24 | "Programming Language :: Python :: 3", 25 | "Programming Language :: Python :: 3.8", 26 | "Programming Language :: Python :: 3.9", 27 | "Programming Language :: Python :: 3.10", 28 | "Programming Language :: Python :: 3.11", 29 | "Programming Language :: Python :: 3.12", 30 | "Operating System :: OS Independent", 31 | ] 32 | 33 | requires-python = ">=3.8, <3.13" 34 | dependencies = [ 35 | "click >=7.0, <9.0" 36 | ] 37 | 38 | [project.urls] 39 | Homepage = "https://github.com/igordejanovic/parglare" 40 | 
Repository = "https://github.com/igordejanovic/parglare" 41 | Changelog = "https://github.com/igordejanovic/parglare/blob/master/CHANGELOG.md" 42 | 43 | [build-system] 44 | build-backend = "flit_core.buildapi" 45 | requires = ["flit_core >=3.8.0,<4"] 46 | 47 | [tool.flit.module] 48 | name = "parglare" 49 | 50 | [tool.flit.sdist] 51 | exclude = ["**/.*"] 52 | 53 | [project.optional-dependencies] 54 | dev = [ 55 | "ruff", 56 | "flit", 57 | "mkdocs", 58 | "mike", 59 | ] 60 | 61 | test = [ 62 | "ruff", 63 | "coverage", 64 | "coveralls", 65 | "pytest", 66 | ] 67 | 68 | [project.scripts] 69 | pglr = "parglare.cli:pglr" 70 | 71 | [tool.ruff] 72 | line-length = 90 73 | indent-width = 4 74 | 75 | [tool.ruff.lint] 76 | # https://docs.astral.sh/ruff/linter/#rule-selection 77 | select = [ 78 | # pycodestyle 79 | "E", 80 | # Pyflakes 81 | "F", 82 | # pyupgrade 83 | "UP", 84 | # flake8-bugbear 85 | "B", 86 | # flake8-simplify 87 | "SIM", 88 | # isort 89 | "I", 90 | ] 91 | -------------------------------------------------------------------------------- /runtests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Run all tests and generate coverage report 3 | 4 | coverage run --omit parglare/cli.py --source parglare -m pytest tests/func || exit 1 5 | coverage report --fail-under 90 || exit 1 6 | # Run this to generate html report 7 | # coverage html --directory=coverage 8 | ruff check parglare/ tests/func examples/ || exit 1 9 | -------------------------------------------------------------------------------- /scripts/parglare_qtree.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Produce LaTex qtree output from the parglare parse trees. 
3 | 4 | from parglare import Grammar, GLRParser 5 | 6 | INPUT = '1 + 2 * 3 + 4' 7 | 8 | grammar = r''' 9 | E: E '+' E 10 | | E '*' E 11 | | '(' E ')' 12 | | number; 13 | 14 | terminals 15 | number: /\d+/; 16 | ''' 17 | 18 | g = Grammar.from_string(grammar) 19 | parser = GLRParser(g, build_tree=True) 20 | 21 | result = parser.parse(INPUT) 22 | 23 | 24 | def to_str(node, depth=0): 25 | indent = ' ' * depth 26 | if node.is_nonterm(): 27 | s = '\n{}[.{} {}\n{}]'.format(indent, 28 | node.production.symbol, 29 | ''.join([to_str(n, depth+1) 30 | for n in node.children]), 31 | indent) 32 | else: 33 | s = '\n{}[.{} ]'.format(indent, node.value) 34 | return s 35 | 36 | 37 | with open('qtree_out.txt', 'w') as f: 38 | f.write('\begin{{tabular}}{{{}}}\n'.format('c' * len(result))) 39 | trees = '&\n'.join(['\\Tree {}'.format(to_str(tree)) for tree in result]) 40 | f.write(trees) 41 | -------------------------------------------------------------------------------- /tests/func/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/__init__.py -------------------------------------------------------------------------------- /tests/func/actions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/actions/__init__.py -------------------------------------------------------------------------------- /tests/func/actions/collector/grammar.pg: -------------------------------------------------------------------------------- 1 | Model: INT+ Rule1 INT; 2 | Rule1: a=STRING; 3 | 4 | terminals 5 | INT: /\d+/; 6 | STRING: /'.*'/; 7 | -------------------------------------------------------------------------------- /tests/func/actions/collector/test_actions_get_collector.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar, Parser, get_collector 4 | 5 | THIS_FOLDER = os.path.abspath(os.path.dirname(__file__)) 6 | 7 | 8 | def test_action_explicit_get_collector(): 9 | """ 10 | Test the basic usage of `get_collector` API where we don't provide 11 | actions in a separate python module. 12 | """ 13 | 14 | action = get_collector() 15 | 16 | @action 17 | def INT(context, value): 18 | return int(value) 19 | 20 | @action 21 | def STRING(context, value): 22 | return f"#{value}#" 23 | 24 | grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg')) 25 | Parser(grammar, actions=action.all) 26 | 27 | 28 | def test_action_explicit_get_collector_missing_action(): 29 | """ 30 | Test when `get_collector` has a terminal without defined action nothing 31 | happens as the default implicit action will be used. 32 | """ 33 | 34 | action = get_collector() 35 | 36 | @action 37 | def INT(context, value): 38 | return int(value) 39 | 40 | grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg')) 41 | Parser(grammar, actions=action.all) 42 | 43 | 44 | def test_actions_explicit_get_collector_action_for_unexisting_terminal(): 45 | """ 46 | Test for situation when `get_collector` has an action for un-existing 47 | terminal. 
48 | """ 49 | 50 | action = get_collector() 51 | 52 | @action 53 | def INT(context, value): 54 | return int(value) 55 | 56 | @action 57 | def STRING(context, value): 58 | return f"#{value}#" 59 | 60 | @action 61 | def STRING2(context, value): 62 | return f"#{value}#" 63 | 64 | grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg')) 65 | Parser(grammar, actions=action.all) 66 | -------------------------------------------------------------------------------- /tests/func/grammar/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/grammar/__init__.py -------------------------------------------------------------------------------- /tests/func/grammar/calc.pg: -------------------------------------------------------------------------------- 1 | Calc: Assignments E; 2 | Assignments: Assignment | Assignments Assignment | EMPTY; 3 | Assignment: VariableName "=" Number; 4 | 5 | E: E "+" E {left, 1} 6 | | E "-" E {left, 1} 7 | | E "*" E {left, 2} 8 | | E "/" E {left, 2} 9 | | "(" E ")" 10 | | VariableRef 11 | | Number 12 | ; 13 | 14 | VariableRef: VariableName; 15 | 16 | terminals 17 | VariableName: /[a-zA-Z_][_a-zA-Z0-9]*/; 18 | Number: /\d+(\.\d+)?/; 19 | -------------------------------------------------------------------------------- /tests/func/grammar/calcactions.py: -------------------------------------------------------------------------------- 1 | def act_assignment(context, nodes): 2 | name = nodes[0] 3 | number = nodes[2] 4 | 5 | # Use context.extra to collect variables 6 | if context.extra is None: 7 | context.extra = {} 8 | 9 | context.extra[name] = number 10 | 11 | 12 | actions = { 13 | "Calc": lambda _, nodes: nodes[1], 14 | "Assignment": act_assignment, 15 | "E": [lambda _, nodes: nodes[0] + nodes[2], 16 | lambda _, nodes: nodes[0] - nodes[2], 17 | lambda _, nodes: nodes[0] * nodes[2], 18 | lambda _, 
nodes: nodes[0] / nodes[2], 19 | lambda _, nodes: nodes[1], 20 | lambda _, nodes: nodes[0], 21 | lambda _, nodes: nodes[0]], 22 | "Number": lambda _, value: float(value), 23 | "VariableName": lambda _, value: value, 24 | "VariableRef": lambda context, nodes: context.extra[nodes[0]], 25 | } 26 | -------------------------------------------------------------------------------- /tests/func/grammar/expression_grammar.py: -------------------------------------------------------------------------------- 1 | from parglare import Grammar, NonTerminal, Terminal 2 | 3 | # Expression grammar 4 | E, T, F = (NonTerminal(name) for name in ['E', 'T', 'F']) 5 | PLUS, MULT, ID, OPEN, CLOSE = ( 6 | Terminal(value) for value in ['+', '*', 'id', '(', ')']) 7 | productions = [ 8 | (E, (E, PLUS, T)), 9 | (E, (T, )), 10 | (T, (T, MULT, F)), 11 | (T, (F, )), 12 | (F, (OPEN, E, CLOSE)), 13 | (F, (ID,)) 14 | ] 15 | 16 | 17 | def get_grammar(): 18 | return Grammar.from_struct(productions=productions, start_symbol=E) 19 | -------------------------------------------------------------------------------- /tests/func/grammar/expression_grammar_numbers.py: -------------------------------------------------------------------------------- 1 | from parglare import Grammar, NonTerminal, RegExRecognizer, Terminal 2 | 3 | 4 | def get_grammar(): 5 | 6 | # Expression grammar with float numbers 7 | E, T, F = (NonTerminal(name) for name in ['E', 'T', 'F']) 8 | PLUS, MULT, OPEN, CLOSE = ( 9 | Terminal(value) for value in ['+', '*', '(', ')']) 10 | NUMBER = Terminal('number', RegExRecognizer(r'\d+(\.\d+)?')) 11 | productions = [ 12 | (E, (E, PLUS, T)), 13 | (E, (T, )), 14 | (T, (T, MULT, F)), 15 | (T, (F, )), 16 | (F, (OPEN, E, CLOSE)), 17 | (F, (NUMBER,)) 18 | ] 19 | 20 | return Grammar.from_struct(productions=productions, start_symbol=E) 21 | -------------------------------------------------------------------------------- /tests/func/grammar/test_groups.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Grammar rules can use groups in parentheses. 3 | The group should be treated the same as any other rule reference, 4 | it can be used in assignments, repetitions etc. 5 | """ 6 | from parglare import GLRParser, Grammar 7 | from parglare.grammar import ASSOC_LEFT, MULT_ONE 8 | 9 | 10 | def test_group_with_sequence(): 11 | grammar_str = r''' 12 | a: (b* c); 13 | b: c; 14 | terminals 15 | c: "c"; 16 | ''' 17 | grammar = Grammar.from_string(grammar_str) 18 | 19 | # Check initial rule 20 | assert grammar.productions[0].rhs[0].name == 'a' 21 | 22 | # Check that A references A_g1 23 | assert grammar.get_productions('a')[0].rhs[0].name == 'a_g1' 24 | 25 | # Check group rule 26 | assert grammar.get_nonterminal('a_g1') 27 | prods = grammar.get_productions('a_g1') 28 | assert len(prods) == 1 29 | assert len(prods[0].rhs) == 2 30 | assert prods[0].rhs[0].name == 'b_0' 31 | assert prods[0].rhs[1].name == 'c' 32 | 33 | 34 | def test_group_with_choice(): 35 | grammar_str = r''' 36 | a: c (b* c | b); 37 | b: c; 38 | terminals 39 | c: "c"; 40 | ''' 41 | grammar = Grammar.from_string(grammar_str) 42 | 43 | # Check initial rule 44 | assert grammar.productions[0].rhs[0].name == 'a' 45 | 46 | # Check that A references A_g1 47 | assert grammar.get_productions('a')[0].rhs[1].name == 'a_g1' 48 | 49 | assert grammar.get_nonterminal('a_g1') 50 | prods = grammar.get_productions('a_g1') 51 | assert len(prods) == 2 52 | assert len(prods[0].rhs) == 2 53 | assert prods[0].rhs[0].name == 'b_0' 54 | assert prods[0].rhs[1].name == 'c' 55 | assert len(prods[1].rhs) == 1 56 | assert prods[1].rhs[0].name == 'b' 57 | 58 | 59 | def test_group_with_metadata(): 60 | grammar_str = r''' 61 | a: (b* c {left} | c); 62 | b: c; 63 | terminals 64 | c: "c"; 65 | ''' 66 | grammar = Grammar.from_string(grammar_str) 67 | assert grammar.get_nonterminal('a_g1') 68 | prods = grammar.get_productions('a_g1') 69 | assert 
len(prods) == 2 70 | assert len(prods[0].rhs) == 2 71 | assert prods[0].rhs[0].name == 'b_0' 72 | assert prods[0].rhs[1].name == 'c' 73 | assert prods[0].assoc == ASSOC_LEFT 74 | 75 | 76 | def test_group_with_assignment(): 77 | grammar_str = r''' 78 | a: c c=(b* c); 79 | terminals 80 | b: "b"; 81 | c: "c"; 82 | ''' 83 | grammar = Grammar.from_string(grammar_str) 84 | assert grammar.get_nonterminal('a_g1') 85 | prods = grammar.get_productions('a_g1') 86 | assert len(prods) == 1 87 | prods_a = grammar.get_productions('a') 88 | assert len(prods_a) == 1 89 | 90 | assert not prods[0].assignments 91 | assert prods_a[0].assignments 92 | assig_c = prods_a[0].assignments['c'] 93 | assert assig_c.op == '=' 94 | assert assig_c.multiplicity == MULT_ONE 95 | assert assig_c.symbol.name == 'a_g1' 96 | 97 | 98 | def test_group_complex(): 99 | grammar_str = r''' 100 | @obj 101 | s: (b c)*[comma]; 102 | s: (b c)*[comma] a=(a+ (b | c)*)+[comma]; 103 | terminals 104 | a: "a"; 105 | b: "b"; 106 | c: "c"; 107 | comma: ","; 108 | ''' 109 | grammar = Grammar.from_string(grammar_str) 110 | 111 | assert len(grammar.get_productions('s_g1')) == 1 112 | # B | C 113 | prods = grammar.get_productions('s_g3') 114 | assert len(prods) == 2 115 | assert prods[0].rhs[0].name == 'b' 116 | assert prods[1].rhs[0].name == 'c' 117 | 118 | # Nesting 119 | prods = grammar.get_productions('s_g2') 120 | assert len(prods) == 1 121 | assert prods[0].rhs[0].name == 'a_1' 122 | assert prods[0].rhs[1].name == 's_g3_0' 123 | assert grammar.get_productions('s')[1].rhs[1].name == 's_g2_1_comma' 124 | 125 | assert 's_g5' not in grammar 126 | 127 | parser = GLRParser(grammar) 128 | 129 | forest = parser.parse('b c, b c a a a b c c b, a b b') 130 | result = parser.call_actions(forest[0]) 131 | assert result.a == [[['a', 'a', 'a'], 132 | ['b', 'c', 'c', 'b']], [['a'], ['b', 'b']]] 133 | -------------------------------------------------------------------------------- /tests/func/grammar/test_keywords.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Test special KEYWORD rule. 3 | """ 4 | # -*- coding: utf-8 -*- 5 | import pytest 6 | 7 | from parglare import Grammar, Parser, RegExRecognizer, StringRecognizer 8 | from parglare.exceptions import GrammarError, ParseError 9 | 10 | 11 | def test_keyword_must_be_regex(): 12 | grammar = r""" 13 | S: "for" name=ID "=" from=INT "to" to=INT; 14 | 15 | terminals 16 | KEYWORD: "id"; 17 | ID: /\w+/; 18 | INT: /\d+/; 19 | """ 20 | 21 | with pytest.raises(GrammarError) as e: 22 | Grammar.from_string(grammar) 23 | 24 | assert 'must have a regex recognizer defined' in str(e.value) 25 | 26 | 27 | def test_keyword_grammar_init(): 28 | grammar = r""" 29 | S: "for" name=ID "=" from=INT "to" to=INT; 30 | 31 | terminals 32 | KEYWORD: /\w+/; 33 | ID: /\w+/; 34 | INT: /\d+/; 35 | """ 36 | 37 | g = Grammar.from_string(grammar) 38 | 39 | # 'for' term matches KEYWORD rule so it'll be replaced by 40 | # RegExRecognizer instance. 41 | for_term = g.get_terminal('for') 42 | assert type(for_term.recognizer) is RegExRecognizer 43 | assert for_term.recognizer._regex == r'\bfor\b' 44 | 45 | # '=' term doesn't match KEYWORD rule so it will not change 46 | eq_term = g.get_terminal('=') 47 | assert type(eq_term.recognizer) is StringRecognizer 48 | 49 | 50 | def test_keyword_matches_on_word_boundary(): 51 | grammar = r""" 52 | S: "for" name=ID "=" from=INT "to" to=INT; 53 | 54 | terminals 55 | ID: /\w+/; 56 | INT: /\d+/; 57 | """ 58 | 59 | g = Grammar.from_string(grammar) 60 | 61 | parser = Parser(g) 62 | # This will not raise an error 63 | parser.parse('forid=10 to20') 64 | 65 | # We add KEYWORD rule to the grammar to match ID-like keywords. 
66 | grammar += r"KEYWORD: /\w+/;" 67 | 68 | g = Grammar.from_string(grammar) 69 | parser = Parser(g) 70 | with pytest.raises(ParseError, match='forid=10 t" => Expected: for'): 71 | # This *will* raise an error 72 | parser.parse('forid=10 to20') 73 | with pytest.raises(ParseError, match='Expected: to'): 74 | # This *will* also raise an error 75 | parser.parse('for id=10 to20') 76 | 77 | # But this is OK 78 | parser.parse('for id=10 to 20') 79 | parser.parse('for for=10 to 20') 80 | 81 | 82 | def test_keyword_preferred_over_regexes(): 83 | """ 84 | Test that keyword matches (internally converted to regex matches) are 85 | preferred over ordinary regex matches of the same length. 86 | """ 87 | 88 | grammar = r""" 89 | S: "for"? name=ID? "=" from=INT "to" to=INT; 90 | 91 | terminals 92 | ID: /\w+/; 93 | INT: /\d+/; 94 | KEYWORD: /\w+/; 95 | """ 96 | g = Grammar.from_string(grammar) 97 | 98 | parser = Parser(g) 99 | 100 | # 'for' is ambiguous as it can be keyword or ID(name) 101 | # ParseError could be thrown but parglare will prefer 102 | # StringRecognizer and keywords over RegExRecognizer for 103 | # the match of the same length (i.e. "more specific match") 104 | parser.parse("for = 10 to 100") 105 | -------------------------------------------------------------------------------- /tests/func/grammar/test_load_from_file.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | import os 3 | from parglare import Grammar, Parser 4 | from .calcactions import actions 5 | 6 | 7 | def test_load_from_file(): 8 | 9 | grammar = Grammar.from_file(os.path.join( 10 | os.path.dirname(__file__), 'calc.pg')) 11 | parser = Parser(grammar, actions=actions, debug=True) 12 | 13 | res = parser.parse(""" 14 | a = 5 15 | b = 10 16 | 17 | 56.4 + a / 3 * 5 - b + 8 * 3 18 | """) 19 | 20 | res2 = 56.4 + 5. / 3 * 5 - 10 + 8 * 3 21 | print(res2, res) 22 | assert res == 56.4 + 5. 
/ 3 * 5 - 10 + 8 * 3 23 | -------------------------------------------------------------------------------- /tests/func/grammar/test_terminals.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | import re 3 | from parglare import Parser, Grammar 4 | 5 | 6 | def test_str_terminals(): 7 | g = r""" 8 | A: "a" B C D 'b'; 9 | 10 | terminals 11 | B: "b\""; 12 | C: "\"c\" "; 13 | D: '\'d\''; 14 | """ 15 | grammar = Grammar.from_string(g) 16 | p = Parser(grammar) 17 | tree = p.parse(r''' a b" "c" 'd' b ''') 18 | assert tree 19 | 20 | 21 | def test_regex_terminals(): 22 | g = r""" 23 | A: Aterm B C D 'b'; 24 | C: 'c' Cterm; 25 | 26 | terminals 27 | Aterm: /a\//; 28 | Cterm: /a+/; 29 | B: /a'b[^"]/; 30 | D: /\d+\.\d+/; 31 | """ 32 | grammar = Grammar.from_string(g) 33 | p = Parser(grammar) 34 | tree = p.parse(r''' a/ a'bc c aaaa 4.56 b ''') 35 | assert tree 36 | 37 | # Test that re.VEROSE flag is the default for regex matches 38 | assert grammar.get_terminal('Aterm').recognizer.regex.flags & re.VERBOSE == re.VERBOSE 39 | -------------------------------------------------------------------------------- /tests/func/grammar/test_whitespaces.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from parglare import Grammar, Parser 4 | from parglare.exceptions import ParseError 5 | 6 | from .expression_grammar import get_grammar 7 | 8 | 9 | def test_default_whitespaces(): 10 | 11 | grammar = get_grammar() 12 | p = Parser(grammar) 13 | 14 | p.parse("""id+ id * (id 15 | +id ) 16 | """) 17 | 18 | 19 | def test_whitespace_redefinition(): 20 | 21 | grammar = get_grammar() 22 | 23 | # Make newline treated as non-ws characted 24 | p = Parser(grammar, ws=' \t') 25 | 26 | p.parse("""id+ id * (id +id ) """) 27 | 28 | try: 29 | p.parse("""id+ id * (id 30 | +id ) 31 | """) 32 | except ParseError as e: 33 | assert e.location.start_position == 13 34 | 35 | 36 | def 
test_whitespace_not_used_if_layout(): 37 | """ 38 | If LAYOUT rule is used, ws definition is ignored. 39 | """ 40 | grammar = """ 41 | S: 'a' 'b'; 42 | LAYOUT: 'k' | EMPTY; 43 | """ 44 | g = Grammar.from_string(grammar) 45 | parser = Parser(g) 46 | with pytest.raises(ParseError): 47 | parser.parse('a b') 48 | -------------------------------------------------------------------------------- /tests/func/import/basic/first.pg: -------------------------------------------------------------------------------- 1 | import 'second.pg'; 2 | import 'submodule/third.pg' as t; 3 | 4 | FirstRule: INT+ second.SecondRule t.ThirdRule; 5 | 6 | terminals 7 | INT: /\d+/; 8 | -------------------------------------------------------------------------------- /tests/func/import/basic/second.pg: -------------------------------------------------------------------------------- 1 | SecondRule: STRING; 2 | 3 | terminals 4 | STRING: /'[^']'/; 5 | -------------------------------------------------------------------------------- /tests/func/import/basic/submodule/third.pg: -------------------------------------------------------------------------------- 1 | ThirdRule: FLOAT; 2 | 3 | terminals 4 | FLOAT: /\d+\.\d+/; 5 | -------------------------------------------------------------------------------- /tests/func/import/basic/test_import.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar 4 | 5 | this_folder = os.path.dirname(__file__) 6 | 7 | 8 | def test_import(): 9 | g = Grammar.from_file(os.path.join(this_folder, 'first.pg')) 10 | assert g 11 | -------------------------------------------------------------------------------- /tests/func/import/diamond/base.pg: -------------------------------------------------------------------------------- 1 | terminals 2 | ID: /\w+/; 3 | FQN: /\w+(\.\w+)*/; 4 | COMMA: ','; 5 | -------------------------------------------------------------------------------- 
/tests/func/import/diamond/components.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | 3 | Component: 4 | 'component' name=base.ID extends=ComponentExtends? '{' 5 | slots=Slot* 6 | '}' 7 | ; 8 | 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 10 | 11 | Slot: SlotIn|SlotOut; 12 | 13 | SlotIn: 'in' name=base.ID; 14 | SlotOut: 'out' name=base.ID; 15 | -------------------------------------------------------------------------------- /tests/func/import/diamond/model.pg: -------------------------------------------------------------------------------- 1 | import 'packages.pg'; 2 | import 'modules.pg' as m; 3 | 4 | Model: 5 | packages=packages.Package* 6 | modules=m.Module* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/diamond/modules.pg: -------------------------------------------------------------------------------- 1 | import 'components.pg' as c; 2 | import 'base.pg'; 3 | 4 | Module: 'module' name=base.ID '{' 5 | components=c.Component* 6 | '}'; 7 | -------------------------------------------------------------------------------- /tests/func/import/diamond/packages.pg: -------------------------------------------------------------------------------- 1 | import 'components.pg'; 2 | import 'base.pg'; 3 | 4 | Package: 5 | 'package' name=base.ID body=PackageBody? 
6 | ; 7 | 8 | PackageBody: 9 | '{' 10 | components=components.Component* 11 | '}' 12 | ; 13 | -------------------------------------------------------------------------------- /tests/func/import/diamond/test_diamond.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar, Parser 4 | 5 | this_folder = os.path.dirname(__file__) 6 | 7 | 8 | def test_diamond_import_resolving_and_model_creation(): 9 | g = Grammar.from_file(os.path.join(this_folder, 'model.pg')) 10 | assert g 11 | assert g.get_terminal('packages.components.base.COMMA') 12 | assert g.get_nonterminal('Model') 13 | 14 | # First path used for import of Component is going 15 | # packages->components->Component 16 | component_nonterminal = g.get_nonterminal('packages.components.Component') 17 | assert component_nonterminal 18 | 19 | input_str = ''' 20 | 21 | package First 22 | package Second { 23 | component packageComponent { 24 | 25 | } 26 | } 27 | 28 | module SomeModule { 29 | 30 | component myComponent { 31 | in SomeInputSlot 32 | out SomeOutputSlot 33 | } 34 | 35 | 36 | } 37 | 38 | ''' 39 | 40 | model = Parser(g).parse(input_str) 41 | assert model 42 | assert model.__class__.__name__ == 'Model' 43 | assert isinstance(model.packages, list) 44 | assert len(model.packages) == 2 45 | assert model.packages[0].name == 'First' 46 | assert isinstance(model.modules, list) 47 | assert len(model.modules) == 1 48 | 49 | packageComponent = model.packages[1].body.components[0] 50 | assert packageComponent.name == 'packageComponent' 51 | 52 | module = model.modules[0] 53 | assert module.__class__.__name__ == 'm.Module' 54 | assert module.name == 'SomeModule' 55 | assert len(module.components) == 1 56 | 57 | component = module.components[0] 58 | assert type(component) is type(packageComponent) 59 | assert component.name == 'myComponent' 60 | assert len(component.slots) == 2 61 | 62 | slot = component.slots[1] 63 | assert 
slot.__class__.__name__ == 'packages.components.SlotOut' 64 | assert slot.name == 'SomeOutputSlot' 65 | -------------------------------------------------------------------------------- /tests/func/import/fqn/A.pg: -------------------------------------------------------------------------------- 1 | import 'B.pg'; 2 | import 'C.pg'; 3 | 4 | ARule: C.CRule ATerm B.BTerm C.CTerm; 5 | 6 | terminals 7 | ATerm: "ATerm"; 8 | -------------------------------------------------------------------------------- /tests/func/import/fqn/B.pg: -------------------------------------------------------------------------------- 1 | import 'C.pg'; 2 | 3 | BRule: BTerm C.CTerm; 4 | 5 | terminals 6 | BTerm: "dummy"; 7 | -------------------------------------------------------------------------------- /tests/func/import/fqn/C.pg: -------------------------------------------------------------------------------- 1 | import 'D.pg'; 2 | 3 | CRule: CTerm; 4 | 5 | terminals 6 | CTerm: "CTerm"; 7 | -------------------------------------------------------------------------------- /tests/func/import/fqn/D.pg: -------------------------------------------------------------------------------- 1 | SomeRule: dummy; 2 | 3 | terminals 4 | dummy: "dummy"; 5 | -------------------------------------------------------------------------------- /tests/func/import/fqn/test_fqn.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar 4 | 5 | this_folder = os.path.dirname(__file__) 6 | 7 | 8 | def test_fqn_constructed_by_first_import_path(): 9 | 10 | g = Grammar.from_file(os.path.join(this_folder, 'A.pg')) 11 | 12 | assert g.get_terminal('B.C.CTerm') 13 | assert not g.get_terminal('C.CTerm') 14 | assert g.get_nonterminal('B.C.CRule') 15 | assert not g.get_nonterminal('C.CRule') 16 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_action_name/base.pg: 
-------------------------------------------------------------------------------- 1 | terminals 2 | ID: /\w+/; 3 | @number NUMERIC_ID: /\d+/; 4 | FQN: /\w+(\.\w+)*/; 5 | COMMA: ','; 6 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_action_name/base_actions.py: -------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | action = get_collector() 4 | 5 | 6 | @action 7 | def number(_, value): 8 | return float(value) 9 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_action_name/components.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | 3 | Component: 4 | 'component' name=base.ID extends=ComponentExtends? '{' 5 | slots=Slot* 6 | '}' 7 | ; 8 | 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 10 | 11 | Slot: SlotIn|SlotOut; 12 | 13 | SlotIn: 'in' name=base.ID; 14 | SlotOut: 'out' name=base.ID; 15 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_action_name/model.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | import 'components.pg' as c; 3 | 4 | Model: 5 | 'modelID' modelID = base.NUMERIC_ID 6 | components=c.Component* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_decorator_action_name/base.pg: -------------------------------------------------------------------------------- 1 | terminals 2 | ID: /\w+/; 3 | @number NUMERIC_ID: /\d+/; 4 | FQN: /\w+(\.\w+)*/; 5 | COMMA: ','; 6 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_decorator_action_name/base_actions.py: 
-------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | action = get_collector() 4 | 5 | 6 | @action('number') 7 | def NUMERIC(_, value): 8 | return float(value) 9 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_decorator_action_name/components.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | 3 | Component: 4 | 'component' name=base.ID extends=ComponentExtends? '{' 5 | slots=Slot* 6 | '}' 7 | ; 8 | 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 10 | 11 | Slot: SlotIn|SlotOut; 12 | 13 | SlotIn: 'in' name=base.ID; 14 | SlotOut: 'out' name=base.ID; 15 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_decorator_action_name/model.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | import 'components.pg' as c; 3 | 4 | Model: 5 | 'modelID' modelID = base.NUMERIC_ID 6 | components=c.Component* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_symbol_name/base.pg: -------------------------------------------------------------------------------- 1 | terminals 2 | ID: /\w+/; 3 | NUMERIC_ID: /\d+/; 4 | FQN: /\w+(\.\w+)*/; 5 | COMMA: ','; 6 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_symbol_name/base_actions.py: -------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | action = get_collector() 4 | 5 | 6 | @action 7 | def NUMERIC_ID(_, value): 8 | return float(value) 9 | -------------------------------------------------------------------------------- 
/tests/func/import/imported_actions/by_symbol_name/components.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | 3 | Component: 4 | 'component' name=base.ID extends=ComponentExtends? '{' 5 | slots=Slot* 6 | '}' 7 | ; 8 | 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 10 | 11 | Slot: SlotIn|SlotOut; 12 | 13 | SlotIn: 'in' name=base.ID; 14 | SlotOut: 'out' name=base.ID; 15 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/by_symbol_name/model.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | import 'components.pg' as c; 3 | 4 | Model: 5 | 'modelID' modelID = base.NUMERIC_ID 6 | components=c.Component* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_action_name/base.pg: -------------------------------------------------------------------------------- 1 | terminals 2 | ID: /\w+/; 3 | @numeric NUMERIC_ID: /\d+/; 4 | FQN: /\w+(\.\w+)*/; 5 | COMMA: ','; 6 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_action_name/base_actions.py: -------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | action = get_collector() 4 | 5 | 6 | @action('numeric') 7 | def NUMERIC_ID(_, value): 8 | return float(value) 9 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_action_name/components.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | 3 | Component: 4 | 'component' name=base.ID extends=ComponentExtends? 
'{' 5 | slots=Slot* 6 | '}' 7 | ; 8 | 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 10 | 11 | Slot: SlotIn|SlotOut; 12 | 13 | SlotIn: 'in' name=base.ID; 14 | SlotOut: 'out' name=base.ID; 15 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_action_name/model.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | import 'components.pg' as c; 3 | 4 | Model: 5 | 'modelID' modelID = base.NUMERIC_ID 6 | components=c.Component* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_action_name/model_actions.py: -------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | action = get_collector() 4 | 5 | 6 | @action('base.numeric') 7 | def number(_, value): 8 | "This action is overriding by action name in 'base' module." 
9 | return 43 10 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_symbol_name/base.pg: -------------------------------------------------------------------------------- 1 | terminals 2 | ID: /\w+/; 3 | NUMERIC_ID: /\d+/; 4 | FQN: /\w+(\.\w+)*/; 5 | COMMA: ','; 6 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_symbol_name/base_actions.py: -------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | action = get_collector() 4 | 5 | 6 | @action 7 | def NUMERIC_ID(_, value): 8 | return float(value) 9 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_symbol_name/components.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | 3 | Component: 4 | 'component' name=base.ID extends=ComponentExtends? 
'{' 5 | slots=Slot* 6 | '}' 7 | ; 8 | 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 10 | 11 | Slot: SlotIn|SlotOut; 12 | 13 | SlotIn: 'in' name=base.ID; 14 | SlotOut: 'out' name=base.ID; 15 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_symbol_name/model.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | import 'components.pg' as c; 3 | 4 | Model: 5 | 'modelID' modelID = base.NUMERIC_ID 6 | components=c.Component* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/in_grammar_by_symbol_name/model_actions.py: -------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | action = get_collector() 4 | 5 | 6 | @action('base.NUMERIC_ID') 7 | def numeric(_, value): 8 | return 43 9 | -------------------------------------------------------------------------------- /tests/func/import/imported_actions/test_imported_actions.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar, Parser 4 | 5 | this_folder = os.path.dirname(__file__) 6 | 7 | model_str = ''' 8 | modelID 42 9 | component myComponent { 10 | in SomeInputSlot 11 | out SomeOutputSlot 12 | } 13 | ''' 14 | 15 | 16 | def test_imported_actions_connect_by_symbol_name(): 17 | g = Grammar.from_file(os.path.join(this_folder, 'by_symbol_name/model.pg')) 18 | model = Parser(g).parse(model_str) 19 | # Check that base.pg actions are properly loaded and triggered. 20 | assert model.modelID == 42 21 | 22 | 23 | def test_imported_actions_connect_by_action_name(): 24 | g = Grammar.from_file(os.path.join(this_folder, 'by_action_name/model.pg')) 25 | model = Parser(g).parse(model_str) 26 | # Check that base.pg actions are properly loaded and triggered. 
27 | assert model.modelID == 42 28 | 29 | 30 | def test_imported_actions_connect_by_decorator_action_name(): 31 | g = Grammar.from_file(os.path.join(this_folder, 32 | 'by_decorator_action_name/model.pg')) 33 | model = Parser(g).parse(model_str) 34 | # Check that base.pg actions are properly loaded and triggered. 35 | assert model.modelID == 42 36 | 37 | 38 | def test_imported_actions_override(): 39 | """ 40 | Test that actions loaded from `*_actions.py` files can be overriden by 41 | users actions. 42 | """ 43 | 44 | # We can override either by fqn of symbol 45 | g = Grammar.from_file(os.path.join(this_folder, 'by_symbol_name/model.pg')) 46 | actions = { 47 | 'base.NUMERIC_ID': lambda _, value: 43 48 | } 49 | model = Parser(g, actions=actions).parse(model_str) 50 | assert model.modelID == 43 51 | 52 | # Or by action name used in grammar for the given symbol 53 | g = Grammar.from_file(os.path.join(this_folder, 'by_action_name/model.pg')) 54 | actions = { 55 | 'base.number': lambda _, value: 43 56 | } 57 | model = Parser(g, actions=actions).parse(model_str) 58 | assert model.modelID == 43 59 | 60 | # Override by FQN takes precendence 61 | g = Grammar.from_file(os.path.join(this_folder, 'by_action_name/model.pg')) 62 | actions = { 63 | 'base.NUMERIC_ID': lambda _, value: 43 64 | } 65 | model = Parser(g, actions=actions).parse(model_str) 66 | assert model.modelID == 43 67 | 68 | 69 | def test_imported_actions_override_by_grammar_actions(): 70 | """ 71 | Test that actions loaded from `*_actions.py` files can override actions 72 | imported from other grammar files. 
73 | """ 74 | 75 | g = Grammar.from_file(os.path.join(this_folder, 76 | 'in_grammar_by_symbol_name/model.pg')) 77 | model = Parser(g).parse(model_str) 78 | assert model.modelID == 43 79 | 80 | g = Grammar.from_file(os.path.join(this_folder, 81 | 'in_grammar_by_action_name/model.pg')) 82 | model = Parser(g).parse(model_str) 83 | assert model.modelID == 43 84 | -------------------------------------------------------------------------------- /tests/func/import/imported_recognizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/import/imported_recognizers/__init__.py -------------------------------------------------------------------------------- /tests/func/import/imported_recognizers/base.pg: -------------------------------------------------------------------------------- 1 | terminals 2 | // We will define some of the terminals in `*_recognizers.py` file 3 | ID: /\w+/; 4 | @number NUMERIC_ID: ; 5 | FQN: ; 6 | COMMA: ; 7 | -------------------------------------------------------------------------------- /tests/func/import/imported_recognizers/base_recognizers.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from parglare import get_collector 4 | 5 | recognizer = get_collector() 6 | number_re = re.compile(r'\d+(\.\d+)*') 7 | fqn_re = re.compile(r'\w+(\.\w+)*') 8 | 9 | 10 | @recognizer('NUMERIC_ID') 11 | def number(input, pos): 12 | number_match = number_re.match(input[pos:]) 13 | if number_match: 14 | return input[pos:pos + len(number_match.group())] 15 | 16 | 17 | @recognizer 18 | def FQN(input, pos): 19 | fqn_match = fqn_re.match(input[pos:]) 20 | if fqn_match: 21 | return input[pos:pos + len(fqn_match.group())] 22 | -------------------------------------------------------------------------------- /tests/func/import/imported_recognizers/components.pg: 
-------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | 3 | Component: 4 | 'component' name=base.ID extends=ComponentExtends? '{' 5 | slots=Slot* 6 | '}' 7 | ; 8 | 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 10 | 11 | Slot: SlotIn|SlotOut; 12 | 13 | SlotIn: 'in' name=base.ID; 14 | SlotOut: 'out' name=base.ID; 15 | -------------------------------------------------------------------------------- /tests/func/import/imported_recognizers/model.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | import 'components.pg' as c; 3 | 4 | Model: 5 | 'modelID' modelID = base.NUMERIC_ID 6 | components=c.Component* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/imported_recognizers/model_override.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | import 'components.pg' as c; 3 | 4 | Model: 5 | 'modelID' modelID = base.NUMERIC_ID 6 | components=c.Component* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/imported_recognizers/model_override_recognizers.py: -------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | recognizer = get_collector() 4 | 5 | 6 | @recognizer('base.NUMERIC_ID') 7 | def number(input, pos): 8 | '''Check override''' 9 | pass 10 | 11 | 12 | @recognizer('base.COMMA') 13 | def comma_recognizer(input, pos): 14 | if input[pos] == ',': 15 | return input[pos:pos + 1] 16 | -------------------------------------------------------------------------------- /tests/func/import/imported_recognizers/test_imported_recognizers.py: -------------------------------------------------------------------------------- 1 | import os 2 | from types import FunctionType 3 | 4 | import pytest 5 | 6 | from parglare 
import Grammar, GrammarError, ParseError, Parser 7 | 8 | from .base_recognizers import number 9 | 10 | this_folder = os.path.dirname(__file__) 11 | 12 | model_str = ''' 13 | modelID 42.23.5 14 | component myComponent extends some.fqn.name { 15 | in SomeInputSlot 16 | out SomeOutputSlot 17 | } 18 | ''' 19 | 20 | 21 | def comma_recognizer(input, pos): 22 | if input[pos] == ',': 23 | return input[pos:pos + 1] 24 | 25 | 26 | def test_imported_recognizers_error_undefined_recognizer(): 27 | 28 | with pytest.raises(GrammarError, 29 | match=r'has no recognizer defined and no recognizers ' 30 | 'are given'): 31 | Grammar.from_file(os.path.join(this_folder, 'model.pg')) 32 | 33 | # If we define COMMA recognizer grammar will construct without exceptions. 34 | g = Grammar.from_file(os.path.join(this_folder, 'model.pg'), 35 | recognizers={'base.COMMA': comma_recognizer}) 36 | assert g 37 | 38 | 39 | def test_imported_recognizers_connect_from_external_file(): 40 | g = Grammar.from_file(os.path.join(this_folder, 'model.pg'), 41 | recognizers={'base.COMMA': comma_recognizer}) 42 | 43 | # Check that recognizers are loaded and connected. 44 | rec_fqn = g.get_terminal('base.FQN') 45 | assert rec_fqn.recognizer 46 | assert type(rec_fqn.recognizer) is FunctionType 47 | assert rec_fqn.recognizer.__name__ == 'FQN' 48 | 49 | rec_fqn = g.get_terminal('base.NUMERIC_ID') 50 | assert rec_fqn.recognizer 51 | assert type(rec_fqn.recognizer) is FunctionType 52 | assert rec_fqn.recognizer.__name__ == 'number' 53 | 54 | 55 | def test_imported_recognizers_override(): 56 | """ 57 | Test that recognizers loaded from `*_recognizers.py` files can be 58 | overriden by users provided recognizers. 
59 | """ 60 | 61 | called = [False, False] 62 | 63 | def numeric_id(input, pos): 64 | called[0] = True 65 | 66 | def fqn(input, pos): 67 | called[0] = True 68 | 69 | recognizers = { 70 | 'base.COMMA': comma_recognizer, 71 | 'base.NUMERIC_ID': numeric_id, 72 | 'base.FQN': fqn 73 | } 74 | 75 | g = Grammar.from_file(os.path.join(this_folder, 'model.pg'), 76 | recognizers=recognizers) 77 | assert g 78 | with pytest.raises(ParseError): 79 | Parser(g).parse(model_str) 80 | assert any(called) 81 | 82 | called = [False] 83 | 84 | def numeric_id(input, pos): 85 | called[0] = True 86 | return number(input, pos) 87 | 88 | recognizers = { 89 | 'base.COMMA': comma_recognizer, 90 | 'base.NUMERIC_ID': numeric_id, 91 | } 92 | 93 | g = Grammar.from_file(os.path.join(this_folder, 'model.pg'), 94 | recognizers=recognizers) 95 | assert g 96 | Parser(g).parse(model_str) 97 | assert called[0] 98 | 99 | 100 | def test_imported_recognizers_override_by_importing_grammar_file(): 101 | """ 102 | Test that recognizers loaded from `*_recognizers.py` files can be 103 | overriden in importing grammar `*_recognizers.py` file by providing 104 | FQN of the imported terminal relative from the importing grammar file. 105 | """ 106 | 107 | g = Grammar.from_file(os.path.join(this_folder, 'model_override.pg')) 108 | assert g 109 | 110 | t = g.get_terminal('base.NUMERIC_ID') 111 | assert t is not None 112 | 113 | assert t.recognizer.__doc__ == 'Check override' 114 | -------------------------------------------------------------------------------- /tests/func/import/override/base.pg: -------------------------------------------------------------------------------- 1 | import 'first.pg' as f; 2 | 3 | S: f.s.A; 4 | 5 | // This rule overrides imported rule C from the second grammar 6 | // Each rule that reference old rule C should now 7 | // reference overriden rule. 
8 | f.s.C: 'k' f.s.B; 9 | 10 | terminals 11 | f.s.B: 'bb'; // also all rules referencing terminal B now use overriden version 12 | -------------------------------------------------------------------------------- /tests/func/import/override/first.pg: -------------------------------------------------------------------------------- 1 | import 'second.pg' as s; 2 | S: s.A s.C; 3 | 4 | terminals 5 | // This grammar override terminal match; 6 | s.B: 'bf'; 7 | -------------------------------------------------------------------------------- /tests/func/import/override/nonexisting.pg: -------------------------------------------------------------------------------- 1 | import 'first.pg' as f; 2 | 3 | S: f.S; 4 | 5 | f.NonExisting: f.s.B; 6 | -------------------------------------------------------------------------------- /tests/func/import/override/second.pg: -------------------------------------------------------------------------------- 1 | A: B+ C; 2 | C: 'sec' B; 3 | 4 | terminals 5 | B: 'bs'; 6 | -------------------------------------------------------------------------------- /tests/func/import/override/test_override.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from parglare import Grammar, GrammarError, Parser 6 | 7 | this_folder = os.path.dirname(__file__) 8 | 9 | 10 | def test_override_base(): 11 | """ 12 | Test overrides with two level of nesting. 13 | """ 14 | g = Grammar.from_file(os.path.join(this_folder, 'base.pg')) 15 | p = Parser(g) 16 | result = p.parse('bb bb k bb') 17 | assert result 18 | 19 | 20 | def test_override_first(): 21 | """ 22 | Loading grammar from the lower level of import hierarchy works correctly 23 | also. 
24 | """ 25 | g = Grammar.from_file(os.path.join(this_folder, 'first.pg')) 26 | p = Parser(g) 27 | result = p.parse('bf bf sec bf sec bf') 28 | assert result 29 | 30 | 31 | def test_override_nonexisting_symbol(): 32 | """ 33 | Test override that doesn't exist. By default it could go unnoticed and 34 | the intended rule would not be overriden. This verifies that typo errors 35 | would not go unnoticed. 36 | """ 37 | with pytest.raises(GrammarError, 38 | match='Unexisting name for symbol override f.NonExisting'): 39 | Grammar.from_file(os.path.join(this_folder, 'nonexisting.pg')) 40 | -------------------------------------------------------------------------------- /tests/func/import/recursion/base.pg: -------------------------------------------------------------------------------- 1 | terminals 2 | ID: /\w+/; 3 | FQN: /\w+(\.\w+)*/; 4 | COMMA: ','; 5 | -------------------------------------------------------------------------------- /tests/func/import/recursion/components.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | import 'modules.pg'; // Recursive import of 'modules' 3 | 4 | Component: 5 | 'component' name=base.ID extends=ComponentExtends? 
'{' 6 | slots=Slot* 7 | modules=modules.Module* // recursive reference to Module 8 | '}' 9 | ; 10 | 11 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 12 | 13 | Slot: SlotIn|SlotOut; 14 | 15 | SlotIn: 'in' name=base.ID; 16 | SlotOut: 'out' name=base.ID; 17 | -------------------------------------------------------------------------------- /tests/func/import/recursion/model.pg: -------------------------------------------------------------------------------- 1 | import 'packages.pg'; 2 | import 'modules.pg' as m; 3 | 4 | Model: 5 | packages=packages.Package* 6 | modules=m.Module* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/import/recursion/modules.pg: -------------------------------------------------------------------------------- 1 | import 'components.pg' as c; // recursive import of 'components' 2 | import 'base.pg'; 3 | 4 | Module: 'module' name=base.ID '{' 5 | components=c.Component* 6 | '}'; 7 | -------------------------------------------------------------------------------- /tests/func/import/recursion/packages.pg: -------------------------------------------------------------------------------- 1 | import 'components.pg'; 2 | import 'base.pg'; 3 | 4 | Package: 5 | 'package' name=base.ID 6 | ; 7 | -------------------------------------------------------------------------------- /tests/func/import/recursion/test_recursion.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar, Parser 4 | 5 | this_folder = os.path.dirname(__file__) 6 | 7 | 8 | def test_recursive_grammar_import(): 9 | g = Grammar.from_file(os.path.join(this_folder, 'model.pg')) 10 | assert g 11 | 12 | input_str = ''' 13 | 14 | package First 15 | package Second 16 | 17 | module SomeModule { 18 | 19 | component myComponent { 20 | in SomeInputSlot 21 | out SomeOutputSlot 22 | } 23 | 24 | 25 | } 26 | 27 | ''' 28 | 29 | result = 
Parser(g).parse(input_str) 30 | assert result 31 | -------------------------------------------------------------------------------- /tests/func/parsing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/parsing/__init__.py -------------------------------------------------------------------------------- /tests/func/parsing/parsing_errors.txt: -------------------------------------------------------------------------------- 1 | id + id * + id 2 | -------------------------------------------------------------------------------- /tests/func/parsing/parsing_from_file.txt: -------------------------------------------------------------------------------- 1 | id + id + id 2 | 3 | + id 4 | -------------------------------------------------------------------------------- /tests/func/parsing/test_build_tree.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | from parglare import Grammar, Parser 3 | 4 | 5 | def test_call_actions_during_tree_build(): 6 | grammar = """ 7 | Program: "begin" MoveCommand* "end"; 8 | MoveCommand: "move" Direction; 9 | Direction: "up" | "down" | "left" | "right"; 10 | """ 11 | 12 | g = Grammar.from_string(grammar) 13 | 14 | code = """ 15 | begin 16 | move left 17 | move left 18 | move up 19 | move down 20 | end 21 | """ 22 | 23 | left_moves = [] 24 | 25 | def left_dir_collector(_, nodes): 26 | """Finds all 'left' moves and adds them into a list.""" 27 | term = nodes[0] 28 | if term.value == "left": 29 | left_moves.append(term) 30 | 31 | parser = Parser(g, build_tree=True, 32 | actions={"Direction": left_dir_collector}) 33 | parser.parse(code) 34 | 35 | # call_actions_during_tree_build is False by default, so left_dir_collector 36 | # will not be called. 
37 | assert len(left_moves) == 0 38 | 39 | parser.call_actions_during_tree_build = True 40 | parser.parse(code) 41 | 42 | assert len(left_moves) == 2 43 | -------------------------------------------------------------------------------- /tests/func/parsing/test_conflicts.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | from parglare import Grammar, Parser 3 | from parglare.exceptions import SRConflicts, RRConflicts 4 | 5 | 6 | def test_sr_conflict(): 7 | grammar = """ 8 | S: As A A; 9 | As: As A | A; 10 | 11 | terminals 12 | A:"a"; 13 | """ 14 | g = Grammar.from_string(grammar, _no_check_recognizers=True) 15 | with pytest.raises(SRConflicts) as e: 16 | Parser(g, prefer_shifts=False) 17 | assert "whether to shift or reduce by production(s) '2: As = As A'" in \ 18 | str(e.value.conflicts[0]) 19 | 20 | 21 | def test_rr_empty_conflict(): 22 | grammar = """ 23 | S: A B C | A D C; 24 | B: B1 | EMPTY; 25 | D: D1 | EMPTY; 26 | 27 | terminals 28 | A:; 29 | C:; 30 | B1:; 31 | D1:; 32 | """ 33 | g = Grammar.from_string(grammar, _no_check_recognizers=True) 34 | with pytest.raises(RRConflicts) as e: 35 | Parser(g) 36 | 37 | # For B and D empty reductions both "A B C" and "A D C" can reduce to S 38 | assert "'6: D = EMPTY' or '4: B = EMPTY'" \ 39 | in str(e.value.conflicts[0]) 40 | 41 | 42 | def test_rr_nonempty_conflict(): 43 | grammar = """ 44 | S: A | B; 45 | A: A1 B1; 46 | B: A1 B1; 47 | 48 | terminals 49 | A1: ; 50 | B1: ; 51 | """ 52 | g = Grammar.from_string(grammar, _no_check_recognizers=True) 53 | with pytest.raises(RRConflicts) as e: 54 | Parser(g) 55 | 56 | # "A1 B1" can reduce to both A and B 57 | assert "'4: B = A1 B1' or '3: A = A1 B1'" \ 58 | in str(e.value.conflicts[0]) 59 | -------------------------------------------------------------------------------- /tests/func/parsing/test_dynamic_disambiguation_filters.py: -------------------------------------------------------------------------------- 1 | 
import pytest # noqa 2 | from parglare import GLRParser, Grammar, Parser, SHIFT, REDUCE 3 | from parglare.exceptions import SRConflicts 4 | 5 | 6 | grammar = r""" 7 | E: E op_sum E {dynamic} 8 | | E op_mul E {dynamic} 9 | | number; 10 | 11 | terminals 12 | number: /\d+/; 13 | op_sum: '+' {dynamic}; 14 | op_mul: '*' {dynamic}; 15 | """ 16 | instr1 = '1 + 2 * 5 + 3' 17 | instr2 = '1 * 2 + 5 * 3' 18 | 19 | actions = { 20 | 'E': [lambda _, nodes: nodes[0] + nodes[2], 21 | lambda _, nodes: nodes[0] * nodes[2], 22 | lambda _, nodes: float(nodes[0])] 23 | } 24 | 25 | 26 | g = Grammar.from_string(grammar) 27 | 28 | 29 | operations = [] 30 | 31 | 32 | def custom_disambiguation_filter(context, from_state, to_state, action, 33 | production, subresults): 34 | """ 35 | Make first operation that appears in the input as lower priority. 36 | This demonstrates how priority rule can change dynamically depending 37 | on the input or how disambiguation can be decided during parsing. 38 | """ 39 | global operations 40 | 41 | # At the start of parsing this function is called with actions set to None 42 | # to give a chance for the strategy to initialize. 
43 | if action is None: 44 | operations = [] 45 | return 46 | 47 | assert subresults is None or isinstance(subresults, list) 48 | 49 | operation = context.token.symbol if action is SHIFT else context.token_ahead.symbol 50 | 51 | actions = from_state.actions[operation] 52 | if operation not in operations and operation.name != 'STOP': 53 | operations.append(operation) 54 | 55 | if action is SHIFT: 56 | shifts = [a for a in actions if a.action is SHIFT] 57 | if not shifts: 58 | return False 59 | 60 | reductions = [a for a in actions if a.action is REDUCE] 61 | if not reductions: 62 | return True 63 | 64 | red_op = reductions[0].prod.rhs[1] 65 | return operations.index(operation) > operations.index(red_op) 66 | 67 | elif action is REDUCE: 68 | 69 | # Current reduction operation 70 | red_op = production.rhs[1] 71 | 72 | # If operation ahead is STOP or is of less or equal priority -> reduce. 73 | return ((operation not in operations) 74 | or (operations.index(operation) 75 | <= operations.index(red_op))) 76 | 77 | 78 | def test_dynamic_disambiguation(): 79 | """ 80 | Test disambiguation determined at run-time based on the input. 81 | This tests LR parsing. 82 | """ 83 | 84 | # This grammar is ambiguous if no prefer_shift strategy is used. 85 | with pytest.raises(SRConflicts): 86 | Parser(g, prefer_shifts=False) 87 | 88 | # But if we provide dynamic disambiguation filter 89 | # the conflicts can be handled at run-time. 90 | p = Parser(g, actions=actions, prefer_shifts=False, 91 | dynamic_filter=custom_disambiguation_filter) 92 | 93 | # * operation will be of higher priority as it appears later in the stream. 94 | result1 = p.parse(instr1) 95 | assert result1 == 1 + (2 * 5) + 3 96 | 97 | # + operation will be of higher priority here. 98 | result2 = p.parse(instr2) 99 | assert result2 == 1 * (2 + 5) * 3 100 | 101 | 102 | def test_dynamic_disambiguation_glr(): 103 | """ 104 | Test disambiguation determined at run-time based on the input. 105 | This tests GLR parsing. 
106 | """ 107 | p = GLRParser(g, actions=actions, 108 | dynamic_filter=custom_disambiguation_filter) 109 | 110 | # * operation will be of higher priority as it appears later in the stream. 111 | result1 = p.parse(instr1) 112 | assert len(result1) == 1 113 | assert p.call_actions(result1[0]) == 1 + (2 * 5) + 3 114 | 115 | # + operation will be of higher priority here. 116 | result2 = p.parse(instr2) 117 | assert len(result2) == 1 118 | assert p.call_actions(result2[0]) == 1 * (2 + 5) * 3 119 | -------------------------------------------------------------------------------- /tests/func/parsing/test_glr_error_recovery.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | from parglare import GLRParser, Grammar, ParseError 3 | from parglare.parser import Token 4 | from parglare.actions import pass_single, pass_inner 5 | 6 | grammar = r""" 7 | E: E '+' E 8 | | E '-' E 9 | | E '*' E 10 | | E '/' E 11 | | E '^' E 12 | | '(' E ')' 13 | | number; 14 | 15 | terminals 16 | number: /\d+(\.\d+)?/; 17 | """ 18 | 19 | actions = { 20 | "E": [lambda _, nodes: nodes[0] + nodes[2], 21 | lambda _, nodes: nodes[0] - nodes[2], 22 | lambda _, nodes: nodes[0] * nodes[2], 23 | lambda _, nodes: nodes[0] / nodes[2], 24 | lambda _, nodes: nodes[0] ** nodes[2], 25 | pass_inner, 26 | pass_single], 27 | "number": lambda _, value: float(value), 28 | } 29 | 30 | g = Grammar.from_string(grammar) 31 | 32 | 33 | def test_glr_recovery_default(): 34 | """ 35 | Test default error recovery in GLR parsing. Default recovery should report 36 | the error, drop current input at position and try to recover. 37 | In case of multiple subsequent errouneous chars only one error should be 38 | reported. 
39 | """ 40 | parser = GLRParser(g, actions=actions, error_recovery=True) 41 | 42 | results = parser.parse('1 + 2 + * 3 & 89 - 5') 43 | 44 | assert len(parser.errors) == 2 45 | e1, e2 = parser.errors 46 | 47 | # First errors is '*' at position 8 and of length 2 48 | assert e1.location.start_position == 8 49 | assert e1.location.end_position == 10 50 | 51 | # Second error is '& 89' at position 12 and length 5 52 | assert e2.location.start_position == 12 53 | assert e2.location.end_position == 17 54 | 55 | # There are 5 trees for '1 + 2 + 3 - 5' 56 | # All results are the same 57 | assert len(results) == 5 58 | result_set = set([parser.call_actions(tree) for tree in results]) 59 | assert len(result_set) == 1 60 | assert 1 in set(result_set) 61 | 62 | 63 | def test_glr_recovery_custom_new_position(): 64 | """ 65 | Test that custom recovery that increment position works. 66 | """ 67 | 68 | def custom_recovery(head, error): 69 | # This recovery will just skip over erroneous part of input '& 89'. 70 | head.position += 4 71 | return head.parser.default_error_recovery(head) 72 | 73 | parser = GLRParser(g, actions=actions, error_recovery=custom_recovery) 74 | 75 | results = parser.parse('1 + 5 & 89 - 2') 76 | 77 | assert len(parser.errors) == 1 78 | assert len(results) == 2 79 | result_set = set([parser.call_actions(tree) for tree in results]) 80 | assert len(result_set) == 1 81 | # Calculated result should be '1 + 5 - 2' 82 | assert result_set.pop() == 4 83 | 84 | 85 | def test_glr_recovery_custom_new_token(): 86 | """ 87 | Test that custom recovery that introduces new token works. 
88 | """ 89 | 90 | def custom_recovery(head, error): 91 | # Here we will introduce missing operation token 92 | head.token_ahead = Token(g.get_terminal('-'), '-', head.position, length=0) 93 | return True 94 | 95 | parser = GLRParser(g, actions=actions, error_recovery=custom_recovery) 96 | 97 | results = parser.parse('1 + 5 8 - 2') 98 | 99 | assert len(parser.errors) == 1 100 | assert len(results) == 5 101 | result_set = set([parser.call_actions(tree) for tree in results]) 102 | assert len(result_set) == 2 103 | assert -4 in result_set 104 | assert 0 in result_set 105 | 106 | 107 | def test_glr_recovery_custom_unsuccessful(): 108 | """ 109 | Test unsuccessful error recovery. 110 | """ 111 | 112 | def custom_recovery(head, error): 113 | return False 114 | 115 | parser = GLRParser(g, actions=actions, error_recovery=custom_recovery) 116 | 117 | with pytest.raises(ParseError) as e: 118 | parser.parse('1 + 5 8 - 2') 119 | 120 | error = e.value 121 | assert error.location.start_position == 6 122 | -------------------------------------------------------------------------------- /tests/func/parsing/test_parse_context.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | from parglare import Grammar, Parser 3 | from parglare.actions import pass_single 4 | 5 | 6 | grammar = r""" 7 | E: E '+' E {left} 8 | | number; 9 | 10 | terminals 11 | number: /\d+(\.\d+)?/; 12 | """ 13 | 14 | called = [False, False] 15 | 16 | 17 | def act_sum(is_tree): 18 | def act_sum(context, nodes): 19 | called[0] = True 20 | assert context.parser 21 | assert context.state.symbol.name == 'E' 22 | assert context.production.symbol.name == 'E' 23 | assert len(context.production.rhs) == 3 24 | assert context.layout_content == ' ' 25 | assert context.start_position == 3 26 | assert context.end_position == 8 27 | if is_tree: 28 | # If parse tree is constructed `node` is available on 29 | # the context. 
30 | assert context.node.is_nonterm() \ 31 | and context.node.symbol.name == 'E' 32 | else: 33 | assert context.node is None 34 | 35 | return act_sum 36 | 37 | 38 | def act_number(context, value): 39 | called[1] = True 40 | value = float(value) 41 | assert context.symbol.name == 'number' 42 | if value == 1: 43 | assert context.start_position == 3 44 | assert context.end_position == 4 45 | assert context.layout_content == ' ' 46 | else: 47 | assert context.start_position == 7 48 | assert context.end_position == 8 49 | assert context.layout_content == ' ' 50 | return value 51 | 52 | 53 | actions = { 54 | "Result": pass_single, 55 | "E": [None, pass_single], 56 | "number": act_number, 57 | } 58 | 59 | g = Grammar.from_string(grammar) 60 | 61 | 62 | def test_parse_context(): 63 | global called 64 | called = [False, False] 65 | 66 | actions["E"][0] = act_sum(is_tree=False) 67 | parser = Parser(g, actions=actions) 68 | 69 | parser.parse(" 1 + 2 ") 70 | 71 | assert all(called) 72 | 73 | 74 | def test_parse_context_call_actions(): 75 | """ 76 | Test that valid context attributes are available when calling 77 | actions using `call_actions`. 
78 | """ 79 | global called 80 | called = [False, False] 81 | 82 | actions["E"][0] = act_sum(is_tree=True) 83 | parser = Parser(g, build_tree=True, actions=actions) 84 | 85 | tree = parser.parse(" 1 + 2 ") 86 | 87 | parser.call_actions(tree) 88 | 89 | assert all(called) 90 | -------------------------------------------------------------------------------- /tests/func/parsing/test_parsing.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, join 2 | 3 | import pytest 4 | 5 | from parglare import Grammar, Parser 6 | from parglare.exceptions import ParseError 7 | 8 | from ..grammar.expression_grammar import get_grammar 9 | 10 | 11 | def test_parsing(): 12 | grammar = get_grammar() 13 | p = Parser(grammar) 14 | assert p.parse("id+id+id") 15 | 16 | 17 | def test_parsing_from_file(): 18 | grammar = get_grammar() 19 | p = Parser(grammar) 20 | assert p.parse_file(join(dirname(__file__), 'parsing_from_file.txt')) 21 | 22 | 23 | def test_partial_parse(): 24 | """ 25 | Test `consume_input` parser parameter. 26 | """ 27 | grammar = """ 28 | S: 'a' B; 29 | B: 'b'; 30 | """ 31 | g = Grammar.from_string(grammar) 32 | parser = Parser(g, consume_input=False) 33 | 34 | # Parser should succesfuly parse 'ab' at the beginning. 35 | parser.parse('abc') 36 | 37 | # But if `consume_input` is not set to `False` it should be `True` by 38 | # default and the parser will not accept partial parses. 
39 | grammar = """ 40 | S: 'a' B; 41 | B: 'b'; 42 | """ 43 | g = Grammar.from_string(grammar) 44 | parser = Parser(g) 45 | parser.parse('a b') 46 | with pytest.raises(ParseError): 47 | parser.parse('a b c') 48 | -------------------------------------------------------------------------------- /tests/func/parsing/test_parsing_errors.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | import os 3 | from parglare import Grammar, Parser, GLRParser, ParseError 4 | from ..grammar.expression_grammar import get_grammar 5 | 6 | 7 | parsers = pytest.mark.parametrize("parser_class", [Parser, GLRParser]) 8 | 9 | 10 | @parsers 11 | def test_grammar_in_error(parser_class): 12 | 13 | grammar = get_grammar() 14 | p = parser_class(grammar) 15 | 16 | with pytest.raises(ParseError) as e: 17 | p.parse("id+id*+id") 18 | 19 | assert e.value.grammar is grammar 20 | 21 | 22 | def test_glr_last_heads_in_error(): 23 | 24 | grammar = get_grammar() 25 | p = GLRParser(grammar) 26 | 27 | with pytest.raises(ParseError) as e: 28 | p.parse("id+id*+id") 29 | 30 | assert len(e.value.last_heads) == 1 31 | 32 | 33 | @parsers 34 | def test_invalid_input(parser_class): 35 | 36 | grammar = get_grammar() 37 | p = parser_class(grammar) 38 | 39 | with pytest.raises(ParseError) as e: 40 | p.parse("id+id*+id") 41 | 42 | assert e.value.location.start_position == 6 43 | assert "(" in str(e.value) 44 | assert "id" in str(e.value) 45 | assert '*' in [s.name for s in e.value.symbols_before] 46 | assert '+' in [t.value for t in e.value.tokens_ahead] 47 | expected_names = [s.name for s in e.value.symbols_expected] 48 | assert 'id' in expected_names 49 | assert '(' in expected_names 50 | 51 | 52 | @parsers 53 | def test_premature_end(parser_class): 54 | 55 | grammar = get_grammar() 56 | p = parser_class(grammar) 57 | 58 | with pytest.raises(ParseError) as e: 59 | p.parse("id+id*") 60 | 61 | assert e.value.location.start_position == 6 62 | expected_names = 
[s.name for s in e.value.symbols_expected] 63 | assert 'id' in expected_names 64 | assert '(' in expected_names 65 | assert '*' in [s.name for s in e.value.symbols_before] 66 | assert e.value.tokens_ahead == [] 67 | 68 | 69 | def test_ambiguous_glr(): 70 | grammar = r""" 71 | E: E '+' E 72 | | E '*' E 73 | | number; 74 | 75 | terminals 76 | number: /\d+(\.\d+)?/; 77 | """ 78 | g = Grammar.from_string(grammar) 79 | parser = GLRParser(g) 80 | 81 | with pytest.raises(ParseError) as e: 82 | parser.parse("1 + 2 * 3 / 5") 83 | 84 | assert e.value.location.start_position == 10 85 | assert 'number' in [s.name for s in e.value.symbols_before] 86 | 87 | 88 | @parsers 89 | def test_line_column(parser_class): 90 | grammar = get_grammar() 91 | p = parser_class(grammar) 92 | 93 | with pytest.raises(ParseError) as e: 94 | p.parse("""id + id * id + id + error * id""") 95 | 96 | loc = e.value.location 97 | assert loc.start_position == 20 98 | assert loc.line == 1 99 | assert loc.column == 20 100 | 101 | with pytest.raises(ParseError) as e: 102 | p.parse("""id + id * id + id + error * id 103 | 104 | """) 105 | loc = e.value.location 106 | assert loc.start_position == 20 107 | assert loc.line == 1 108 | assert loc.column == 20 109 | 110 | with pytest.raises(ParseError) as e: 111 | p.parse(""" 112 | 113 | id + id * id + id + error * id""") 114 | loc = e.value.location 115 | assert loc.start_position == 22 116 | assert loc.line == 3 117 | assert loc.column == 20 118 | 119 | with pytest.raises(ParseError) as e: 120 | p.parse(""" 121 | 122 | id + id * id + id + error * id 123 | 124 | """) 125 | loc = e.value.location 126 | assert loc.start_position == 22 127 | assert loc.line == 3 128 | assert loc.column == 20 129 | 130 | 131 | @parsers 132 | def test_file_name(parser_class): 133 | "Test that file name is given in the error string when parsing file." 
134 | grammar = get_grammar() 135 | p = parser_class(grammar) 136 | 137 | input_file = os.path.join(os.path.dirname(__file__), 138 | 'parsing_errors.txt') 139 | 140 | with pytest.raises(ParseError) as e: 141 | p.parse_file(input_file) 142 | 143 | assert 'parsing_errors.txt' in str(e.value) 144 | assert 'parsing_errors.txt' in e.value.location.file_name 145 | -------------------------------------------------------------------------------- /tests/func/parsing/test_to_dot.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | from parglare import Parser, GLRParser, Grammar 3 | from ..grammar.expression_grammar import get_grammar 4 | 5 | 6 | def test_to_dot(): 7 | 8 | grammar = get_grammar() 9 | p = Parser(grammar, build_tree=True) 10 | 11 | res = p.parse("""id+ id * (id 12 | +id ) 13 | """) 14 | 15 | ts = res.to_dot() 16 | 17 | assert '[label="T[11-13]"];' in ts 18 | assert '[label="+[2-3]"];' in ts 19 | 20 | 21 | def test_forest_to_dot(): 22 | 23 | grammar = Grammar.from_string(r''' 24 | E: E "+" E | E "-" E | "(" E ")" | "id"; 25 | ''') 26 | p = GLRParser(grammar) 27 | 28 | forest = p.parse("""id+ id - (id 29 | +id ) 30 | """) 31 | 32 | ts = forest.to_dot() 33 | 34 | assert '[label="+[18-19]"];' in ts 35 | assert '[label="E[5-7]"];' in ts 36 | assert '[label="Amb(E[0-24],2)" shape=box];' in ts 37 | -------------------------------------------------------------------------------- /tests/func/parsing/test_to_str.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | from parglare import Parser, GLRParser, Grammar 3 | from ..grammar.expression_grammar import get_grammar 4 | 5 | 6 | def test_parse_tree_to_str(): 7 | 8 | grammar = get_grammar() 9 | p = Parser(grammar, build_tree=True) 10 | 11 | res = p.parse("""id+ id * (id 12 | +id ) 13 | """) 14 | 15 | ts = res.to_str() 16 | 17 | assert '+[18->19, "+"]' in ts 18 | assert ')[23->24, ")"]' in ts 19 | 
assert 'F[10->24]' in ts 20 | 21 | 22 | def test_forest_to_str(): 23 | 24 | grammar = Grammar.from_string(r''' 25 | E: E "+" E | E "-" E | "(" E ")" | "id"; 26 | ''') 27 | p = GLRParser(grammar) 28 | 29 | forest = p.parse("""id+ id - (id 30 | +id ) 31 | """) 32 | 33 | ts = forest.to_str() 34 | 35 | assert 'E - ambiguity[2]' in ts 36 | assert 'E[10->24]' in ts 37 | assert ' E[11->21]' in ts 38 | assert ' +[18->19, "+"]' in ts 39 | 40 | 41 | def test_ast_to_str(): 42 | """ 43 | Test produced str tree from dynamically constructed AST object. 44 | """ 45 | grammar = r""" 46 | S: "1" second=Second third=Third+ fourth=Fourth; 47 | Second: val="2"; 48 | Third: "3"; 49 | Fourth: "4" val=Second; 50 | """ 51 | 52 | g = Grammar.from_string(grammar) 53 | parser = Parser(g) 54 | 55 | result = parser.parse('1 2 3 3 3 4 2') 56 | print(result.to_str()) 57 | assert result.to_str().strip() == """ 58 | S [0->13] 59 | second=Second [2->3] 60 | val='2' 61 | third= [ 62 | '3' 63 | '3' 64 | '3' 65 | ] 66 | fourth=Fourth [10->13] 67 | val=Second [12->13] 68 | val='2'""".strip() 69 | 70 | 71 | def test_ast_to_str_with_bnf_extensions(): 72 | """ 73 | Tests `to_str` with lists returned by BNF extensions. 
74 | """ 75 | grammar = r""" 76 | S: "1" second=Second third=Third+ fourth=Fourth; 77 | Second: val="2"; 78 | Third: val="3"; 79 | Fourth: "4" val=Second; 80 | """ 81 | 82 | g = Grammar.from_string(grammar) 83 | parser = Parser(g) 84 | 85 | result = parser.parse('1 2 3 3 3 4 2') 86 | print(result.to_str()) 87 | assert result.to_str().strip() == """ 88 | S [0->13] 89 | second=Second [2->3] 90 | val='2' 91 | third= [ 92 | Third [4->5] 93 | val='3' 94 | Third [6->7] 95 | val='3' 96 | Third [8->9] 97 | val='3' 98 | ] 99 | fourth=Fourth [10->13] 100 | val=Second [12->13] 101 | val='2'""".strip() 102 | -------------------------------------------------------------------------------- /tests/func/persistence/calc_with_actions/calc.pg: -------------------------------------------------------------------------------- 1 | import 'variable.pg' as v; 2 | 3 | Calc: Assignments E; 4 | @collect_optional 5 | Assignments: Assignments Assignment | Assignment | EMPTY; 6 | Assignment: v.VariableName "=" Number; 7 | 8 | E: E "+" E {left, 1} 9 | | E "-" E {left, 1} 10 | | E "*" E {left, 2} 11 | | E "/" E {left, 2} 12 | | "(" E ")" 13 | | v.VariableRef 14 | | Number 15 | ; 16 | 17 | terminals 18 | Number: /\d+(\.\d+)?/; 19 | -------------------------------------------------------------------------------- /tests/func/persistence/calc_with_actions/calc_actions.py: -------------------------------------------------------------------------------- 1 | from parglare import get_collector 2 | 3 | action = get_collector() 4 | 5 | 6 | @action 7 | def Calc(_, nodes): 8 | return nodes[-1] 9 | 10 | 11 | @action 12 | def Assignment(context, nodes): 13 | var_name, _, value = nodes 14 | context.extra[var_name] = float(value) 15 | 16 | 17 | E = [ 18 | lambda _, n: n[0] + n[2], 19 | lambda _, n: n[0] - n[2], 20 | lambda _, n: n[0] * n[2], 21 | lambda _, n: n[0] / n[2], 22 | lambda _, n: n[1], 23 | lambda context, n: context.extra[n[0]], 24 | lambda _, n: float(n[0]) 25 | ] 26 | action('E')(E) 27 | 
-------------------------------------------------------------------------------- /tests/func/persistence/calc_with_actions/test_table_persistance.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import os 3 | import time 4 | 5 | from parglare import Grammar, Parser 6 | 7 | this_folder = os.path.dirname(__file__) 8 | 9 | 10 | def test_save_load_table(): 11 | """ 12 | Test basic table save/load cycle with table file creation. 13 | """ 14 | calc_file = os.path.join(this_folder, 'calc.pg') 15 | variable_file = os.path.join(this_folder, 'variable.pg') 16 | input_str = 'a = 5 1 + 2 * a - 7' 17 | input_str_result = 1 + 2 * 5 - 7 18 | grammar = Grammar.from_file(calc_file) 19 | 20 | table_file = os.path.join(this_folder, 'calc.pgt') 21 | # remove table file if exists 22 | with contextlib.suppress(OSError): 23 | os.remove(table_file) 24 | 25 | parser = Parser(grammar) 26 | assert parser.parse(input_str) == input_str_result 27 | 28 | # Table file must be produced by parser construction. 29 | assert os.path.exists(table_file) 30 | 31 | last_mtime = os.path.getmtime(table_file) 32 | time.sleep(1) 33 | 34 | parser = Parser(grammar) 35 | 36 | # Last generated table should be used during parser construction. 37 | # Currently, it is hard to check this so we'll only check if 38 | # table_file is not regenerated. 39 | assert last_mtime == os.path.getmtime(table_file) 40 | # Parser constructed from persisted table should produce the same result. 41 | assert parser.parse(input_str) == input_str_result 42 | 43 | # We are now touching variable.pg file 44 | # This should trigger table file regeneration 45 | with open(variable_file, 'a'): 46 | os.utime(variable_file, None) 47 | parser = Parser(grammar) 48 | assert parser.parse(input_str) == input_str_result 49 | # We verify that the table file is newer. 
50 | assert last_mtime < os.path.getmtime(table_file) 51 | 52 | # Now we test that force_load_table will load table even if not 53 | # newer than the grammar. 54 | time.sleep(1) 55 | with open(variable_file, 'a'): 56 | os.utime(variable_file, None) 57 | last_mtime = os.path.getmtime(table_file) 58 | parser = Parser(grammar, force_load_table=True) 59 | assert last_mtime == os.path.getmtime(table_file) 60 | parser = Parser(grammar) 61 | assert last_mtime < os.path.getmtime(table_file) 62 | -------------------------------------------------------------------------------- /tests/func/persistence/calc_with_actions/variable.pg: -------------------------------------------------------------------------------- 1 | VariableRef: VariableName; 2 | 3 | terminals 4 | VariableName: /[a-zA-Z_][_a-zA-Z0-9]*/; 5 | Number: /\d+(\.\d+)?/; 6 | -------------------------------------------------------------------------------- /tests/func/persistence/compare_table/base.pg: -------------------------------------------------------------------------------- 1 | terminals 2 | ID: /\w+/; 3 | FQN: /\w+(\.\w+)*/; 4 | COMMA: ','; 5 | -------------------------------------------------------------------------------- /tests/func/persistence/compare_table/components.pg: -------------------------------------------------------------------------------- 1 | import 'base.pg'; 2 | 3 | Component: 4 | 'component' name=base.ID extends=ComponentExtends? 
'{' 5 | slots=Slot* 6 | '}' 7 | ; 8 | 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA]; 10 | 11 | Slot: SlotIn|SlotOut; 12 | 13 | SlotIn: 'in' name=base.ID; 14 | SlotOut: 'out' name=base.ID; 15 | -------------------------------------------------------------------------------- /tests/func/persistence/compare_table/model.pg: -------------------------------------------------------------------------------- 1 | import 'packages.pg'; 2 | import 'modules.pg' as m; 3 | 4 | Model: 5 | packages=packages.Package* 6 | modules=m.Module* 7 | ; 8 | -------------------------------------------------------------------------------- /tests/func/persistence/compare_table/modules.pg: -------------------------------------------------------------------------------- 1 | import 'components.pg' as c; 2 | import 'base.pg'; 3 | 4 | Module: 'module' name=base.ID '{' 5 | components=c.Component* 6 | '}'; 7 | -------------------------------------------------------------------------------- /tests/func/persistence/compare_table/packages.pg: -------------------------------------------------------------------------------- 1 | import 'components.pg'; 2 | import 'base.pg'; 3 | 4 | Package: 5 | 'package' name=base.ID body=PackageBody? 
6 | ; 7 | 8 | PackageBody: 9 | '{' 10 | components=components.Component* 11 | '}' 12 | ; 13 | -------------------------------------------------------------------------------- /tests/func/persistence/compare_table/test_compare_table.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import filecmp 3 | import os 4 | 5 | from parglare import Grammar, Parser 6 | 7 | this_folder = os.path.dirname(__file__) 8 | 9 | input_str = ''' 10 | 11 | package First 12 | package Second { 13 | component packageComponent { 14 | 15 | } 16 | } 17 | 18 | module SomeModule { 19 | 20 | component myComponent { 21 | in SomeInputSlot 22 | out SomeOutputSlot 23 | } 24 | 25 | 26 | } 27 | 28 | ''' 29 | 30 | 31 | def test_diamond_import_resolving_and_model_creation(): 32 | 33 | grammar_file = os.path.join(this_folder, 'model.pg') 34 | table_file = os.path.join(this_folder, 'model.pgt') 35 | table_cmp_file = os.path.join(this_folder, 'model_compare.pgt') 36 | 37 | g = Grammar.from_file(grammar_file) 38 | with contextlib.suppress(Exception): 39 | os.remove(table_file) 40 | 41 | parser = Parser(g) 42 | 43 | # Check generated table file. 
44 | assert filecmp.cmp(table_file, table_cmp_file, shallow=False) 45 | 46 | # Check that parser loaded from the table will correctly parse 47 | parser = Parser(g, force_load_table=True) 48 | 49 | model = parser.parse(input_str) 50 | assert model 51 | assert model.__class__.__name__ == 'Model' 52 | assert isinstance(model.packages, list) 53 | assert len(model.packages) == 2 54 | assert model.packages[0].name == 'First' 55 | assert isinstance(model.modules, list) 56 | assert len(model.modules) == 1 57 | 58 | packageComponent = model.packages[1].body.components[0] 59 | assert packageComponent.name == 'packageComponent' 60 | 61 | module = model.modules[0] 62 | assert module.__class__.__name__ == 'm.Module' 63 | assert module.name == 'SomeModule' 64 | assert len(module.components) == 1 65 | 66 | component = module.components[0] 67 | assert type(component) is type(packageComponent) 68 | assert component.name == 'myComponent' 69 | assert len(component.slots) == 2 70 | 71 | slot = component.slots[1] 72 | assert slot.__class__.__name__ == 'packages.components.SlotOut' 73 | assert slot.name == 'SomeOutputSlot' 74 | -------------------------------------------------------------------------------- /tests/func/pglr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/pglr/__init__.py -------------------------------------------------------------------------------- /tests/func/pglr/grammar.pg: -------------------------------------------------------------------------------- 1 | A: B C D; 2 | B: C D; 3 | 4 | // Terminal rules without recognizers 5 | terminals 6 | C:; 7 | D:; 8 | -------------------------------------------------------------------------------- /tests/func/pglr/test_pglr.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | import subprocess 3 | import os 4 | 
import contextlib 5 | 6 | CURRENT_DIR = os.path.abspath(os.path.dirname(__file__)) 7 | GRAMMAR_FILE = os.path.join(CURRENT_DIR, 'grammar.pg') 8 | 9 | 10 | @pytest.mark.skipif("TRAVIS" in os.environ 11 | and os.environ["TRAVIS"] == "true", 12 | reason="Test fails under TRAVIS") 13 | def test_pglr_check(): 14 | """ 15 | Test pglr command for grammar checking. 16 | """ 17 | result = subprocess.call(['pglr', 'compile', GRAMMAR_FILE]) 18 | assert result == 0 19 | 20 | 21 | @pytest.mark.skipif("TRAVIS" in os.environ 22 | and os.environ["TRAVIS"] == "true", 23 | reason="Test fails under TRAVIS") 24 | def test_pglr_viz(): 25 | """ 26 | Test pglr command for PDA visualization. 27 | """ 28 | DOT_FILE = os.path.join(CURRENT_DIR, f'{GRAMMAR_FILE}.dot') 29 | with contextlib.suppress(Exception): 30 | os.remove(DOT_FILE) 31 | assert not os.path.exists(DOT_FILE) 32 | result = subprocess.call(['pglr', '--no-colors', 'viz', GRAMMAR_FILE]) 33 | assert result == 0 34 | assert os.path.exists(DOT_FILE) 35 | with open(DOT_FILE) as f: 36 | assert 'digraph grammar' in f.read() 37 | -------------------------------------------------------------------------------- /tests/func/recognizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/recognizers/__init__.py -------------------------------------------------------------------------------- /tests/func/recognizers/collector/grammar.pg: -------------------------------------------------------------------------------- 1 | Model: INT+ Rule1 INT; 2 | Rule1: a=STRING; 3 | 4 | terminals 5 | INT:; 6 | STRING:; 7 | -------------------------------------------------------------------------------- /tests/func/recognizers/collector/test_recognizers_get_collector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | import pytest 5 | 6 | 
from parglare import Grammar, GrammarError, Parser, get_collector 7 | 8 | THIS_FOLDER = os.path.abspath(os.path.dirname(__file__)) 9 | 10 | 11 | def test_recognizer_explicit_get_collector(): 12 | """ 13 | Test the basic usage of `get_collector` API where we don't provide 14 | recognizers in a separate python module. 15 | """ 16 | 17 | recognizer = get_collector() 18 | 19 | @recognizer 20 | def INT(input, pos): 21 | return re.compile(r'\d+').match(input[pos:]) 22 | 23 | @recognizer 24 | def STRING(input, pos): 25 | return re.compile(r'\d+').match(input[pos:]) 26 | 27 | grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'), 28 | recognizers=recognizer.all) 29 | parser = Parser(grammar) 30 | assert parser 31 | 32 | 33 | def test_recognizer_explicit_get_collector_missing_recognizer(): 34 | """ 35 | Test when `get_collector` has a terminal without defined recognizer an 36 | exception is raised. 37 | """ 38 | 39 | recognizer = get_collector() 40 | 41 | @recognizer 42 | def INT(input, pos): 43 | return re.compile(r'\d+').match(input[pos:]) 44 | 45 | with pytest.raises(GrammarError, 46 | match=r'Terminal "STRING" has no recognizer defined.'): 47 | Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'), 48 | recognizers=recognizer.all) 49 | 50 | 51 | def test_recognizer_explicit_get_collector_recognizer_for_unexisting_terminal(): # noqa 52 | """ 53 | Test for situation when `get_collector` has a recognizer for un-existing 54 | terminal. 
55 | """ 56 | 57 | recognizer = get_collector() 58 | 59 | @recognizer 60 | def INT(input, pos): 61 | return re.compile(r'\d+').match(input[pos:]) 62 | 63 | @recognizer 64 | def STRING(input, pos): 65 | return re.compile(r'\d+').match(input[pos:]) 66 | 67 | @recognizer 68 | def STRING2(input, pos): 69 | return re.compile(r'\d+').match(input[pos:]) 70 | 71 | grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'), 72 | recognizers=recognizer.all) 73 | parser = Parser(grammar) 74 | assert parser 75 | -------------------------------------------------------------------------------- /tests/func/recognizers/test_recognizer_context.py: -------------------------------------------------------------------------------- 1 | import pytest # noqa 2 | from parglare import Grammar, Parser 3 | import re 4 | 5 | 6 | def test_recognizer_context(): 7 | grammar = """ 8 | program: expression+[semicolon]; 9 | expression: term+; 10 | 11 | terminals 12 | semicolon: ";"; 13 | term:; 14 | """ 15 | 16 | term_re = re.compile(r"[a-zA-Z_]+") 17 | 18 | def term(context, input, pos): 19 | match = term_re.match(input, pos) 20 | if match is None: 21 | return None 22 | return input[pos:match.end()] 23 | 24 | g = Grammar.from_string(grammar, recognizers={'term': term}) 25 | parser = Parser(g) 26 | assert parser.parse("a bb cc; d ee f; g hh i") 27 | -------------------------------------------------------------------------------- /tests/func/regressions/issue38/names.pg: -------------------------------------------------------------------------------- 1 | LINE: FIO|SYMBOL; 2 | 3 | terminals 4 | FIO: /МИША|САША/ {15}; 5 | SYMBOL: /\w+/; 6 | -------------------------------------------------------------------------------- /tests/func/regressions/issue38/test_issue_38_unicode_py2.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from parglare import Grammar, Parser 4 | 5 | 6 | def test_grammar_with_unicode(): 7 | this_folder = 
os.path.dirname(__file__) 8 | grammar = Grammar.from_file(os.path.join(this_folder, "names.pg")) 9 | parser = Parser(grammar, consume_input=False) 10 | inp = 'МИША МЫЛ РАМУ' 11 | result = parser.parse(inp) 12 | assert result 13 | -------------------------------------------------------------------------------- /tests/func/regressions/issue52/grammar.pg: -------------------------------------------------------------------------------- 1 | PROG: EXPR+; 2 | 3 | ///// SIMPLE EXPRESSIONS ///// 4 | 5 | EXPR: BUILTIN '(' EXPR* ')' {800} 6 | | 'if' '(' EXPR_PAIR+ EXPR ')' {800} 7 | | 'switch' '(' EXPR EXPR_PAIR+ EXPR ')' {800} 8 | | '[' EXPR 'for' KVP+ ']' 9 | | OOO_PAREN EXPR ')' 10 | | EXPR F_PAREN EXPR+ KVP* ')' {left, 700} 11 | | EXPR DOT POST_DOT {left, 700} 12 | | EXPR '%' EXPR {left, 600} 13 | | EXPR '^' EXPR {left, 600} 14 | | EXPR '+' EXPR {left, 500} 15 | | EXPR '/' EXPR {left, 500} 16 | | EXPR '\\' EXPR {left, 500} 17 | // logical operators on string expressions 18 | // lower precedence than string-string operators, 19 | // higher than pure logic operators 20 | | EXPR '==' EXPR {left, 450} 21 | | EXPR '=~' EXPR {left, 450} 22 | // logical operators, which can't really be chained with string operators 23 | // (even if a nonzero-length string can evaluate to "true"): 24 | // !a.b.c+e.f makes no sense as !(a.b.c)+e.f -> (false)+e.f 25 | // so they will have lower precedence 26 | | '!' 
EXPR {right, 400} 27 | | EXPR '&&' EXPR {left, 300} 28 | | EXPR '||' EXPR {left, 250} 29 | | LITERAL {200} 30 | | VAR {100}; 31 | 32 | POST_DOT: MODIFIER '(' EXPR* ')' {1000} 33 | | VAR {100}; 34 | 35 | ////// COMPOUND EXPRESSIONS ////// 36 | 37 | KVP: VAR '=' EXPR; 38 | EXPR_PAIR: EXPR ':' EXPR; 39 | 40 | ///// MODIFIERS ///// 41 | 42 | MODIFIER: 'strip' | 'lower' | 'pre' | 'post' | 'regex' | 'wrap'; 43 | 44 | ///// BUILTINS ///// 45 | 46 | BUILTIN: 'chmod' | 'dynamic' | 'table' | 'sha256' | 'cat'; 47 | 48 | ///// LITERALS ///// 49 | 50 | LITERAL: FUNC | DICT | LIST | STRING | INT | BOOL | PUKE | NULL; 51 | FUNC: '{' VAR+ KVP* '->' EXPR '}'; 52 | DICT: '<' KVP* '>'; 53 | BOOL: 'true' | 'false'; 54 | STRING: DQSTRING | SQSTRING; 55 | LIST: '[' LIST_ELEM* ']'; 56 | LIST_ELEM: EXPR | EXPAND | SKIP; 57 | EXPAND: '*' EXPR; 58 | 59 | ///// TERMINALS ///// 60 | 61 | terminals 62 | // F_PAREN is a paren that must be a function call 63 | F_PAREN: /(?<=[a-zA-Z_0-9)}])\(/; 64 | // OOO_PAREN is a parent that must not be a function call 65 | OOO_PAREN: /(? getConvertibleTypes() { 39 | return Collections.singleton(new ConvertiblePair(Duration.class, String.class)); 40 | } 41 | 42 | @Override 43 | public Object convert(Object source, TypeDescriptor sourceType, TypeDescriptor targetType) { 44 | if (source == null) { 45 | return null; 46 | } 47 | return convert((Duration) source, getDurationStyle(sourceType), getDurationUnit(sourceType)); 48 | } 49 | 50 | private ChronoUnit getDurationUnit(TypeDescriptor sourceType) { 51 | DurationUnit annotation = sourceType.getAnnotation(DurationUnit.class); 52 | return (annotation != null) ? annotation.value() : null; 53 | } 54 | 55 | private DurationStyle getDurationStyle(TypeDescriptor sourceType) { 56 | DurationFormat annotation = sourceType.getAnnotation(DurationFormat.class); 57 | return (annotation != null) ? 
annotation.value() : null; 58 | } 59 | 60 | private String convert(Duration source, DurationStyle style, ChronoUnit unit) { 61 | style = (style != null) ? style : DurationStyle.ISO8601; 62 | return style.print(source, unit); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /tests/perf/test_cpu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ####################################################################### 3 | # Testing parsing speed. This is used for the purpose of testing 4 | # of performance gains/loses for various approaches. 5 | # Author: Igor R. Dejanovic 6 | # Copyright: (c) 2017-2021 Igor R. Dejanovic 7 | # License: MIT License 8 | ####################################################################### 9 | import time 10 | import io 11 | import gc 12 | from itertools import groupby 13 | from os.path import dirname, join, getsize 14 | from parglare import Grammar, Parser, GLRParser 15 | from tests import TESTS 16 | 17 | INPUTS = 6 18 | REPEAT = 5 19 | 20 | 21 | class TestResult: 22 | def __init__(self, name): 23 | self.name = name 24 | self.input_idx = None 25 | self.size = None 26 | self.ambig = None 27 | self.time = None 28 | self.speed = None 29 | 30 | # Grammar/Table sizes 31 | self.nonterminals = None 32 | self.productions = None 33 | self.states = None 34 | 35 | 36 | def cpu_tests(): 37 | results = [] 38 | for test_idx, test in enumerate(TESTS): 39 | for parsing in ['LR', 'GLR']: 40 | if ((not test.lr and parsing == 'LR') or 41 | (not test.glr and parsing == 'GLR')): 42 | continue 43 | 44 | parser_class = Parser if parsing == 'LR' else GLRParser 45 | for input_idx in range(INPUTS): 46 | result = TestResult(f'{test.name} {parsing}') 47 | result.input_idx = input_idx + 1 48 | test_root = join(dirname(__file__), f'test{test_idx+1}') 49 | file_name = join(test_root, f'input{input_idx+1}') 50 | result.size = getsize(file_name) 51 
| 52 | g = Grammar.from_file(join(test_root, 'g.pg')) 53 | parser = parser_class(g) 54 | result.nonterminals = len(g.nonterminals) 55 | result.productions = len(g.productions) 56 | result.states = len(parser.table.states) 57 | 58 | with io.open(file_name, 'r', encoding='utf-8') as f: 59 | content = f.read() 60 | gcold = gc.isenabled() 61 | gc.disable() 62 | try: 63 | t_start = time.time() 64 | for i in range(REPEAT): 65 | forest = parser.parse(content) 66 | t_end = time.time() 67 | finally: 68 | if gcold: 69 | gc.enable() 70 | 71 | result.time = t_end - t_start 72 | result.speed = int(result.size / (t_end - t_start)*REPEAT) 73 | if parsing == 'GLR': 74 | result.ambig = forest.ambiguities 75 | 76 | results.append(result) 77 | 78 | with open(join(dirname(__file__), 'reports', 'cpu-report.txt'), 'w') as f: 79 | inputs = '|'.join(f' I{i+1} ' for i in range(INPUTS)) 80 | f.write(f'| |{inputs}|\n') 81 | previous_name = 'None' 82 | for name, results in groupby(results, lambda r: r.name): 83 | results = list(results) 84 | if not name.startswith(previous_name): 85 | sizes_str = '|'.join(f'{r.size:^9,d}' for r in results) 86 | title = '{:15s}'.format(name[:-3] + ' sizes') 87 | f.write(f'|{title}|{sizes_str}|\n') 88 | results_str = '|'.join(f'{r.speed:^9,d}' for r in results) 89 | f.write(f'|{name:15s}|{results_str}|\n') 90 | if name.endswith('GLR'): 91 | ambig_str = '|'.join(f'{r.ambig:^9,d}' for r in results) 92 | title = '{:15s}'.format(name[:-4] + ' ambig') 93 | f.write(f'|{title}|{ambig_str}|\n') 94 | previous_name = ''.join(name.split()[:-1]) 95 | 96 | 97 | if __name__ == '__main__': 98 | cpu_tests() 99 | -------------------------------------------------------------------------------- /tests/perf/test_mem.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ####################################################################### 3 | # Testing memory utilization. 
This is used for the purpose of testing 4 | # of performance gains/loses for various approaches. 5 | # Author: Igor R. Dejanovic 6 | # Copyright: (c) 2021 Igor R. Dejanovic 7 | # License: MIT License 8 | ####################################################################### 9 | import io 10 | import tracemalloc 11 | import gc 12 | from itertools import groupby 13 | from os.path import dirname, join, getsize 14 | from parglare import Grammar, Parser, GLRParser 15 | from tests import TESTS 16 | 17 | INPUTS = 6 18 | REPEAT = 5 19 | 20 | 21 | class TestResult: 22 | def __init__(self, name): 23 | self.name = name 24 | self.input_idx = None 25 | self.size = None 26 | self.mem = None 27 | self.ambig = None 28 | 29 | 30 | def mem_tests(): 31 | results = [] 32 | for test_idx, test in enumerate(TESTS): 33 | for parsing in ['LR', 'GLR']: 34 | if ((not test.lr and parsing == 'LR') or 35 | (not test.glr and parsing == 'GLR')): 36 | continue 37 | 38 | parser_class = Parser if parsing == 'LR' else GLRParser 39 | for input_idx in range(INPUTS): 40 | result = TestResult(f'{test.name} {parsing}') 41 | result.input_idx = input_idx + 1 42 | test_root = join(dirname(__file__), f'test{test_idx+1}') 43 | file_name = join(test_root, f'input{input_idx+1}') 44 | result.size = getsize(file_name) 45 | 46 | g = Grammar.from_file(join(test_root, 'g.pg')) 47 | parser = parser_class(g) 48 | 49 | with io.open(file_name, 'r', encoding='utf-8') as f: 50 | content = f.read() 51 | 52 | gc.collect() 53 | tracemalloc.start() 54 | forest = parser.parse(content) 55 | _, peak = tracemalloc.get_traced_memory() 56 | result.mem = peak // 1000 57 | tracemalloc.stop() 58 | 59 | if parsing == 'GLR': 60 | result.ambig = forest.ambiguities 61 | 62 | results.append(result) 63 | 64 | with open(join(dirname(__file__), 'reports', 'mem-report.txt'), 'w') as f: 65 | inputs = '|'.join(f' I{i+1} ' for i in range(INPUTS)) 66 | f.write(f'| |{inputs}|\n') 67 | previous_name = 'None' 68 | for name, results in 
groupby(results, lambda r: r.name): 69 | results = list(results) 70 | if not name.startswith(previous_name): 71 | sizes_str = '|'.join(f'{r.size:^9,d}' for r in results) 72 | title = '{:15s}'.format(name[:-3] + ' sizes') 73 | f.write(f'|{title}|{sizes_str}|\n') 74 | results_str = '|'.join(f'{r.mem:^9,d}' for r in results) 75 | f.write(f'|{name:15s}|{results_str}|\n') 76 | if name.endswith('GLR'): 77 | ambig_str = '|'.join(f'{r.ambig:^9,d}' for r in results) 78 | title = '{:15s}'.format(name[:-4] + ' ambig') 79 | f.write(f'|{title}|{ambig_str}|\n') 80 | previous_name = ''.join(name.split()[:-1]) 81 | 82 | 83 | if __name__ == '__main__': 84 | mem_tests() 85 | -------------------------------------------------------------------------------- /tests/perf/tests.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | Test = namedtuple('Test', 'name lr glr') 4 | 5 | TESTS = [ 6 | # Name, LR, GLR 7 | Test('JSON', True, True), 8 | Test('BibTeX', True, True), 9 | Test('Java', False, True) 10 | ] 11 | --------------------------------------------------------------------------------