├── .editorconfig
├── .gitattributes
├── .github
    ├── ISSUE_TEMPLATE.md
    └── workflows
    │   └── ci-linux-ubuntu.yml
├── .gitignore
├── AUTHORS.md
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── PULL_REQUEST_TEMPLATE.md
├── README.rst
├── docs
    ├── about
    │   ├── CONTRIBUTING.md
    │   └── LICENSE.md
    ├── actions.md
    ├── common.md
    ├── css
    │   └── version-select.css
    ├── debugging.md
    ├── disambiguation.md
    ├── getting_started.md
    ├── grammar.md
    ├── grammar_language.md
    ├── grammar_modularization.md
    ├── handling_errors.md
    ├── images
    │   ├── calc.pg
    │   ├── calc.pg.dot.png
    │   ├── calc_forest.dot.png
    │   ├── calc_trace.dot.png
    │   ├── glr_infinite_recursion.svg
    │   ├── parglare-logo.png
    │   └── parglare-logo.svg
    ├── index.md
    ├── js
    │   └── version-select.js
    ├── lr_parsing.md
    ├── parse_forest_trees.md
    ├── parser.md
    ├── pglr.md
    ├── recognizers.md
    ├── release_notes
    │   ├── release_0_14.md
    │   └── release_0_15.md
    └── style.css
├── examples
    ├── bibtex
    │   ├── bibtex.pg
    │   ├── bibtex.py
    │   └── test.bib
    ├── c
    │   ├── README.md
    │   ├── c.pg
    │   ├── c2.pg
    │   └── c_example.py
    ├── calc
    │   └── calc.py
    ├── csv
    │   └── csv.py
    ├── custom_table_caching
    │   ├── .gitignore
    │   ├── README.md
    │   ├── compile.py
    │   ├── grammar.py
    │   └── parser.py
    ├── java
    │   ├── README.md
    │   ├── TomcatServletWebServerFactory.java
    │   ├── java16.pg
    │   └── java_example.py
    ├── json
    │   ├── example1.json
    │   ├── example2.json
    │   ├── example3.json
    │   ├── example4.json
    │   ├── example5.json
    │   ├── json.pg
    │   └── json_example.py
    ├── molecular_formulas
    │   ├── README.md
    │   ├── parglare_mw.py
    │   ├── ply_mw.py
    │   └── run_test.py
    ├── quick_intro.py
    ├── rhapsody
    │   ├── LightSwitch.rpy
    │   ├── rhapsody.pg
    │   └── rhapsody.py
    └── robot
    │   ├── README.md
    │   ├── program.rbt
    │   ├── robot.pg
    │   ├── robot.pg.dot.png
    │   └── robot.py
├── install-dev.sh
├── install-test.sh
├── mkdocs.yml
├── parglare
    ├── __init__.py
    ├── actions.py
    ├── cli.py
    ├── closure.py
    ├── common.py
    ├── exceptions.py
    ├── export.py
    ├── glr.py
    ├── grammar.py
    ├── parser.py
    ├── tables
    │   ├── __init__.py
    │   └── persist.py
    ├── termui.py
    └── trees.py
├── pyproject.toml
├── runtests.sh
├── scripts
    └── parglare_qtree.py
└── tests
    ├── func
        ├── __init__.py
        ├── actions
        │   ├── __init__.py
        │   ├── collector
        │   │   ├── grammar.pg
        │   │   └── test_actions_get_collector.py
        │   ├── test_actions.py
        │   └── test_builtin_actions.py
        ├── grammar
        │   ├── __init__.py
        │   ├── calc.pg
        │   ├── calcactions.py
        │   ├── expression_grammar.py
        │   ├── expression_grammar_numbers.py
        │   ├── test_grammar.py
        │   ├── test_groups.py
        │   ├── test_keywords.py
        │   ├── test_layout.py
        │   ├── test_load_from_file.py
        │   ├── test_meta_data.py
        │   ├── test_repeatable.py
        │   ├── test_special_grammars.py
        │   ├── test_terminals.py
        │   └── test_whitespaces.py
        ├── import
        │   ├── basic
        │   │   ├── first.pg
        │   │   ├── second.pg
        │   │   ├── submodule
        │   │   │   └── third.pg
        │   │   └── test_import.py
        │   ├── diamond
        │   │   ├── base.pg
        │   │   ├── components.pg
        │   │   ├── model.pg
        │   │   ├── modules.pg
        │   │   ├── packages.pg
        │   │   └── test_diamond.py
        │   ├── fqn
        │   │   ├── A.pg
        │   │   ├── B.pg
        │   │   ├── C.pg
        │   │   ├── D.pg
        │   │   └── test_fqn.py
        │   ├── imported_actions
        │   │   ├── by_action_name
        │   │   │   ├── base.pg
        │   │   │   ├── base_actions.py
        │   │   │   ├── components.pg
        │   │   │   └── model.pg
        │   │   ├── by_decorator_action_name
        │   │   │   ├── base.pg
        │   │   │   ├── base_actions.py
        │   │   │   ├── components.pg
        │   │   │   └── model.pg
        │   │   ├── by_symbol_name
        │   │   │   ├── base.pg
        │   │   │   ├── base_actions.py
        │   │   │   ├── components.pg
        │   │   │   └── model.pg
        │   │   ├── in_grammar_by_action_name
        │   │   │   ├── base.pg
        │   │   │   ├── base_actions.py
        │   │   │   ├── components.pg
        │   │   │   ├── model.pg
        │   │   │   └── model_actions.py
        │   │   ├── in_grammar_by_symbol_name
        │   │   │   ├── base.pg
        │   │   │   ├── base_actions.py
        │   │   │   ├── components.pg
        │   │   │   ├── model.pg
        │   │   │   └── model_actions.py
        │   │   └── test_imported_actions.py
        │   ├── imported_recognizers
        │   │   ├── __init__.py
        │   │   ├── base.pg
        │   │   ├── base_recognizers.py
        │   │   ├── components.pg
        │   │   ├── model.pg
        │   │   ├── model_override.pg
        │   │   ├── model_override_recognizers.py
        │   │   └── test_imported_recognizers.py
        │   ├── override
        │   │   ├── base.pg
        │   │   ├── first.pg
        │   │   ├── nonexisting.pg
        │   │   ├── second.pg
        │   │   └── test_override.py
        │   └── recursion
        │   │   ├── base.pg
        │   │   ├── components.pg
        │   │   ├── model.pg
        │   │   ├── modules.pg
        │   │   ├── packages.pg
        │   │   └── test_recursion.py
        ├── parsing
        │   ├── __init__.py
        │   ├── parsing_errors.txt
        │   ├── parsing_from_file.txt
        │   ├── test_build_tree.py
        │   ├── test_conflicts.py
        │   ├── test_dynamic_disambiguation_filters.py
        │   ├── test_error_recovery.py
        │   ├── test_glr_error_recovery.py
        │   ├── test_glr_forest.py
        │   ├── test_glr_forest_disambiguation.py
        │   ├── test_glr_parsing.py
        │   ├── test_greedy.py
        │   ├── test_lexical_disambiguation.py
        │   ├── test_objects.py
        │   ├── test_parse_context.py
        │   ├── test_parser_construction.py
        │   ├── test_parsing.py
        │   ├── test_parsing_errors.py
        │   ├── test_to_dot.py
        │   └── test_to_str.py
        ├── persistence
        │   ├── calc_with_actions
        │   │   ├── calc.pg
        │   │   ├── calc_actions.py
        │   │   ├── test_table_persistance.py
        │   │   └── variable.pg
        │   └── compare_table
        │   │   ├── base.pg
        │   │   ├── components.pg
        │   │   ├── model.pg
        │   │   ├── model.pgt
        │   │   ├── model_compare.pgt
        │   │   ├── modules.pg
        │   │   ├── packages.pg
        │   │   └── test_compare_table.py
        ├── pglr
        │   ├── __init__.py
        │   ├── grammar.pg
        │   └── test_pglr.py
        ├── recognizers
        │   ├── __init__.py
        │   ├── collector
        │   │   ├── grammar.pg
        │   │   └── test_recognizers_get_collector.py
        │   ├── test_recognizer_context.py
        │   └── test_recognizers.py
        ├── regressions
        │   ├── issue38
        │   │   ├── names.pg
        │   │   └── test_issue_38_unicode_py2.py
        │   ├── issue52
        │   │   ├── grammar.pg
        │   │   └── test_issue52_table_caching.py
        │   ├── issue97
        │   │   ├── first.pg
        │   │   ├── second.pg
        │   │   └── test_issue97.py
        │   ├── test_glr_list_building_bug.py
        │   ├── test_issue147.py
        │   ├── test_issue31_glr_drop_parses_on_lexical_ambiguity.py
        │   ├── test_issue32.py
        │   ├── test_issue64.py
        │   ├── test_issue73.py
        │   ├── test_issue_110_wrong_positions_with_glr_and_empty.py
        │   ├── test_issue_112.py
        │   ├── test_issue_114.py
        │   ├── test_issue_22_wrong_associativity_behaviour.py
        │   ├── test_issue_23_multiple_rules_with_assignments.py
        │   ├── test_issue_54_collector_unicode.py
        │   ├── test_recognizer_nonexisting_terminal
        │   │   ├── grammar.pg
        │   │   ├── grammar_recognizers.py
        │   │   └── test_recognizer_nonexisting_terminal.py
        │   ├── test_regex_alternative_match_bug.py
        │   └── test_terminal_exists_noerror_on_terminal_definition_before.py
        ├── test_common_errors.py
        ├── test_examples.py
        └── test_export.py
    └── perf
        ├── profile.sh
        ├── report_grammar.py
        ├── reports
            ├── cpu-report.txt
            ├── grammar-sizes.txt
            └── mem-report.txt
        ├── requirements.txt
        ├── runall.sh
        ├── test1
            ├── g.pg
            ├── input1
            ├── input2
            ├── input3
            ├── input4
            ├── input5
            └── input6
        ├── test2
            ├── g.pg
            ├── input1
            ├── input2
            ├── input3
            ├── input4
            ├── input5
            └── input6
        ├── test3
            ├── g.pg
            ├── input1
            ├── input2
            ├── input3
            ├── input4
            ├── input5
            └── input6
        ├── test_cpu.py
        ├── test_mem.py
        └── tests.py


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://editorconfig.org
 2 | 
 3 | root = true
 4 | 
 5 | [*]
 6 | indent_style = space
 7 | indent_size = 4
 8 | trim_trailing_whitespace = true
 9 | insert_final_newline = true
10 | charset = utf-8
11 | end_of_line = lf
12 | 
13 | [*.bat]
14 | indent_style = tab
15 | end_of_line = crlf
16 | 
17 | [LICENSE]
18 | insert_final_newline = false
19 | 
20 | [Makefile]
21 | indent_style = tab
22 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | tests/perf/test_inputs/* linguist-vendored
2 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | * parglare version:
 2 | * Python version:
 3 | * Operating System:
 4 | 
 5 | ### Description
 6 | 
 7 | Describe what you were trying to get done.
 8 | Tell us what happened, what went wrong, and what you expected to happen.
 9 | 
10 | ### What I Did
11 | 
12 | ```
13 | Paste the command(s) you ran and the output.
14 | If there was a crash, please include the traceback here.
15 | ```
16 | 


--------------------------------------------------------------------------------
/.github/workflows/ci-linux-ubuntu.yml:
--------------------------------------------------------------------------------
 1 | name: tests
 2 | 
 3 | on:
 4 |   push:
 5 |     branches-ignore:
 6 |       - 'gh-pages'
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   build:
11 | 
12 |     runs-on: ubuntu-20.04
13 |     strategy:
14 |       matrix:
15 |         python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v4
19 |     - name: Set up Python ${{ matrix.python-version }}
20 |       uses: actions/setup-python@v5
21 |       with:
22 |         python-version: ${{ matrix.python-version }}
23 |     - name: Install test dependencies
24 |       run: |
25 |         ./install-test.sh
26 |     - name: Run unit tests
27 |       run: |
28 |         ./runtests.sh
29 |     - name: Coveralls
30 |       if: github.ref == 'refs/heads/master'
31 |       env:
32 |         COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }}
33 |       run: |
34 |         coveralls
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | 
55 | # Sphinx documentation
56 | docs/_build/
57 | 
58 | # PyBuilder
59 | target/
60 | 
61 | # pyenv python configuration file
62 | .python-version
63 | 
64 | # Other
65 | *.dot
66 | .pytest_cache
67 | site
68 | issues-temp
69 | tests/perf/*.pstats
70 | *.pgt
71 | /.tool-versions
72 | /venv/
73 | /issues/
74 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
1 | ##Development Lead
2 | 
3 | - Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
4 | 
5 | ##Contributors
6 | 
7 | For a full list of contributors and their activity
8 | see [here](https://github.com/igordejanovic/parglare/graphs/contributors).
9 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | See [docs/about/CONTRIBUTING.md](docs/about/CONTRIBUTING.md)
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016-2020, Igor R. Dejanovic and contributors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include AUTHORS.md
 2 | 
 3 | include CONTRIBUTING.md
 4 | include CHANGELOG.md
 5 | include LICENSE
 6 | include README.rst
 7 | 
 8 | recursive-include tests *
 9 | recursive-exclude * __pycache__
10 | recursive-exclude * *.py[co]
11 | recursive-exclude * *.log
12 | recursive-exclude * *.dot
13 | recursive-exclude * *.pdf
14 | recursive-exclude * *.pstats
15 | recursive-exclude * *.report
16 | recursive-exclude * *.txt
17 | 
18 | recursive-include docs *.md *.png
19 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: clean clean-test clean-pyc clean-build docs help lint
 2 | .DEFAULT_GOAL := help
 3 | define BROWSER_PYSCRIPT
 4 | import os, webbrowser, sys
 5 | try:
 6 | 	from urllib import pathname2url
 7 | except:
 8 | 	from urllib.request import pathname2url
 9 | 
10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
11 | endef
12 | export BROWSER_PYSCRIPT
13 | 
14 | define PRINT_HELP_PYSCRIPT
15 | import re, sys
16 | 
17 | for line in sys.stdin:
18 | 	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
19 | 	if match:
20 | 		target, help = match.groups()
21 | 		print("%-20s %s" % (target, help))
22 | endef
23 | export PRINT_HELP_PYSCRIPT
24 | BROWSER := python -c "$$BROWSER_PYSCRIPT"
25 | 
26 | help:
27 | 	@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
28 | 
29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
30 | 
31 | 
32 | clean-build: ## remove build artifacts
33 | 	rm -fr build/
34 | 	rm -fr dist/
35 | 	rm -fr .eggs/
36 | 	find . -name '*.egg-info' -exec rm -fr {} +
37 | 	find . -name '*.egg' -exec rm -f {} +
38 | 
39 | clean-pyc: ## remove Python file artifacts
40 | 	find . -name '*.pyc' -exec rm -f {} +
41 | 	find . -name '*.pyo' -exec rm -f {} +
42 | 	find . -name '*~' -exec rm -f {} +
43 | 	find . -name '__pycache__' -exec rm -fr {} +
44 | 
45 | clean-test: ## remove test and coverage artifacts
46 | 	rm -fr .tox/
47 | 	rm -f .coverage
48 | 	rm -fr htmlcov/
49 | 
50 | lint: ## check style with flake8
51 | 	flake8
52 | 
53 | test: ## run tests quickly with the default Python
54 | 	py.test tests/func
55 | 
56 | 
57 | test-all: ## run tests on every Python version with tox
58 | 	tox
59 | 
60 | coverage: ## check code coverage quickly with the default Python
61 | 	coverage run --source parglare -m pytest tests/func
62 | 
63 | 		coverage report -m
64 | 		coverage html
65 | 		$(BROWSER) htmlcov/index.html
66 | 
67 | docs: ## generate MkDocs HTML documentation
68 | 	mkdocs build
69 | 	$(BROWSER) docs/_build/html/index.html
70 | 
71 | servedocs: ## compile the docs watching for changes
72 | 	mkdocs serve
73 | 	$(BROWSER) "http://localhost:8000/"
74 | 
75 | release: clean ## package and upload a release
76 | 	python setup.py sdist upload
77 | 	python setup.py bdist_wheel upload
78 | 
79 | dist: clean ## builds source and wheel package
80 | 	python setup.py sdist
81 | 	python setup.py bdist_wheel
82 | 	ls -l dist
83 | 
84 | install: clean ## install the package to the active Python's site-packages
85 | 	python setup.py install
86 | 


--------------------------------------------------------------------------------
/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | <!-- Please don't remove/change code review checklist -->
 2 | 
 3 | ## Code review checklist
 4 | 
 5 | - [ ] Pull request represents a single change (i.e. not fixing disparate/unrelated things in a single PR)
 6 | - [ ] Title summarizes what is changing
 7 | - [ ] Commit messages are meaningful (see [this][commit messages] for details)
 8 | - [ ] Tests have been included and/or updated
 9 | - [ ] Docstrings have been included and/or updated, as appropriate
10 | - [ ] Standalone docs have been updated accordingly
11 | - [ ] Changelog(s) has/have been updated, as needed (see `CHANGELOG.md`, no need
12 |       to update for typo fixes and such).
13 | 
14 | 
15 | [commit messages]: https://chris.beams.io/posts/git-commit/
16 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | .. image:: https://raw.githubusercontent.com/igordejanovic/parglare/master/docs/images/parglare-logo.png
  2 | 
  3 | |build-status| |coverage| |docs| |status| |license| |python-versions|
  4 | 
  5 | 
  6 | A pure Python scannerless LR/GLR parser.
  7 | 
  8 | 
  9 | For more information see `the docs <http://www.igordejanovic.net/parglare/>`_.
 10 | 
 11 | 
 12 | Quick intro
 13 | -----------
 14 | 
 15 | This is just a small example to get the general idea. This example shows how to
 16 | parse and evaluate expressions with 5 operations with different priority and
 17 | associativity. Evaluation is done using semantic/reduction actions.
 18 | 
 19 | The whole expression evaluator is done in under 30 lines of code!
 20 | 
 21 | .. code:: python
 22 | 
 23 |     from parglare import Parser, Grammar
 24 | 
 25 |     grammar = r"""
 26 |     E: E '+' E  {left, 1}
 27 |      | E '-' E  {left, 1}
 28 |      | E '*' E  {left, 2}
 29 |      | E '/' E  {left, 2}
 30 |      | E '^' E  {right, 3}
 31 |      | '(' E ')'
 32 |      | number;
 33 | 
 34 |     terminals
 35 |     number: /\d+(\.\d+)?/;
 36 |     """
 37 | 
 38 |     actions = {
 39 |         "E": [lambda _, n: n[0] + n[2],
 40 |               lambda _, n: n[0] - n[2],
 41 |               lambda _, n: n[0] * n[2],
 42 |               lambda _, n: n[0] / n[2],
 43 |               lambda _, n: n[0] ** n[2],
 44 |               lambda _, n: n[1],
 45 |               lambda _, n: n[0]],
 46 |         "number": lambda _, value: float(value),
 47 |     }
 48 | 
 49 |     g = Grammar.from_string(grammar)
 50 |     parser = Parser(g, debug=True, actions=actions)
 51 | 
 52 |     result = parser.parse("34 + 4.6 / 2 * 4^2^2 + 78")
 53 | 
 54 |     print("Result = ", result)
 55 | 
 56 |     # Output
 57 |     # -- Debugging/tracing output with detailed info about grammar, productions,
 58 |     # -- terminals and nonterminals, DFA states, parsing progress,
 59 |     # -- and at the end of the output:
 60 |     # Result = 700.8
 61 | 
 62 | 
 63 | Installation
 64 | ------------
 65 | 
 66 | - Stable version:
 67 | 
 68 | .. code:: shell
 69 | 
 70 |     $ pip install parglare
 71 | 
 72 | - Development version:
 73 | 
 74 | .. code:: shell
 75 | 
 76 |     $ git clone git@github.com:igordejanovic/parglare.git
 77 |     $ pip install -e parglare
 78 | 
 79 | Citing parglare
 80 | ---------------
 81 | 
 82 | If you use parglare in your research please cite this paper:
 83 | 
 84 | .. code:: text
 85 | 
 86 |     Igor Dejanović, Parglare: A LR/GLR parser for Python,
 87 |     Science of Computer Programming, issn:0167-6423, p.102734,
 88 |     DOI:10.1016/j.scico.2021.102734, 2021.
 89 | 
 90 |     @article{dejanovic2021b,
 91 |         author = {Igor Dejanović},
 92 |         title = {Parglare: A LR/GLR parser for Python},
 93 |         doi = {10.1016/j.scico.2021.102734},
 94 |         issn = {0167-6423},
 95 |         journal = {Science of Computer Programming},
 96 |         keywords = {parsing, LR, GLR, Python, visualization},
 97 |         pages = {102734},
 98 |         url = {https://www.sciencedirect.com/science/article/pii/S0167642321001271},
 99 |         year = {2021}
100 |     }
101 | 
102 | License
103 | -------
104 | 
105 | MIT
106 | 
107 | Python versions
108 | ---------------
109 | 
110 | Tested with 3.6-3.11
111 | 
112 | Credits
113 | -------
114 | 
115 | Initial layout/content of this package was created with `Cookiecutter
116 | <https://github.com/audreyr/cookiecutter>`_ and the
117 | `audreyr/cookiecutter-pypackage <https://github.com/audreyr/cookiecutter-pypackage>`_ project template.
118 | 
119 | 
120 | .. |build-status| image:: https://github.com/igordejanovic/parglare/actions/workflows/ci-linux-ubuntu.yml/badge.svg
121 |    :target: https://github.com/igordejanovic/parglare/actions
122 | 
123 | .. |coverage| image:: https://coveralls.io/repos/github/igordejanovic/parglare/badge.svg?branch=master
124 |    :target: https://coveralls.io/github/igordejanovic/parglare?branch=master
125 | 
126 | .. |docs| image:: https://img.shields.io/badge/docs-latest-green.svg
127 |    :target: http://www.igordejanovic.net/parglare/latest/
128 | 
129 | .. |status| image:: https://img.shields.io/pypi/status/parglare.svg
130 | 
131 | .. |license| image:: https://img.shields.io/badge/License-MIT-blue.svg
132 |    :target: https://opensource.org/licenses/MIT
133 | 
134 | .. |python-versions| image:: https://img.shields.io/pypi/pyversions/parglare.svg
135 | 


--------------------------------------------------------------------------------
/docs/about/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Contributing
  2 | 
  3 | Contributions are welcome, and they are greatly appreciated! Every little bit
  4 | helps, and credit will always be given.
  5 | 
  6 | You can contribute in many ways:
  7 | 
  8 | 
  9 | ## Types of Contributions
 10 | 
 11 | ### Report Bugs
 12 | 
 13 | Report bugs at https://github.com/igordejanovic/parglare/issues.
 14 | 
 15 | If you are reporting a bug, please include:
 16 | 
 17 | - Your operating system name and version.
 18 | - Any details about your local setup that might be helpful in troubleshooting.
 19 | - Detailed steps to reproduce the bug.
 20 | 
 21 | 
 22 | ### Fix Bugs
 23 | 
 24 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
 25 | wanted" is open to whoever wants to implement it.
 26 | 
 27 | 
 28 | ### Implement Features
 29 | 
 30 | Look through the GitHub issues for features. Anything tagged with "enhancement"
 31 | and "help wanted" is open to whoever wants to implement it.
 32 | 
 33 | 
 34 | ### Write Documentation
 35 | 
 36 | parglare could always use more documentation, whether as part of the official
 37 | parglare docs, in docstrings, or even on the web in blog posts, articles, and
 38 | such.
 39 | 
 40 | 
 41 | ### Submit Feedback
 42 | 
 43 | The best way to send feedback is to file an issue at
 44 | https://github.com/igordejanovic/parglare/issues.
 45 | 
 46 | If you are proposing a feature:
 47 | 
 48 | - Explain in detail how it would work.
 49 | - Keep the scope as narrow as possible, to make it easier to implement.
 50 | - Remember that this is a volunteer-driven project, and that contributions are
 51 |   welcome :)
 52 | 
 53 | 
 54 | ## Get Started!
 55 | 
 56 | Ready to contribute? Here's how to set up `parglare` for local development.
 57 | 
 58 | 1. Fork the `parglare` repo on GitHub.
 59 | 2. Clone your fork locally:
 60 | 
 61 |         $ git clone git@github.com:your_name_here/parglare.git
 62 | 
 63 | 3. Install your local copy into a virtual environment. This is how you set up
 64 |    your fork for local development:
 65 | 
 66 |         $ cd parglare/
 67 |         $ python -m venv venv
 68 |         $ source venv/bin/activate
 69 |         $ ./install-dev.sh
 70 | 
 71 |    This is needed just the first time. To work on parglare later you just need
 72 |    to activate the virtual environment for each new terminal session:
 73 | 
 74 |         $ cd parglare/
 75 |         $ source venv/bin/activate
 76 | 
 77 | 4. Create a branch for local development::
 78 | 
 79 |         $ git checkout -b name-of-your-bugfix-or-feature
 80 | 
 81 |    Now you can make your changes locally.
 82 | 
 83 | 5. When you're done making changes, run tests:
 84 | 
 85 |         $ ./runtests.sh
 86 | 
 87 |    and verify that all tests pass.
 88 | 
 89 | 6. Commit your changes and push your branch to GitHub:
 90 | 
 91 |         $ git add .
 92 |         $ git commit -m "Your detailed description of your changes."
 93 |         $ git push origin name-of-your-bugfix-or-feature
 94 | 
 95 |    Check [this](https://chris.beams.io/posts/git-commit/) on how to write nice
 96 |    git log messages.
 97 | 
 98 | 7. Submit a pull request through the GitHub website. CI will run the tests for
 99 |    all supported Python versions. Check in the GitHub UI that all pipelines pass.
100 | 
101 | 
102 | ## Pull Request Guidelines
103 | 
104 | Before you submit a pull request, check that it meets these guidelines:
105 | 
106 | 1. The pull request should include tests.
107 | 2. If the pull request adds/changes functionality, the docs should be updated.
108 | 3. The pull request should work for Python 3.8-3.12. Check
109 |    https://travis-ci.org/igordejanovic/parglare/pull_requests and make sure that
110 |    the tests pass for all supported Python versions.
111 | 
112 | 
113 | ## Tips
114 | 
115 | To run a subset of tests:
116 | 
117 | ```
118 | $ py.test tests/func/mytest.py
119 | ```
120 | 
121 | or a single test:
122 | 
123 | ```
124 | $ py.test tests/func/mytest.py::some_test
125 | ```
126 | 


--------------------------------------------------------------------------------
/docs/about/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016-2018, Igor R. Dejanović and contributors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/docs/common.md:
--------------------------------------------------------------------------------
 1 | # Common classes and functions
 2 | 
 3 | ## The Context object
 4 | 
 5 | An object of this kind object is passed to various callback functions (actions,
 6 | recognizers, error recovery etc.). It is not always an instance of the same
 7 | class, but all context objects have the following properties:
 8 | 
 9 | - **start_position/end_position** - the beginning and the end in the input
10 |   stream where the match occurred. `start_position` is the location of the first
11 |   element/character in the input while the `end_position` is one past the last
12 |   element/character of the match. Thus `end_position - start_position` will give
13 |   the length of the match including the layout. You can use
14 |   `parglare.pos_to_line_col(input, position)` function to get line and column of
15 |   the position. This function returns a tuple `(line, column)`.
16 | 
17 | - **file_name** - the name/path of the file being parsed. `None` if Python
18 |   string is parsed.
19 | 
20 | - **input_str** - the input string (or list of objects) that is being parsed.
21 | 
22 | - **layout_content** - is the layout (whitespaces, comments etc.) that are
23 |   collected from the previous non-layout match.
24 | 
25 | - **layout_content_ahead** - layout content before `token_ahead`.
26 | 
27 | - **token**- the token shifted during SHIFT operation. Instance of
28 |   `parglare.parser.Token`.
29 | 
30 | - **token_ahead** - the token recognized as a lookahead.
31 | 
32 | - **production** - an instance of `parglare.grammar.Production` class available
33 |   only on reduction actions (not on shifts). Represents the grammar production.
34 | 
35 | - **state** - An instance of `parglare.tables.LRState`. The LR state of the
36 |   parser automata. This object contains information of the possible actions in
37 |   this state.
38 | 
39 | - **node** - this is available only if the actions are called over the parse tree
40 |   using `call_actions`. It represents the instance of `NodeNonTerm` or `NodeTerm`
41 |   classes from the parse tree where the actions is executed.
42 | 
43 | - **parser** - is the reference to the parser instance. You should use this only
44 |   to investigate parser configuration not to alter its state.
45 | 
46 | - **head** - is a reference to the Graph-structured stack node (`GSSNode`). Only
47 |   used for GLR parsing.
48 | 
49 | - **extra** - this attribute can store arbitrary user information for state
50 |   tracking. If not given as a parameter to `parse` call a `dict` is used.
51 | 
52 | 
53 | ## Location class
54 | 
55 | Used at various places in parglare to define location and span in the files
56 | (e.g. for error reporting).
57 | 
58 | ### Attributes
59 | 
60 | - **input_str** - the input string being parsed.
61 | 
62 | - **file_name** (property) - the name of the file being parsed (`None` if string
63 |   is parsed),
64 | 
65 | - **start_position/end_position** - an absolute position in the input where the
66 |   span starts/ends,
67 | 
68 | - **line**/**column** (properties) - line and column where the span starts.
69 | 
70 | - **line_end**/**column_end** (properties) - line and column where the span
71 |   ends.
72 | 
73 | 
74 | If there is an error in the grammar itself parglare will raise
75 | `parglare.GrammarError` exception.
76 | 


--------------------------------------------------------------------------------
/docs/css/version-select.css:
--------------------------------------------------------------------------------
1 | #version-selector {
2 |   display: block;
3 |   margin: -10px auto 0.809em;
4 |   padding: 2px;
5 | }
6 | 


--------------------------------------------------------------------------------
/docs/getting_started.md:
--------------------------------------------------------------------------------
 1 | # Getting started
 2 | 
 3 | The first thing to do is to write your language grammar using
 4 | the [parglare grammar language](./grammar_language.md). You write the grammar
 5 | either as a Python string in your source code or as a separate file. In case you
 6 | are writing a grammar of a complex language I would suggest the separate file
 7 | approach. Although not mandatory, the convention is that parglare grammar files
 8 | have `.pg` extension.
 9 | 
10 | The next step is to create the instance of the `Grammar` class. This is achieved
11 | by importing the `Grammar` class and calling either `from_file` or `from_str`
12 | methods supplying the file name for the former and the Python string for the
13 | later call.
14 | 
15 | ```python
16 | from parglare import Grammar
17 | 
18 | file_name = .....
19 | grammar = Grammar.from_file(file_name)
20 | ```
21 | 
22 | If there is no errors in the grammar you now have the grammar instance. For more
23 | information see the [section about `Grammar` class](./grammar.md).
24 | 
25 | 
26 | !!! tip
27 | 
28 |     There is also a handy [pglr command line tool](./pglr.md) that can be
29 |     used for grammar checking, visualization and debugging.
30 | 
31 | The next step is to create an instance of the parser. There are two options. If
32 | you want to use LR parser instantiate `Parser` class. For GLR instantiate
33 | `GLRParser` class.
34 | 
35 | 
36 | ```python
37 | from parglare import Parser
38 | parser = Parser(grammar)
39 | ```
40 | 
41 | or
42 | 
43 | ```python
44 | from parglare import GLRParser
45 | parser = GLRParser(grammar)
46 | ```
47 | 
48 | You can provide additional [parser parameters](./parser.md) during instantiation.
49 | 
50 | !!! note
51 | 
52 |     LR parser is faster as the GLR machinery brings a significant overhead. So,
53 |     the general advice is to stick to the LR parsing until you are sure that you
54 |     need additional power of GLR, i.e. either you need more than one token of
55 |     lookahead or your language is inherently ambiguous. pglr tool will help you in
56 |     investigating why you have LR conflicts in your grammar and there are some
57 |     nice [disambiguation features](./lr_parsing.md#resolving-conflicts) in parglare
58 |     that will help you resolve some of those conflicts.
59 | 
60 | Now parse your input calling `parse` method on the parser instance.
61 | 
62 | ```python
63 | result = parser.parse(input_str)
64 | ```
65 | 
66 | Depending on whether you have configured [actions](./actions.md) and what
67 | parameters you used for parser instance you will
68 | get either:
69 | 
70 | - a nested lists if no actions are used,
71 | - a parse tree if [`build_tree` parser param](./parser.md#build_tree) is set to
72 |   `True`,
73 | - some other representation of your input if custom actions are used.
74 | 
75 | In case of the GLR parser you will get a list of all possible results (a.k.a.
76 | _the parse forest_).
77 | 
78 | ## Where to go next?
79 | 
80 | You can investigate various topics in the docs.
81 | The [examples](https://github.com/igordejanovic/parglare/tree/master/examples)
82 | and
83 | the [tests](https://github.com/igordejanovic/parglare/tree/master/tests/func)
84 | are also a good source of information.
85 | 


--------------------------------------------------------------------------------
/docs/handling_errors.md:
--------------------------------------------------------------------------------
 1 | # Handling errors
 2 | 
 3 | When parglare encounters a situation in which no SHIFT or REDUCE operation could
 4 | be performed it will report an error by raising an instance of
 5 | `parglare.ParseError` class.
 6 | 
 7 | `ParseError` has the following attributes:
 8 | 
 9 | - **location** - an instance of the [Location class](./common.md#location-class)
10 |   with information of the span of the error.
11 | 
12 | - **symbols_expected (list)** - a list of expected symbol at the location.
13 | 
14 | - **tokens_ahead (list)** - a list of tokens recognized at the position by
15 |   trying all terminal symbols recognizers from the grammar. Note that this list
16 |   might be empty in case nothing can be recognized at the position or it might
17 |   have more than one element if more recognizers succeeds (lexical ambiguity).
18 | 
19 | - **symbols_before (list)** - a list of last seen symbols. In the case of LR
20 |   parser it will always be a single element list. In the case of GLR there might
21 |   be more symbols if there were multiple parser heads.
22 | 
23 | - **last_heads (list)** - A list of last GLR parser heads. Available only for
24 |   GLR parsing.
25 | 
26 | - **grammar (Grammar)** - An instance of `parglare.Grammar` class used for
27 |   parsing.
28 | 
29 | 
30 | # Error recovery
31 | 
32 | There are a lot of situations where you would want parser to report all the
33 | errors in one go. To do this, parser has to recover from errors, i.e. get to
34 | the valid state and continue.
35 | 
36 | To enable error recovery set `error_recovery` [parameter of parser
37 | construction](./parser.md#error_recovery) to `True`. This will enable implicit
38 | error recovery strategy that will try to search for expected tokens in the input
39 | ahead and when the first is found the parsing will continue. All errors will be
40 | collected as an `errors` list on the parser instance.
41 | 
42 | Each error is an instance of [`ParseError` class](#handling-errors). In case no
43 | recovery is possible last `ParseError` will be raised. `ParserError` has a
44 | location which represents the span of the error in the input (e.g.
45 | `error.location.start_position` and `error.location.end_position`).
46 | 
47 | 
48 | ## Custom recovery strategy
49 | 
50 | To provide a custom strategy for error recovery set `error_recovery` parser
51 | constructor parameter to a Python function. This function should have the
52 | following signature:
53 | 
54 |     def error_recovery_strategy(context, error):
55 |         ...
56 | 
57 | 
58 | - **context***- context like object (usually the parser head).
59 | - **error** - [`ParseError` instance](#handling-errors).
60 | 
61 | Using the head object you can query the state of the parser. E.g. to get the
62 | position use `context.position`, to get the parser state use `context.state`, to
63 | get expected symbols in this state use `context.state.actions.keys()`.
64 | 
65 | To get information about the error use `error` object. E.g. to get expected
66 | symbols at this position for which parser can successfully continue use
67 | `error.symbols_expected`.
68 | 
69 | The recovery function should modify the head (e.g. its position and/or
70 | `token_ahead`) and bring it to a state which can continue. If the recovery is
71 | successful the function should return `True`, otherwise `False`.
72 | 
73 | You can call a default error recovery from your custom recovery by
74 | `context.parser.default_error_recovery(context)`
75 | 


--------------------------------------------------------------------------------
/docs/images/calc.pg:
--------------------------------------------------------------------------------
1 | E: E "+" E | E "*" E | number;
2 | terminals
3 | number: /\d+/;
4 | 


--------------------------------------------------------------------------------
/docs/images/calc.pg.dot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/docs/images/calc.pg.dot.png


--------------------------------------------------------------------------------
/docs/images/calc_forest.dot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/docs/images/calc_forest.dot.png


--------------------------------------------------------------------------------
/docs/images/calc_trace.dot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/docs/images/calc_trace.dot.png


--------------------------------------------------------------------------------
/docs/images/parglare-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/docs/images/parglare-logo.png


--------------------------------------------------------------------------------
/docs/js/version-select.js:
--------------------------------------------------------------------------------
 1 | window.addEventListener("DOMContentLoaded", function() {
 2 |   function normalizePath(path) {
 3 |     var normalized = [];
 4 |     path.split("/").forEach(function(bit, i) {
 5 |       if (bit === "." || (bit === "" && i !== 0)) {
 6 |         return;
 7 |       } else if (bit === "..") {
 8 |         if (normalized.length === 1 && normalized[0] === "") {
 9 |           // We must be trying to .. past the root!
10 |           throw new Error("invalid path");
11 |         } else if (normalized.length === 0 ||
12 |                    normalized[normalized.length - 1] === "..") {
13 |           normalized.push("..");
14 |         } else {
15 |           normalized.pop();
16 |         }
17 |       } else {
18 |         normalized.push(bit);
19 |       }
20 |     });
21 |     return normalized.join("/");
22 |   }
23 | 
24 |   // `base_url` comes from the base.html template for this theme.
25 |   var REL_BASE_URL = base_url;
26 |   var ABS_BASE_URL = normalizePath(window.location.pathname + "/" +
27 |                                    REL_BASE_URL);
28 |   var CURRENT_VERSION = ABS_BASE_URL.split("/").pop();
29 | 
30 |   function makeSelect(options, selected) {
31 |     var select = document.createElement("select");
32 | 
33 |     options.forEach(function(i) {
34 |       var option = new Option(i.text, i.value, undefined,
35 |                               i.value === selected);
36 |       select.add(option);
37 |     });
38 | 
39 |     return select;
40 |   }
41 | 
42 |   var xhr = new XMLHttpRequest();
43 |   xhr.open("GET", REL_BASE_URL + "/../versions.json");
44 |   xhr.onload = function() {
45 |     var versions = JSON.parse(this.responseText);
46 | 
47 |     var realVersion = versions.find(function(i) {
48 |       return i.version === CURRENT_VERSION ||
49 |              i.aliases.includes(CURRENT_VERSION);
50 |     }).version;
51 | 
52 |     var select = makeSelect(versions.map(function(i) {
53 |       return {text: i.title, value: i.version};
54 |     }), realVersion);
55 |     select.id = "version-selector";
56 |     select.addEventListener("change", function(event) {
57 |       window.location.href = REL_BASE_URL + "/../" + this.value;
58 |     });
59 | 
60 |     var title = document.querySelector("div.wy-side-nav-search");
61 |     title.insertBefore(select, title.querySelector(".icon-home").nextSibling);
62 |   };
63 |   xhr.send();
64 | });
65 | 


--------------------------------------------------------------------------------
/docs/release_notes/release_0_14.md:
--------------------------------------------------------------------------------
 1 | # Release notes for 0.14
 2 | 
 3 | ## Parenthesized groups
 4 | 
 5 | RHS in grammar rules now can used parentheses to group element. These groups
 6 | behave similar to any other rule reference. E.g. repetitions and assignments can
 7 | be applied.
 8 | 
 9 | Previously:
10 | 
11 | ```nohiglight
12 | S: a_then_b*[comma] c;
13 | a_then_b: a b;
14 | ...
15 | 
16 | ```
17 | 
18 | Now you can write:
19 | 
20 | ```nohiglight
21 | S: (a b)*[comma] c;
22 | ...
23 | 
24 | ```
25 | 
26 | You can nest groups, combine with choice operator etc.
27 | 
28 | ```nohiglight
29 | S: ( (a c)+ | b)*[comma] c;
30 | 
31 | ```
32 | For more info see a [new section in the docs](../grammar_language.md#parenthesized-groups).
33 | 
34 | 
35 | ## GLR forest
36 | 
37 | GLR now returns `Forest` object. This object represents all the possible solutions.
38 | Forest can be iterated, indexed, yielding lazy parse trees.
39 | 
40 | See [more info in the docs](../parse_forest_trees.md).
41 | 
42 | 
43 | ## Extensions to `pglr` command
44 | 
45 | `pglr trace` now provides `--frontier` flag to organize GSS nodes into
46 | frontiers. See [the docs](../pglr.md#tracing-glr-parsing).
47 | 
48 | `pglr parse` is added for parsing input strings and files and producing forests
49 | and trees as either string or graphical dot representation. See [the
50 | docs](../pglr.md#parsing-inputs).
51 | 
52 | 
53 | ## Support for visitor
54 | 
55 | *Visitor pattern* is supported as a `visitor` function enabling depth-first
56 | processing of tree-like structures. See [the
57 | docs](../parse_forest_trees.md#visitor).
58 | 
59 | 
60 | ## New examples
61 | 
62 | Several new examples are added:
63 | 
64 | - [JSON](https://github.com/igordejanovic/parglare/tree/master/examples/json)
65 | - [BibTeX](https://github.com/igordejanovic/parglare/tree/master/examples/bibtex)
66 | - [Java](https://github.com/igordejanovic/parglare/tree/master/examples/java) (based on Java SE 16 version)
67 | 
68 | 
69 | ## Performance tests
70 | 
71 | New performance tests based on new example grammars are provided in
72 | [tests/perf](https://github.com/igordejanovic/parglare/tree/master/tests/perf).
73 | Run `runall.sh` and read the reports in
74 | [tests/perf/reports](https://github.com/igordejanovic/parglare/tree/master/tests/perf/reports).
75 | 


--------------------------------------------------------------------------------
/docs/release_notes/release_0_15.md:
--------------------------------------------------------------------------------
 1 | # Release notes for 0.15
 2 | 
 3 | This release should be fully backward compatible so the upgrade should require
 4 | no changes.
 5 | 
 6 | ## Greedy repetitions
 7 | 
 8 | The most important new feature in this release is a support for greedy
 9 | repetition. Read more in [the docs](../../grammar_language/#greedy-repetitions).
10 | 
11 | ## New way to disambiguate the GLR forest
12 | 
13 | A new and recommended way for dynamic disambiguation is by using
14 | `forest.disambiguate`. Read more in [the docs](../../disambiguation/#disambiguation-of-a-glr-forest).
15 | 
16 | ## Optimized getting of the first tree from the GLR forest
17 | 
18 | If you are not interested into analyzing the forest and comparing trees but just
19 | want to get any valid tree you can use `forest.get_first_tree()` which is
20 | optimized to avoid tree enumeration that might be costly. The returned tree is
21 | fully unpacked and doesn't use proxies, i.e. it contains only `NodeTerm` and
22 | `NodeNonTerm` instances.
23 | 


--------------------------------------------------------------------------------
/docs/style.css:
--------------------------------------------------------------------------------
1 | .rst-content .section ol p, .rst-content .section ul p { margin-bottom: 6px; margin-top: 12px;}
2 | 


--------------------------------------------------------------------------------
/examples/bibtex/bibtex.pg:
--------------------------------------------------------------------------------
 1 | BibFile:
 2 |     entries=BibEntry+
 3 | ;
 4 | 
 5 | BibEntry: BibLineComment | BibComment | BibPreamble | BibString | BibRefEntry;
 6 | 
 7 | BibLineComment:
 8 |     text=BibCommentLine
 9 | ;
10 | 
11 | BibComment: '@' 'comment' '{'
12 |         text=BlockCommentBody
13 |     '}'
14 | ;
15 | 
16 | BibPreamble: '@' 'preamble' '{'
17 |         value=Value
18 |     '}'
19 | ;
20 | 
21 | BibString: '@' type='string' '{'
22 |         fields=BibField*[Comma]
23 |     '}'
24 | ;
25 | 
26 | BibRefEntry: '@' type=BibType '{' key=BibKey Comma
27 |         fields=BibField*[Comma]
28 |         Comma?
29 |     '}'
30 | ;
31 | 
32 | BibField: name=Ident '=' value=Value;
33 | 
34 | 
35 | Value: '"' Piece+[Hash] '"' | Piece+[Hash];
36 | Piece: '{' Piece* '}' | InBraces;
37 | 
38 | 
39 | terminals
40 | 
41 | Comma: ',';
42 | Hash: '#';
43 | //BibType: /(?!(string|comment|preamble))\w+/;
44 | BibType: /\w+/;
45 | BibKey: /[^, =\t\}\n]+/;
46 | Ident:  /[^\d]([^ \t\"#%\'\(\),={}])+/;
47 | InBraces: /[^{}]+/;
48 | 
49 | BibCommentLine: /[^@][^\n]+/;
50 | BlockCommentBody: /[^\}]*/;
51 | 


--------------------------------------------------------------------------------
/examples/bibtex/bibtex.py:
--------------------------------------------------------------------------------
 1 | from os.path import dirname, join
 2 | 
 3 | from parglare import GLRParser, Grammar
 4 | 
 5 | 
 6 | def main(debug=False):
 7 |     THIS_DIR = dirname(__file__)
 8 |     g = Grammar.from_file(join(THIS_DIR, 'bibtex.pg'))
 9 | 
10 |     parser = GLRParser(g)
11 | 
12 |     forest = parser.parse_file(join(THIS_DIR, 'test.bib'))
13 |     print(f'Solutions: {len(forest)}')
14 |     print(f'Ambiguities: {forest.ambiguities}')
15 |     if debug:
16 |         with open('forest.txt', 'w') as f:
17 |             f.write(forest.to_str())
18 |         print('See forest.txt')
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     main(debug=True)
23 | 


--------------------------------------------------------------------------------
/examples/bibtex/test.bib:
--------------------------------------------------------------------------------
 1 | @comment{x-kbibtex-encoding=utf-8}
 2 | 
 3 | @article{knuth1965,
 4 |     title = {On the translation of languages from left to right},
 5 |     author = {Knuth, Donald E},
 6 |     year = {1965},
 7 |     journal = {Information and control},
 8 |     number = {6},
 9 |     pages = {607–639},
10 |     publisher = {Elsevier},
11 |     volume = {8},
12 |     x-stars = {94.89},
13 |     keywords = {parsing, LR, dslbook}
14 | }
15 | 
16 | @inproceedings{tomita1984,
17 |     title = {LR parsers for natural languages},
18 |     author = {Tomita, Masaru},
19 |     booktitle = {10th International Conference on Computational Linguistics and 22nd Annual Meeting of the Association for Computational Linguistics},
20 |     pages = {354--357},
21 |     year = {1984}
22 | }
23 | 
24 | @inproceedings{tomita1985,
25 |     title = {{An Efficient Context-Free Parsing Algorithm for Natural Languages.}},
26 |     author = {Tomita, Masaru},
27 |     year = {1985},
28 |     booktitle = {{IJCAI}},
29 |     pages = {756–764},
30 |     volume = {2}
31 | }
32 | 
33 | @incollection{nozohoor1991,
34 |     title = {GLR Parsing for $\varepsilon$-Grammers},
35 |     author = {Nozohoor-Farshi, Rahman},
36 |     booktitle = {Generalized LR parsing},
37 |     file = {docs/tomita2012.pdf},
38 |     pages = {61--75},
39 |     year = {1991},
40 |     publisher = {Springer}
41 | }
42 | 
43 | @article{scott2007,
44 |     title = {BRNGLR: a cubic Tomita-style GLR parsing algorithm},
45 |     author = {Scott, Elizabeth and Johnstone, Adrian and Economopoulos, Rob},
46 |     journal = {Acta informatica},
47 |     volume = {44},
48 |     number = {6},
49 |     pages = {427--461},
50 |     year = {2007},
51 |     publisher = {Springer}
52 | }
53 | 
54 | @inproceedings{mcpeak2004,
55 |     title = {Elkhound: A fast, practical GLR parser generator},
56 |     author = {McPeak, Scott and Necula, George C},
57 |     booktitle = {International Conference on Compiler Construction},
58 |     pages = {73--88},
59 |     year = {2004},
60 |     organization = {Springer}
61 | }
62 | 


--------------------------------------------------------------------------------
/examples/c/README.md:
--------------------------------------------------------------------------------
1 | This is an example of C parsing. Still WIP.
2 | 


--------------------------------------------------------------------------------
/examples/c/c_example.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This is work in progress
 3 | """
 4 | import os
 5 | import re
 6 | 
 7 | from parglare import GLRParser, Grammar
 8 | 
 9 | 
10 | def main(debug=False):
11 |     this_folder = os.path.dirname(__file__)
12 |     g = Grammar.from_file(os.path.join(this_folder, 'c.pg'),
13 |                           re_flags=re.MULTILINE | re.VERBOSE)
14 |     parser = GLRParser(g, debug=debug, debug_colors=True)
15 | 
16 |     # The input is C code after preprocessing
17 |     forest = parser.parse_file(os.path.join(this_folder, 'example.c'))
18 | 
19 |     print('Solutions: ', len(forest))
20 |     print('Ambiguities: ', forest.ambiguities)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     main(debug=False)
25 | 


--------------------------------------------------------------------------------
/examples/calc/calc.py:
--------------------------------------------------------------------------------
 1 | from parglare import Grammar, Parser
 2 | from parglare.actions import pass_inner, pass_single
 3 | 
 4 | grammar = r"""
 5 | Calc: Assignments E;
 6 | Assignments: Assignment | Assignments Assignment | EMPTY;
 7 | Assignment: VariableName "=" Number;
 8 | 
 9 | E: E "+" E {left, 1}
10 |  | E "-" E {left, 1}
11 |  | E "*" E {left, 2}
12 |  | E "/" E {left, 2}
13 |  | "(" E ")"
14 |  | VariableRef
15 |  | Number
16 | ;
17 | 
18 | VariableRef: VariableName;
19 | 
20 | terminals
21 | VariableName: /[a-zA-Z_][_a-zA-Z0-9]*/;
22 | Number: /\d+(\.\d+)?/;
23 | """
24 | 
25 | 
26 | # Semantic Actions
27 | def act_assignment(context, nodes):
28 |     """Semantic action for variable assignment."""
29 | 
30 |     name = nodes[0]
31 |     number = nodes[2]
32 | 
33 |     if context.extra is None:
34 |         context.extra = {}
35 | 
36 |     context.extra[name] = number
37 | 
38 | 
39 | actions = {
40 |     "Calc": lambda _, nodes: nodes[1],
41 |     "Assignment": act_assignment,
42 |     "E": [lambda _, nodes: nodes[0] + nodes[2],
43 |           lambda _, nodes: nodes[0] - nodes[2],
44 |           lambda _, nodes: nodes[0] * nodes[2],
45 |           lambda _, nodes: nodes[0] / nodes[2],
46 |           pass_inner,
47 |           pass_single,
48 |           pass_single],
49 |     "Number": lambda _, value: float(value),
50 |     "VariableRef": lambda context, nodes: context.extra[nodes[0]],
51 | }
52 | 
53 | 
54 | def main(debug=False):
55 |     g = Grammar.from_string(grammar, debug=debug, debug_colors=True)
56 |     parser = Parser(g, actions=actions, debug=debug, debug_colors=True)
57 | 
58 |     input_str = """
59 |     a = 5
60 |     b = 10
61 | 
62 |     a + 56.4 / 3 * 5 - b + 8 * 3
63 |     """
64 | 
65 |     res = parser.parse(input_str)
66 | 
67 |     assert res == 5. + 56.4 / 3 * 5 - 10 + 8 * 3
68 |     print("Input:\n", input_str)
69 |     print("Result = ", res)
70 | 
71 | 
72 | if __name__ == "__main__":
73 |     main(debug=True)
74 | 


--------------------------------------------------------------------------------
/examples/csv/csv.py:
--------------------------------------------------------------------------------
 1 | from parglare import Grammar, Parser
 2 | 
 3 | grammar = r"""
 4 | @pass_inner
 5 | CSVFile: OptionalNewLines Records OptionalNewLines;
 6 | @collect_sep
 7 | Records: Records OptionalNewLines Record| Record;
 8 | @pass_single
 9 | Record: Fields NewLine;
10 | @collect_sep
11 | Fields: Fields "," Field | Field;
12 | Field: QuotedField | FieldContent;
13 | NewLines: NewLine | NewLines NewLine;
14 | OptionalNewLines: NewLines | EMPTY;
15 | @pass_inner
16 | QuotedField: "\"" FieldContentQuoted "\"";
17 | 
18 | terminals
19 | FieldContent: /[^,\n]+/;
20 | FieldContentQuoted: /(("")|([^"]))+/;
21 | NewLine: "\n";
22 | """
23 | 
24 | 
25 | def main(debug=False):
26 |     g = Grammar.from_string(grammar)
27 |     parser = Parser(g, ws='\t ', debug=debug, debug_colors=True)
28 | 
29 |     input_str = """
30 |     First, Second with multiple words, "Third, quoted with comma"
31 | 
32 | 
33 |     Next line, Previous line has newlines, 2
34 |     Another Line, 34.45, "Quoted", field
35 | 
36 | 
37 |     """
38 | 
39 |     res = parser.parse(input_str)
40 | 
41 |     print("Input:\n", input_str)
42 |     print("Result = ", res)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     main(debug=True)
47 | 


--------------------------------------------------------------------------------
/examples/custom_table_caching/.gitignore:
--------------------------------------------------------------------------------
1 | _table.py
2 | 


--------------------------------------------------------------------------------
/examples/custom_table_caching/README.md:
--------------------------------------------------------------------------------
 1 | Custom parse table caching example
 2 | ==================================
 3 | 
 4 | Parse table is stored as a python module (`_table.py`). To generate it, in this directory run:
 5 | 
 6 |     python compile.py
 7 | 
 8 | Then precomputed parse table is used in parser script
 9 | 
10 |     python parser.py
11 | 


--------------------------------------------------------------------------------
/examples/custom_table_caching/compile.py:
--------------------------------------------------------------------------------
 1 | from grammar import grammar, start_symbol
 2 | 
 3 | from parglare.tables import LALR, create_table
 4 | from parglare.tables.persist import table_to_serializable
 5 | 
 6 | table = create_table(
 7 |     grammar,
 8 |     start_production=grammar.get_production_id(start_symbol),
 9 |     itemset_type=LALR,
10 |     prefer_shifts=False,
11 |     prefer_shifts_over_empty=False,
12 | )
13 | serializable_table = table_to_serializable(table)
14 | 
15 | with open('_table.py', 'w') as f:
16 |     f.write('table = ')
17 |     f.write(repr(serializable_table))
18 | 


--------------------------------------------------------------------------------
/examples/custom_table_caching/grammar.py:
--------------------------------------------------------------------------------
1 | from parglare import Grammar
2 | 
3 | grammar = Grammar.from_string("""
4 |     start: ab;
5 |     ab: "a" ab "b" | EMPTY;
6 | """)
7 | 
8 | start_symbol = 'start'
9 | 


--------------------------------------------------------------------------------
/examples/custom_table_caching/parser.py:
--------------------------------------------------------------------------------
 1 | from _table import table
 2 | from grammar import grammar
 3 | 
 4 | from parglare import GLRParser
 5 | from parglare.tables.persist import table_from_serializable
 6 | 
 7 | table = table_from_serializable(table, grammar)
 8 | parser = GLRParser(grammar, table=table)
 9 | 
10 | print(parser.parse('aaabbb'))
11 | 


--------------------------------------------------------------------------------
/examples/java/README.md:
--------------------------------------------------------------------------------
 1 | This is an implementation of Java SE 16 Edition based on the spec defined in
 2 | https://docs.oracle.com/javase/specs/jls/se16/html/jls-19.html
 3 | 
 4 | Since the grammar is derived from the official specification, please see the
 5 | legal terms from the specification if you plan to use it in your project.
 6 | https://docs.oracle.com/javase/specs/jls/se16/html/index.html
 7 | 
 8 | The test input file is from the [Spring Boot
 9 | project](https://github.com/spring-projects/spring-boot).
10 | 


--------------------------------------------------------------------------------
/examples/java/java_example.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | 
 4 | from parglare import GLRParser, Grammar
 5 | 
 6 | 
 7 | def main(debug=False):
 8 |     this_folder = os.path.dirname(__file__)
 9 | 
10 |     g = Grammar.from_file(os.path.join(this_folder, 'java16.pg'))
11 |     parser = GLRParser(g, debug=debug, debug_colors=True)
12 | 
13 |     file_name = os.path.join(this_folder, 'TomcatServletWebServerFactory.java')
14 |     file_size = os.path.getsize(file_name)
15 | 
16 |     t_start = time.time()
17 |     forest = parser.parse_file(file_name)
18 |     t_end = time.time()
19 | 
20 |     print(f'Elapsed time: {t_end - t_start:.2f}', 'sec')
21 |     print(f'Speed = {file_size/1000/(t_end - t_start):.2f}',
22 |           'KB/sec\n')
23 |     print('Solutions: ', forest.solutions)
24 |     print('Ambiguities: ', forest.ambiguities)
25 | 
26 | 
27 | if __name__ == "__main__":
28 |     main(debug=False)
29 | 


--------------------------------------------------------------------------------
/examples/json/example1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "glossary": {
 3 |         "title": "example glossary",
 4 | 		"GlossDiv": {
 5 |             "title": "S",
 6 | 			"GlossList": {
 7 |                 "GlossEntry": {
 8 |                     "ID": "SGML",
 9 | 					"SortAs": "SGML",
10 | 					"GlossTerm": "Standard Generalized Markup Language",
11 | 					"Acronym": "SGML",
12 | 					"Abbrev": "ISO 8879:1986",
13 | 					"GlossDef": {
14 |                         "para": "A meta-markup language, used to create markup languages such as DocBook.",
15 | 						"GlossSeeAlso": ["GML", "XML"]
16 |                     },
17 | 					"GlossSee": "markup"
18 |                 }
19 |             }
20 |         }
21 |     }
22 | }
23 | 


--------------------------------------------------------------------------------
/examples/json/example2.json:
--------------------------------------------------------------------------------
 1 | {"menu": {
 2 |   "id": "file",
 3 |   "value": "File",
 4 |   "popup": {
 5 |     "menuitem": [
 6 |       {"value": "New", "onclick": "CreateNewDoc()"},
 7 |       {"value": "Open", "onclick": "OpenDoc()"},
 8 |       {"value": "Close", "onclick": "CloseDoc()"}
 9 |     ]
10 |   }
11 | }}
12 | 


--------------------------------------------------------------------------------
/examples/json/example3.json:
--------------------------------------------------------------------------------
 1 | {"widget": {
 2 |     "debug": "on",
 3 |     "window": {
 4 |         "title": "Sample Konfabulator Widget",
 5 |         "name": "main_window",
 6 |         "width": 500,
 7 |         "height": 500
 8 |     },
 9 |     "image": { 
10 |         "src": "Images/Sun.png",
11 |         "name": "sun1",
12 |         "hOffset": 250,
13 |         "vOffset": 250,
14 |         "alignment": "center"
15 |     },
16 |     "text": {
17 |         "data": "Click Here",
18 |         "size": 36,
19 |         "style": "bold",
20 |         "name": "text1",
21 |         "hOffset": 250,
22 |         "vOffset": 100,
23 |         "alignment": "center",
24 |         "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;"
25 |     }
26 | }}
27 | 
28 | 


--------------------------------------------------------------------------------
/examples/json/example4.json:
--------------------------------------------------------------------------------
 1 | {"web-app": {
 2 |   "servlet": [   
 3 |     {
 4 |       "servlet-name": "cofaxCDS",
 5 |       "servlet-class": "org.cofax.cds.CDSServlet",
 6 |       "init-param": {
 7 |         "configGlossary:installationAt": "Philadelphia, PA",
 8 |         "configGlossary:adminEmail": "ksm@pobox.com",
 9 |         "configGlossary:poweredBy": "Cofax",
10 |         "configGlossary:poweredByIcon": "/images/cofax.gif",
11 |         "configGlossary:staticPath": "/content/static",
12 |         "templateProcessorClass": "org.cofax.WysiwygTemplate",
13 |         "templateLoaderClass": "org.cofax.FilesTemplateLoader",
14 |         "templatePath": "templates",
15 |         "templateOverridePath": "",
16 |         "defaultListTemplate": "listTemplate.htm",
17 |         "defaultFileTemplate": "articleTemplate.htm",
18 |         "useJSP": false,
19 |         "jspListTemplate": "listTemplate.jsp",
20 |         "jspFileTemplate": "articleTemplate.jsp",
21 |         "cachePackageTagsTrack": 200,
22 |         "cachePackageTagsStore": 200,
23 |         "cachePackageTagsRefresh": 60,
24 |         "cacheTemplatesTrack": 100,
25 |         "cacheTemplatesStore": 50,
26 |         "cacheTemplatesRefresh": 15,
27 |         "cachePagesTrack": 200,
28 |         "cachePagesStore": 100,
29 |         "cachePagesRefresh": 10,
30 |         "cachePagesDirtyRead": 10,
31 |         "searchEngineListTemplate": "forSearchEnginesList.htm",
32 |         "searchEngineFileTemplate": "forSearchEngines.htm",
33 |         "searchEngineRobotsDb": "WEB-INF/robots.db",
34 |         "useDataStore": true,
35 |         "dataStoreClass": "org.cofax.SqlDataStore",
36 |         "redirectionClass": "org.cofax.SqlRedirection",
37 |         "dataStoreName": "cofax",
38 |         "dataStoreDriver": "com.microsoft.jdbc.sqlserver.SQLServerDriver",
39 |         "dataStoreUrl": "jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon",
40 |         "dataStoreUser": "sa",
41 |         "dataStorePassword": "dataStoreTestQuery",
42 |         "dataStoreTestQuery": "SET NOCOUNT ON;select test='test';",
43 |         "dataStoreLogFile": "/usr/local/tomcat/logs/datastore.log",
44 |         "dataStoreInitConns": 10,
45 |         "dataStoreMaxConns": 100,
46 |         "dataStoreConnUsageLimit": 100,
47 |         "dataStoreLogLevel": "debug",
48 |         "maxUrlLength": 500}},
49 |     {
50 |       "servlet-name": "cofaxEmail",
51 |       "servlet-class": "org.cofax.cds.EmailServlet",
52 |       "init-param": {
53 |       "mailHost": "mail1",
54 |       "mailHostOverride": "mail2"}},
55 |     {
56 |       "servlet-name": "cofaxAdmin",
57 |       "servlet-class": "org.cofax.cds.AdminServlet"},
58 |  
59 |     {
60 |       "servlet-name": "fileServlet",
61 |       "servlet-class": "org.cofax.cds.FileServlet"},
62 |     {
63 |       "servlet-name": "cofaxTools",
64 |       "servlet-class": "org.cofax.cms.CofaxToolsServlet",
65 |       "init-param": {
66 |         "templatePath": "toolstemplates/",
67 |         "log": 1,
68 |         "logLocation": "/usr/local/tomcat/logs/CofaxTools.log",
69 |         "logMaxSize": "",
70 |         "dataLog": 1,
71 |         "dataLogLocation": "/usr/local/tomcat/logs/dataLog.log",
72 |         "dataLogMaxSize": "",
73 |         "removePageCache": "/content/admin/remove?cache=pages&id=",
74 |         "removeTemplateCache": "/content/admin/remove?cache=templates&id=",
75 |         "fileTransferFolder": "/usr/local/tomcat/webapps/content/fileTransferFolder",
76 |         "lookInContext": 1,
77 |         "adminGroupID": 4,
78 |         "betaServer": true}}],
79 |   "servlet-mapping": {
80 |     "cofaxCDS": "/",
81 |     "cofaxEmail": "/cofaxutil/aemail/*",
82 |     "cofaxAdmin": "/admin/*",
83 |     "fileServlet": "/static/*",
84 |     "cofaxTools": "/tools/*"},
85 |  
86 |   "taglib": {
87 |     "taglib-uri": "cofax.tld",
88 |     "taglib-location": "/WEB-INF/tlds/cofax.tld"}}}
89 | 


--------------------------------------------------------------------------------
/examples/json/example5.json:
--------------------------------------------------------------------------------
 1 | {"menu": {
 2 |     "header": "SVG Viewer",
 3 |     "items": [
 4 |         {"id": "Open"},
 5 |         {"id": "OpenNew", "label": "Open New"},
 6 |         null,
 7 |         {"id": "ZoomIn", "label": "Zoom In"},
 8 |         {"id": "ZoomOut", "label": "Zoom Out"},
 9 |         {"id": "OriginalView", "label": "Original View"},
10 |         null,
11 |         {"id": "Quality"},
12 |         {"id": "Pause"},
13 |         {"id": "Mute"},
14 |         null,
15 |         {"id": "Find", "label": "Find..."},
16 |         {"id": "FindAgain", "label": "Find Again"},
17 |         {"id": "Copy"},
18 |         {"id": "CopyAgain", "label": "Copy Again"},
19 |         {"id": "CopySVG", "label": "Copy SVG"},
20 |         {"id": "ViewSVG", "label": "View SVG"},
21 |         {"id": "ViewSource", "label": "View Source"},
22 |         {"id": "SaveAs", "label": "Save As"},
23 |         null,
24 |         {"id": "Help"},
25 |         {"id": "About", "label": "About Adobe CVG Viewer..."}
26 |     ]
27 | }}
28 | 


--------------------------------------------------------------------------------
/examples/json/json.pg:
--------------------------------------------------------------------------------
 1 | value: FALSE | TRUE | NULL | object | array | number | string;
 2 | object: "{" member*[COMMA] "}";
 3 | member: string ":" value;
 4 | array: "[" value*[COMMA] "]";
 5 | 
 6 | terminals
 7 | FALSE: 'false';
 8 | TRUE: 'true';
 9 | NULL: 'null';
10 | COMMA: ',';
11 | number: /-?\d+(\.\d+)?(e|E[-+]?\d+)?/;
12 | string: /"((\\")|[^"])*"/;
13 | 


--------------------------------------------------------------------------------
/examples/json/json_example.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar, Parser
 4 | 
 5 | 
 6 | def main(debug=False):
 7 |     this_folder = os.path.dirname(__file__)
 8 |     g = Grammar.from_file(os.path.join(this_folder, 'json.pg'))
 9 |     parser = Parser(g, debug=debug, debug_colors=True)
10 | 
11 |     for i in range(5):
12 |         result = parser.parse_file(os.path.join(this_folder, f'example{i+1}.json'))
13 |         print(result)
14 | 
15 | 
16 | if __name__ == "__main__":
17 |     main(debug=True)
18 | 


--------------------------------------------------------------------------------
/examples/molecular_formulas/README.md:
--------------------------------------------------------------------------------
 1 | This example is based
 2 | on
 3 | [article by Andrew Dalke](http://www.dalkescientific.com/writings/diary/archive/2007/11/03/antlr_java.html) comparing
 4 | ANTLR and PLY performance in parsing molecular formulas.
 5 | 
 6 | An example is modified to compare PLY and parglare. You can see the difference
 7 | in styles of grammar/actions definition and parser construction.
 8 | 
 9 | By running `python run_test.py` you will see the speed difference. PLY is faster
10 | in this tests.
11 | 


--------------------------------------------------------------------------------
/examples/molecular_formulas/parglare_mw.py:
--------------------------------------------------------------------------------
 1 | """Calculate the molecular weight given a molecular formula
 2 | 
 3 | Parse the formula using parglare.
 4 | This example is based on the example from
 5 | PLY compared with pyparsing and ANTLR by Andrew Dalke
 6 | http://www.dalkescientific.com/writings/diary/archive/2007/11/03/antlr_java.html
 7 | """
 8 | from parglare import Grammar, Parser
 9 | 
10 | grammar = r"""
11 | mw: EMPTY | formula;
12 | formula: species | formula species;
13 | species: ATOM DIGITS | ATOM;
14 | 
15 | terminals
16 | DIGITS: /\d+/;
17 | """
18 | 
19 | mw_table = {
20 |     'H': 1.00794,
21 |     'C': 12.001,
22 |     'Cl': 35.453,
23 |     'O': 15.999,
24 |     'S': 32.06,
25 | }
26 | 
27 | atom_names = sorted(
28 |     mw_table.keys(),
29 |     key=lambda symbol: (symbol[0], -len(symbol), symbol))
30 | 
31 | # Creates a pattern like:  Cl|C|H|O|S
32 | atom_pattern = "|".join(atom_names)
33 | 
34 | # Extend grammar definition with the ATOM rule
35 | grammar += f'\nATOM: /{atom_pattern}/;'
36 | 
37 | actions = {
38 |     'mw': [lambda _, __: 0.0,
39 |            lambda _, nodes: nodes[0]],
40 |     'formula': [lambda _, nodes: nodes[0],
41 |                 lambda _, nodes: nodes[0] + nodes[1]],
42 |     'species': [lambda _, nodes: nodes[0] * nodes[1],
43 |                 lambda _, nodes: nodes[0]],
44 |     'ATOM': lambda _, value: mw_table[value],
45 |     'DIGITS': lambda _, value: int(value)
46 | }
47 | 
48 | parser = Parser(Grammar.from_string(grammar), actions=actions)
49 | 
50 | 
51 | def calculate_mw(formula):
52 |     return parser.parse(formula)
53 | 


--------------------------------------------------------------------------------
/examples/molecular_formulas/ply_mw.py:
--------------------------------------------------------------------------------
  1 | """Calculate the molecular weight given a molecular formula
  2 | 
  3 | Parse the formula using PLY.
  4 | """
  5 | # ply_mw.py
  6 | 
  7 | import ply.yacc as yacc
  8 | from ply import lex
  9 | from ply.lex import TOKEN
 10 | 
 11 | 
 12 | class ParseError(Exception):
 13 |     def __init__(self, msg, offset):
 14 |         self.msg = msg
 15 |         self.offset = offset
 16 | 
 17 |     def __repr__(self):
 18 |         return f"ParseError({self.msg!r}, {self.offset!r})"
 19 | 
 20 |     def __str__(self):
 21 |         return f"{self.msg} at position {self.offset + 1}"
 22 | 
 23 | 
 24 | # Define the lexer
 25 | 
 26 | tokens = (
 27 |     "ATOM",
 28 |     "DIGITS",
 29 | )
 30 | 
 31 | mw_table = {
 32 |     'H': 1.00794,
 33 |     'C': 12.001,
 34 |     'Cl': 35.453,
 35 |     'O': 15.999,
 36 |     'S': 32.06,
 37 | }
 38 | 
 39 | 
 40 | # I don't want to duplicate the atom names so extract the
 41 | # keys to make the lexer pattern.
 42 | 
 43 | # Sort order is:
 44 | #   - alphabetically on first character, to make it easier
 45 | # for a human to look at and debug any problems
 46 | #
 47 | #   - then by the length of the symbol; two letters before 1
 48 | # Needed because Python's regular expression matcher
 49 | # uses "first match" not "longest match" rules.
 50 | # For example, "C|Cl" matches only the "C" in "Cl"
 51 | # The "-" in "-len(symbol)" is a trick to reverse the sort order.
 52 | #
 53 | #   - then by the full symbol, to make it easier for people
 54 | 
 55 | # (This is more complicated than needed; it's to show how
 56 | # this approach can scale to all 100+ known and named elements)
 57 | 
 58 | atom_names = sorted(
 59 |     mw_table.keys(),
 60 |     key=lambda symbol: (symbol[0], -len(symbol), symbol))
 61 | 
 62 | # Creates a pattern like:  Cl|C|H|O|S
 63 | atom_pattern = "|".join(atom_names)
 64 | 
 65 | 
 66 | # Use a relatively new PLY feature to set the __doc__
 67 | # string based on a Python variable.
 68 | @TOKEN(atom_pattern)
 69 | def t_ATOM(t):
 70 |     t.value = mw_table[t.value]
 71 |     return t
 72 | 
 73 | 
 74 | def t_DIGITS(t):
 75 |     r"\d+"
 76 |     t.value = int(t.value)
 77 |     return t
 78 | 
 79 | 
 80 | def t_error(t):
 81 |     raise ParseError("unknown character", t.lexpos)
 82 | 
 83 | 
 84 | lexer = lex.lex()
 85 | 
 86 | # Here's an example of using the lexer
 87 | 
 88 | # data = "H2SO4"
 89 | #
 90 | # lex.input(data)
 91 | #
 92 | # for tok in iter(lex.token, None):
 93 | #     print tok
 94 | 
 95 | # Define the grammar
 96 | 
 97 | 
 98 | # The molecular weight of "" is 0.0
 99 | def p_mw_empty(p):
100 |     "mw : "
101 |     p[0] = 0.0
102 | 
103 | 
104 | def p_mw_formula(p):
105 |     "mw : formula"
106 |     p[0] = p[1]
107 | 
108 | 
109 | def p_first_species_term(p):
110 |     "formula : species"
111 |     p[0] = p[1]
112 | 
113 | 
114 | def p_species_list(p):
115 |     "formula : formula species"
116 |     p[0] = p[1] + p[2]
117 | 
118 | 
119 | def p_species(p):
120 |     "species : ATOM DIGITS"
121 |     p[0] = p[1] * p[2]
122 | 
123 | 
124 | def p_species_default(p):
125 |     "species : ATOM"
126 |     p[0] = p[1]
127 | 
128 | 
129 | def p_error(p):
130 |     raise ParseError("unexpected character", p.lexpos)
131 | 
132 | 
133 | parser = yacc.yacc()
134 | 
135 | # Work around a problem in PLY 2.3 where the first parse does not
136 | # allow a "".  I reported it to the ply mailing list on 2 November.
137 | # This guarantees the first parse will never be "" :)
138 | parser.parse("C")
139 | 
140 | 
141 | # Calculate molecular weight
142 | def calculate_mw(formula):
143 |     return parser.parse(formula, lexer=lexer)
144 | 


--------------------------------------------------------------------------------
/examples/molecular_formulas/run_test.py:
--------------------------------------------------------------------------------
 1 | """Run tests to validate the MW parsers and compare timing results."""
 2 | # compare_mw.py
 3 | 
 4 | import sys
 5 | 
 6 | # time.clock is more accurate under Windows
 7 | import time
 8 | 
 9 | import parglare_mw
10 | import ply_mw
11 | 
12 | if sys.platform == "win32":
13 |     timer = time.clock
14 | else:
15 |     timer = time.time
16 | 
17 | _mw_table = {
18 |     'H': 1.00794,
19 |     'C': 12.001,
20 |     'Cl': 35.453,
21 |     'O': 15.999,
22 |     'S': 32.06,
23 | }
24 | 
25 | _element_names = list(_mw_table.keys())
26 | 
27 | 
28 | def _generate_random_formulas():
29 |     import random
30 |     # Using semi-random values so I can check a wide space
31 |     # Number of terms in the formula
32 |     _possible_lengths = (1, 2, 3, 4, 5, 10, 53, 104)
33 |     # Repeat count for each formula
34 |     _possible_counts = tuple(range(12)) + (88, 91, 106, 107, 200, 1234)
35 |     for _i in range(2500):
36 |         terms = []
37 |         total_mw = 0.0
38 |         # Use a variety of lengths
39 |         for _j in range(random.choice(_possible_lengths)):
40 |             symbol = random.choice(_element_names)
41 |             terms.append(symbol)
42 |             count = random.choice(_possible_counts)
43 |             if count == 1 and random.randint(0, 2) == 1:
44 |                 pass
45 |             else:
46 |                 terms.append(str(count))
47 | 
48 |             total_mw += _mw_table[symbol] * count
49 |         yield total_mw, "".join(terms)
50 | 
51 | 
52 | _selected_formulas = [
53 |     (0.0, ""),
54 |     (1.00794, "H"),
55 |     (1.00794, "H1"),
56 |     (32.06, "S"),
57 |     (12.001+1.00794*4, "CH4"),
58 |     ]
59 | 
60 | good_test_data = (_selected_formulas +
61 |                   list(_generate_random_formulas()))
62 | 
63 | 
64 | def do_tests(calculate_mw):
65 |     start_time = timer()
66 |     for expected_mw, formula in good_test_data:
67 |         got_mw = calculate_mw(formula)
68 |         if expected_mw != got_mw:
69 |             raise AssertionError(f"{formula!r} expected {expected_mw!r} got {got_mw!r}")
70 |     return timer() - start_time
71 | 
72 | 
73 | print("Testing", len(good_test_data), "formulas")
74 | 
75 | # Evaluate everything with parglare
76 | parglare_time = do_tests(parglare_mw.calculate_mw)
77 | print("parglare", parglare_time)
78 | 
79 | # Evaluate everything with PLY
80 | ply_time = do_tests(ply_mw.calculate_mw)
81 | print("PLY", ply_time)
82 | 
83 | print("ratio = %.02f" % (parglare_time / ply_time))
84 | 
85 | # I really should test that they handle invalid formulas ...
86 | 


--------------------------------------------------------------------------------
/examples/quick_intro.py:
--------------------------------------------------------------------------------
 1 | from parglare import Grammar, Parser
 2 | 
 3 | grammar = r"""
 4 | E: E '+' E  {left, 1}
 5 |  | E '-' E  {left, 1}
 6 |  | E '*' E  {left, 2}
 7 |  | E '/' E  {left, 2}
 8 |  | E '^' E  {right, 3}
 9 |  | '(' E ')'
10 |  | number;
11 | 
12 | terminals
13 | number: /\d+(\.\d+)?/;
14 | """
15 | 
16 | actions = {
17 |     "E": [lambda _, n: n[0] + n[2],
18 |           lambda _, n: n[0] - n[2],
19 |           lambda _, n: n[0] * n[2],
20 |           lambda _, n: n[0] / n[2],
21 |           lambda _, n: n[0] ** n[2],
22 |           lambda _, n: n[1],
23 |           lambda _, n: n[0]],
24 |     "number": lambda _, value: float(value),
25 | }
26 | 
27 | g = Grammar.from_string(grammar)
28 | parser = Parser(g, debug=True, actions=actions)
29 | 
30 | result = parser.parse("34 + 4.6 / 2 * 4^2^2 + 78")
31 | 
32 | print("Result = ", result)
33 | 
34 | # Output
35 | # -- Debugging/tracing output with detailed info about grammar, productions,
36 | # -- terminals and nonterminals, DFA states, parsing progress,
37 | # -- and at the end of the output:
38 | # Result = 700.8
39 | 


--------------------------------------------------------------------------------
/examples/rhapsody/rhapsody.pg:
--------------------------------------------------------------------------------
 1 | Model:        Header Object;
 2 | Object:       '{' ID Property+ '}';
 3 | Property:     '-' ID '=' Values SemiColon?;
 4 | Property:     '-' ID '=' SemiColon;
 5 | Values:       Value  | Values SemiColon? Value;
 6 | Value:        STRING | INT | FLOAT | GUID | Object | ID;
 7 | 
 8 | 
 9 | terminals
10 | 
11 | STRING: /("(\\"|[^"])*")|(\'(\\\'|[^\'])*\')/;
12 | 
13 | // INT and FLOAT are ambiguous. Prefer INT if both match are of same length.
14 | INT: /[-+]?[0-9]+\b/ {prefer};
15 | FLOAT: /[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\b/;
16 | 
17 | GUID: /[a-f0-9]*-[a-f0-9]*-[a-f0-9]*-[a-f0-9]*-[a-f0-9]*/;
18 | SemiColon: ';';
19 | ID: /[a-zA-Z_][a-zA-Z_0-9]*/;
20 | Header: /[^\n]*/;
21 | 


--------------------------------------------------------------------------------
/examples/rhapsody/rhapsody.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar, Parser
 4 | 
 5 | 
 6 | def main(debug=False):
 7 | 
 8 |     this_folder = os.path.dirname(__file__)
 9 |     grammar_file = os.path.join(this_folder, 'rhapsody.pg')
10 |     g = Grammar.from_file(grammar_file, debug=debug, debug_colors=True)
11 |     parser = Parser(g, build_tree=True, debug=debug, debug_colors=True)
12 | 
13 |     with open(os.path.join(this_folder, 'LightSwitch.rpy')) as f:
14 |         result = parser.parse(f.read())
15 |         print(result.to_str())
16 | 
17 | 
18 | if __name__ == '__main__':
19 |     main(debug=True)
20 | 


--------------------------------------------------------------------------------
/examples/robot/README.md:
--------------------------------------------------------------------------------
 1 | # Robot example
 2 | 
 3 | In this example we have a simple language for moving a robot on a discrete grid.
 4 | There are two type of commands: (1) setting initial position (2) moving in a
 5 | given direction for given steps. If no steps are given 1 is assumed.
 6 | 
 7 | - `robot.pg` - is the grammar of the language. Language supports C-like comments.
 8 | - `program.rbt` - is the "program" executed in this example
 9 | - `robot.py` - is a script that defines semantic actions, constructs and
10 |   executes parser.
11 | - `robot.pg.dot.png` - is a PNG file representing LR automata. This file is
12 |   produced by:
13 | 
14 |   ```
15 |   pglr viz robot.pg
16 |   dot -Tpng -O robot.pg.dot
17 |   ```
18 | 
19 |   `dot` is a part of [GraphViz](http://graphviz.org/) software package.
20 | 


--------------------------------------------------------------------------------
/examples/robot/program.rbt:
--------------------------------------------------------------------------------
1 | begin
2 |   initial 3, 1
3 |   up 4          // go up 4 steps
4 |   left 9
5 |   down          // step is optional
6 |   right 1
7 | end
8 | 


--------------------------------------------------------------------------------
/examples/robot/robot.pg:
--------------------------------------------------------------------------------
 1 | program: "begin" commands=command* "end";
 2 | command: initial | move;
 3 | initial: INITIAL x=INT "," y=INT;
 4 | move: direction=direction steps=INT?;
 5 | direction: "up" | "down" | "left" | "right";
 6 | 
 7 | // Support for C-like comments
 8 | LAYOUT: LayoutItem | LAYOUT LayoutItem | EMPTY;
 9 | LayoutItem: WS | Comment;
10 | 
11 | terminals
12 | INT: /\d+/;
13 | INITIAL: "initial";
14 | WS: /\s+/;
15 | Comment: /\/\/.*/;
16 | 


--------------------------------------------------------------------------------
/examples/robot/robot.pg.dot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/examples/robot/robot.pg.dot.png


--------------------------------------------------------------------------------
/examples/robot/robot.py:
--------------------------------------------------------------------------------
 1 | """This example is inspired by an example from LISA tool
 2 | (http://labraj.uni-mb.si/lisa/) presented during the lecture given by prof.
 3 | Marjan Mernik (http://lpm.uni-mb.si/mernik/) at the University of Novi Sad in
 4 | June, 2011.
 5 | 
 6 | An example of the robot program:
 7 |    begin
 8 |        initial 3, 1
 9 |        up 4
10 |        left 9
11 |        down
12 |        right 1
13 |    end
14 | 
15 | """
16 | import os
17 | 
18 | from parglare import Grammar, Parser, get_collector
19 | 
20 | action = get_collector()
21 | 
22 | 
23 | @action
24 | def INT(_, value):
25 |     return int(value)
26 | 
27 | 
28 | @action
29 | def initial(context, nodes, x, y):
30 |     print(f"Robot initial position set to: {x}, {y}")
31 |     # We use context.extra to keep robot position state.
32 |     context.extra = (x, y)
33 | 
34 | 
35 | @action
36 | def program(context, nodes, commands):
37 |     return context.extra
38 | 
39 | 
40 | @action
41 | def move(context, nodes, direction, steps):
42 |     steps = 1 if steps is None else steps
43 |     print(f"Moving robot {direction} for {steps} steps.")
44 | 
45 |     move = {
46 |         "up": (0, 1),
47 |         "down": (0, -1),
48 |         "left": (-1, 0),
49 |         "right": (1, 0)
50 |     }[direction]
51 | 
52 |     # Calculate new robot position
53 |     x, y = context.extra
54 |     context.extra = (x + steps * move[0], y + steps * move[1])
55 | 
56 | 
57 | def main(debug=False):
58 |     this_folder = os.path.dirname(__file__)
59 |     g = Grammar.from_file(os.path.join(this_folder, 'robot.pg'),
60 |                           debug=debug, debug_colors=True)
61 |     parser = Parser(g, actions=action.all, debug=debug,
62 |                     debug_colors=True)
63 | 
64 |     end_position = parser.parse_file(os.path.join(this_folder, 'program.rbt'))
65 | 
66 |     print(f"Robot stops at position: {end_position}")
67 | 
68 | 
69 | if __name__ == "__main__":
70 |     main(debug=False)
71 | 


--------------------------------------------------------------------------------
/install-dev.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | pip install --upgrade pip || exit 1
4 | pip install -e .[dev] || exit 1
5 | ./install-test.sh
6 | 


--------------------------------------------------------------------------------
/install-test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | pip install --upgrade pip || exit 1
4 | pip install -e .[test] || exit 1
5 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: parglare
 2 | 
 3 | site_description: A pure Python (G)LR parser with integrated scanner
 4 | site_author: Igor Dejanović
 5 | 
 6 | repo_url: https://github.com/igordejanovic/parglare
 7 | theme:
 8 |   name: readthedocs
 9 |   analytics:
10 |     gtag: UA-68681917-1
11 | extra_css: [style.css]
12 | strict: true
13 | 
14 | nav:
15 | - Home: index.md
16 | - User Guide:
17 |   - Getting started: getting_started.md
18 |   - Grammar language: grammar_language.md
19 |   - Grammar class: grammar.md
20 |   - Parser: parser.md
21 |   - Actions: actions.md
22 |   - Common API: common.md
23 |   - Recognizers: recognizers.md
24 |   - Modularization: grammar_modularization.md
25 |   - LR parsing and conflicts: lr_parsing.md
26 |   - Disambiguation: disambiguation.md
27 |   - Parse forest/trees: parse_forest_trees.md
28 |   - Handling errors: handling_errors.md
29 |   - pglr command: pglr.md
30 |   - Debugging: debugging.md
31 | - Release Notes:
32 |   - 0.15: release_notes/release_0_15.md
33 |   - 0.14: release_notes/release_0_14.md
34 | - About:
35 |   - Contributing: about/CONTRIBUTING.md
36 |   - License: about/LICENSE.md
37 | 
38 | markdown_extensions:
39 | - admonition:
40 | - toc:
41 |     permalink: true
42 | 
43 | plugins:
44 |   - mike
45 |   - search
46 | 
47 | copyright: Copyright &copy; <a href="http://igordejanovic.net/">Igor Dejanović</a>.
48 | 


--------------------------------------------------------------------------------
/parglare/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # flake8: NOQA
 3 | from parglare.parser import Parser, Token, pos_to_line_col
 4 | from parglare.tables import LALR, SLR, SHIFT, REDUCE, ACCEPT
 5 | from parglare.glr import GLRParser
 6 | from parglare.grammar import Grammar, NonTerminal, Terminal, \
 7 |     RegExRecognizer, StringRecognizer, EMPTY, STOP
 8 | from parglare.common import get_collector
 9 | from parglare.trees import Node, NodeTerm, NodeNonTerm, visitor
10 | from parglare.exceptions import ParserInitError, ParseError, GrammarError, \
11 |     DisambiguationError, LoopError
12 | 
13 | try:
14 |     from importlib.metadata import version
15 | except ModuleNotFoundError:
16 |     from importlib_metadata import version
17 | 
18 | __version__ = version("parglare")
19 | 


--------------------------------------------------------------------------------
/parglare/actions.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Common parsing actions.
  3 | """
  4 | import contextlib
  5 | 
  6 | 
  7 | def pass_none(_, value, *args):
  8 |     return None
  9 | 
 10 | 
 11 | def pass_nochange(_, value, *args):
 12 |     return value
 13 | 
 14 | 
 15 | def pass_empty(_, value, *args):
 16 |     """
 17 |     Used for EMPTY production alternative in collect.
 18 |     """
 19 |     return []
 20 | 
 21 | 
 22 | def pass_single(_, nodes):
 23 |     """
 24 |     Unpack single value and pass up.
 25 |     """
 26 |     return nodes[0]
 27 | 
 28 | 
 29 | def pass_inner(_, nodes):
 30 |     """
 31 |     Pass inner value up, e.g. for stripping parentheses as in
 32 |     `( <some expression> )`.
 33 |     """
 34 |     n = nodes[1:-1]
 35 |     with contextlib.suppress(ValueError):
 36 |         n, = n
 37 |     return n
 38 | 
 39 | 
 40 | def collect_first(_, nodes):
 41 |     """
 42 |     Used for:
 43 |     Elements = Elements Element;
 44 |     """
 45 |     e1, e2 = nodes
 46 |     if e2 is not None:
 47 |         e1 = list(e1)
 48 |         e1.append(e2)
 49 |     return e1
 50 | 
 51 | 
 52 | def collect_first_sep(_, nodes):
 53 |     """
 54 |     Used for:
 55 |     Elements = Elements "," Element;
 56 |     """
 57 |     e1, _, e2 = nodes
 58 |     if e2 is not None:
 59 |         e1 = list(e1)
 60 |         e1.append(e2)
 61 |     return e1
 62 | 
 63 | 
 64 | def collect_right_first(_, nodes):
 65 |     """
 66 |     Used for:
 67 |     Elements = Element Elements;
 68 |     """
 69 |     e1, e2 = [nodes[0]], nodes[1]
 70 |     e1.extend(e2)
 71 |     return e1
 72 | 
 73 | 
 74 | def collect_right_first_sep(_, nodes):
 75 |     """
 76 |     Used for:
 77 |     Elements = Element "," Elements;
 78 |     """
 79 |     e1, e2 = [nodes[0]], nodes[2]
 80 |     e1.extend(e2)
 81 |     return e1
 82 | 
 83 | 
 84 | # Used for productions of the form - one or more elements:
 85 | # Elements: Elements Element | Element;
 86 | collect = [
 87 |     collect_first,
 88 |     pass_nochange
 89 | ]
 90 | 
 91 | # Used for productions of the form - one or more elements:
 92 | # Elements: Elements "," Element | Element;
 93 | collect_sep = [
 94 |     collect_first_sep,
 95 |     pass_nochange
 96 | ]
 97 | 
 98 | # Used for productions of the form - zero or more elements:
 99 | # Elements: Elements Element | Element | EMPTY;
100 | collect_optional = [
101 |     collect_first,
102 |     pass_nochange,
103 |     pass_empty
104 | ]
105 | 
106 | # Used for productions of the form - zero or more elements:
107 | # Elements: Elements "," Element | Element | EMPTY;
108 | collect_sep_optional = [
109 |     collect_first_sep,
110 |     pass_nochange,
111 |     pass_empty
112 | ]
113 | 
114 | # Used for productions of the form - one or more elements:
115 | # Elements: Element Elements | Element;
116 | collect_right = [
117 |     collect_right_first,
118 |     pass_nochange
119 | ]
120 | 
121 | # Used for productions of the form - one or more elements:
122 | # Elements: Element "," Elements | Element;
123 | collect_right_sep = [
124 |     collect_right_first_sep,
125 |     pass_nochange
126 | ]
127 | 
128 | # Used for productions of the form - zero or more elements:
129 | # Elements: Element Elements | Element | EMPTY;
130 | collect_right_optional = [
131 |     collect_right_first,
132 |     pass_nochange,
133 |     pass_empty
134 | ]
135 | 
136 | # Used for productions of the form - zero or more elements:
137 | # Elements: Element "," Elements | Element | EMPTY;
138 | collect_right_sep_optional = [
139 |     collect_right_first_sep,
140 |     pass_nochange,
141 |     pass_empty
142 | ]
143 | 
144 | # Used for the production of the form:
145 | # OptionalElement: Element | EMPTY;
146 | optional = [
147 |     pass_single,
148 |     pass_none
149 | ]
150 | 
151 | 
152 | def obj(context, nodes, **attrs):
153 |     """
154 |     Creates Python object with the attributes created from named matches.
155 |     This action is used as a default action for rules with named matches.
156 |     """
157 |     grammar = context.parser.grammar
158 |     rule_name = context.production.symbol.fqn
159 | 
160 |     cls = grammar.classes[rule_name]
161 |     instance = cls(**attrs)
162 | 
163 |     instance._pg_start_position = context.start_position
164 |     instance._pg_end_position = context.end_position
165 | 
166 |     return instance
167 | 


--------------------------------------------------------------------------------
/parglare/closure.py:
--------------------------------------------------------------------------------
 1 | from parglare.grammar import EMPTY, NonTerminal
 2 | 
 3 | LR_0 = 0
 4 | LR_1 = 1
 5 | 
 6 | 
 7 | def closure(state, itemset_type, first_sets=None):
 8 |     """
 9 |     For the given LRState calculates its LR(0)/LR(1) itemset closure.
10 | 
11 |     Args:
12 |     state(LRState):
13 |     itemset_type(int): LR_0 or LR_1
14 |     first_sets(dict of sets): Used in LR_1 itemsets calculation.
15 |     """
16 |     from parglare.tables import LRItem
17 | 
18 |     items_to_process = list(state.items)
19 |     while items_to_process:
20 |         item = items_to_process.pop()
21 |         symbol = item.symbol_at_position
22 |         if not isinstance(symbol, NonTerminal):
23 |             continue
24 | 
25 |         # Calculate follow set that is possible after the
26 |         # non-terminal at the given position of the current
27 |         # item.
28 |         if itemset_type is LR_1:
29 |             follow = _new_item_follow(item, first_sets)
30 |         for prod in [p for p in state.grammar.productions
31 |                      if p.symbol == symbol]:
32 |             new_item = LRItem(prod, 0,
33 |                               set(follow) if itemset_type is LR_1 else None)
34 |             if new_item not in state.items:
35 |                 # If the item doesn't exists yet add it and reprocess it.
36 |                 state.items.append(new_item)
37 |                 items_to_process.append(new_item)
38 |             elif itemset_type is LR_1:
39 |                 # If the item already exists, this newly created item might
40 |                 # still have a wider follows set. If so, update with the
41 |                 # current new item follows set if we are building LR_1 items
42 |                 # set.
43 |                 existing_item = next(i for i in state.items if i == new_item)
44 |                 if not follow.issubset(existing_item.follow):
45 |                     existing_item.follow.update(follow)
46 |                     # If there was an update in the follow set of the existing
47 |                     # item we have to process it again as we have to update
48 |                     # follows of all items that were created from it.
49 |                     items_to_process.append(existing_item)
50 | 
51 | 
52 | def _new_item_follow(item, first_sets):
53 |     """
54 |     Returns follow set of possible terminals after the item's current
55 |     non-terminal.
56 | 
57 |     Args:
58 |     item (LRItem): The source item which is causing the creation of the
59 |         new item.
60 |     first_sets(dict of sets): The dict of set of first items keyed by
61 |         a grammar symbol.
62 |     """
63 | 
64 |     new_follow = set()
65 |     for s in item.production.rhs[item.position + 1:]:
66 |         new_follow.update(first_sets[s])
67 |         if EMPTY not in new_follow:
68 |             # If EMPTY can't be derived at current position then we have found
69 |             # the whole follow set.
70 |             break
71 |         else:
72 |             # If the EMPTY is possible at current position in this loop we must
73 |             # continue to include firsts of the next grammar symbol. EMPTY
74 |             # can't be a member of the follow set.
75 |             new_follow.remove(EMPTY)
76 |     else:
77 |         # If the rest of production can be EMPTY we shall inherit all elements
78 |         # of the source item follow set.
79 |         new_follow.update(item.follow)
80 | 
81 |     return new_follow
82 | 


--------------------------------------------------------------------------------
/parglare/export.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from parglare.common import dot_escape
 3 | from parglare.parser import REDUCE, SHIFT
 4 | 
 5 | HEADER = '''
 6 |     digraph grammar {
 7 |     rankdir=LR
 8 |     fontname = "Bitstream Vera Sans"
 9 |     fontsize = 8
10 |     node[
11 |         shape=record,
12 |         style=filled,
13 |         fillcolor=aliceblue
14 |     ]
15 |     nodesep = 0.3
16 |     edge[dir=black,arrowtail=empty]
17 | 
18 | 
19 | '''
20 | 
21 | 
22 | def grammar_pda_export(table, file_name):
23 | 
24 |     with open(file_name, 'w', encoding="utf-8") as f:
25 |         f.write(HEADER)
26 | 
27 |         for state in table.states:
28 |             kernel_items = ""
29 |             for item in state.kernel_items:
30 |                 kernel_items += f"{dot_escape(str(item))}\\l"
31 | 
32 |             nonkernel_items = "|" if state.nonkernel_items else ""
33 |             for item in state.nonkernel_items:
34 |                 nonkernel_items += f"{dot_escape(str(item))}\\l"
35 | 
36 |             # SHIFT actions and GOTOs will be encoded in links.
37 |             # REDUCE actions will be presented inside each node.
38 |             reduce_actions = []
39 |             for term, actions in state.actions.items():
40 |                 r_actions = [a for a in actions if a.action is REDUCE]
41 |                 if r_actions:
42 |                     reduce_actions.append((term, r_actions))
43 | 
44 |             reductions = ""
45 |             if reduce_actions:
46 |                 reductions = "|Reductions:\\l{}".format(
47 |                     ", ".join(["{}:{}".format(
48 |                         dot_escape(x[0].name), x[1][0].prod.prod_id
49 |                         if len(x[1]) == 1 else "[{}]".format(
50 |                                 ",".join([str(i.prod.prod_id) for i in x[1]])))
51 |                                for x in reduce_actions]))
52 | 
53 |             # States
54 |             f.write('{}[label="{}|{}{}{}"]\n'
55 |                     .format(
56 |                         state.state_id,
57 |                         dot_escape(f"{state.state_id}:{state.symbol}"),
58 |                         kernel_items, nonkernel_items, reductions))
59 | 
60 |             f.write("\n")
61 | 
62 |             # SHIFT and GOTOs as links
63 |             shacc = []
64 |             for term, actions in state.actions.items():
65 |                 for a in [a for a in actions if a.action is SHIFT]:
66 |                     shacc.append((term, a))
67 |             for term, action in shacc:
68 |                 f.write('{} -> {} [label="{}:{}"]'.format(
69 |                     state.state_id,
70 |                     action.state.state_id,
71 |                     "SHIFT" if action.action is SHIFT else "ACCEPT", term))
72 | 
73 |             for symb, goto_state in ((symb, goto) for symb, goto
74 |                                      in state.gotos.items()):
75 |                 f.write(f'{state.state_id} -> {goto_state.state_id}'
76 |                         f' [label="GOTO:{symb}"]')
77 | 
78 |         f.write("\n}\n")
79 | 


--------------------------------------------------------------------------------
/parglare/tables/persist.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from collections import OrderedDict
  3 | 
  4 | 
  5 | def table_to_serializable(table):
  6 |     """Convert table object to serializable representation composed of
  7 |     lists and dicts."""
  8 |     # states
  9 |     states = []
 10 |     for state in table.states:
 11 |         states.append(_dump_state(state))
 12 | 
 13 |     return states
 14 | 
 15 | 
 16 | def save_table(file_name, table):
 17 |     with open(file_name, 'w') as f:
 18 |         json.dump(table_to_serializable(table), f, sort_keys=True)
 19 | 
 20 | 
 21 | def table_from_serializable(serialized_states, grammar):
 22 |     """Convert serializable representation of a parsing table into
 23 |     LRTable object."""
 24 |     from parglare.tables import Action, LRState, LRTable
 25 | 
 26 |     states = []
 27 |     states_dict = {}
 28 |     for json_state in serialized_states:
 29 |         state = LRState(grammar, json_state['state_id'],
 30 |                         grammar.get_symbol(json_state['symbol']))
 31 |         states_dict[state.state_id] = state
 32 |         state.finish_flags = json_state['finish_flags']
 33 |         state.actions = json_state['actions']
 34 |         state.gotos = json_state['gotos']
 35 |         states.append(state)
 36 | 
 37 |     # Unpack actions and gotos
 38 |     for state in states:
 39 | 
 40 |         actions = OrderedDict()
 41 |         for json_action_fqn in state.actions:
 42 |             terminal_fqn, json_actions = json_action_fqn
 43 |             term_acts = []
 44 |             for json_action in json_actions:
 45 |                 if 'state_id' in json_action:
 46 |                     act_state = states_dict[json_action['state_id']]
 47 |                 else:
 48 |                     act_state = None
 49 |                 if 'prod_id' in json_action:
 50 |                     act_prod = grammar.productions[json_action['prod_id']]
 51 |                 else:
 52 |                     act_prod = None
 53 |                 term_acts.append(Action(json_action['action'],
 54 |                                         act_state, act_prod))
 55 | 
 56 |             actions[grammar.get_terminal(terminal_fqn)] = term_acts
 57 |         state.actions = actions
 58 | 
 59 |         gotos = OrderedDict()
 60 |         for json_goto_fqn in state.gotos:
 61 |             nonterm_fqn, goto_state = json_goto_fqn
 62 |             gotos[grammar.get_nonterminal(nonterm_fqn)] = \
 63 |                 states_dict[goto_state]
 64 |         state.gotos = gotos
 65 | 
 66 |     table = LRTable(states, calc_finish_flags=False)
 67 | 
 68 |     return table
 69 | 
 70 | 
 71 | def load_table(file_name, grammar):
 72 |     with open(file_name) as f:
 73 |         return table_from_serializable(json.load(f), grammar)
 74 | 
 75 | 
 76 | def _dump_state(state):
 77 |     s = {}
 78 |     s['state_id'] = state.state_id
 79 |     s['symbol'] = state.symbol.fqn
 80 |     action_items = list(state.actions.items())
 81 |     s['actions'] = [[terminal.fqn, _dump_actions(actions)]
 82 |                     for terminal, actions in action_items]
 83 |     goto_items = list(state.gotos.items())
 84 |     s['gotos'] = [[nonterminal.fqn, st.state_id]
 85 |                   for nonterminal, st in goto_items]
 86 |     s['finish_flags'] = state.finish_flags
 87 | 
 88 |     return s
 89 | 
 90 | 
 91 | def _dump_actions(actions):
 92 |     alist = []
 93 |     for action in actions:
 94 |         a = {}
 95 |         a['action'] = action.action
 96 |         if action.state is not None:
 97 |             a['state_id'] = action.state.state_id
 98 |         if action.prod is not None:
 99 |             a['prod_id'] = action.prod.prod_id
100 |         alist.append(a)
101 | 
102 |     return alist
103 | 


--------------------------------------------------------------------------------
/parglare/termui.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import click
 3 | 
 4 | colors = False
 5 | 
 6 | S_ATTENTION = {'fg': 'red', 'bold': True}
 7 | S_HEADER = {'fg': 'green'}
 8 | S_EMPH = {'fg': 'yellow'}
 9 | 
10 | 
11 | def prints(message, s=None):
12 |     if s is None:
13 |         s = {}
14 |     click.echo(style(message, s), color=colors)
15 | 
16 | 
17 | def style_message(message, style):
18 |     if colors:
19 |         return click.style(message, **style)
20 |     else:
21 |         return message
22 | 
23 | 
24 | def s_header(message):
25 |     return style_message(message, S_HEADER)
26 | 
27 | 
28 | def s_attention(message):
29 |     return style_message(message, S_ATTENTION)
30 | 
31 | 
32 | def s_emph(message):
33 |     return style_message(message, S_EMPH)
34 | 
35 | 
36 | def style(header, content, level=0, new_line=False, header_style=S_HEADER,
37 |           width=120):
38 |     if content:
39 |         content_start = level * 8 + len(header) + 1
40 |         content_width = width - content_start
41 |         content = str(content)
42 |         content = [content[start:start+content_width]
43 |                    for start in range(0, len(content), content_width)]
44 |         content = ('\n' + ' ' * content_start).join(content)
45 |     new_line = "\n" if new_line else ""
46 |     level = ("\t" * level) if level else ""
47 |     return new_line + level + style_message(str(header), header_style) \
48 |         + ((" " + str(content)) if content else "")
49 | 
50 | 
51 | def styled_print(header, content, level=0, new_line=False,
52 |                  header_style=S_HEADER, width=120):
53 |     prints(style(header, content, level, new_line, header_style, width))
54 | 
55 | 
56 | def h_print(header, content="", level=0, new_line=False):
57 |     styled_print(header, content, level, new_line, S_HEADER)
58 | 
59 | 
60 | def a_print(header, content="", level=0, new_line=False):
61 |     styled_print(header, content, level, new_line, S_ATTENTION)
62 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "parglare"
 3 | version = "0.18.0"
 4 | description = "A pure Python LR/GLR parser"
 5 | authors = [
 6 |     {name = "Igor R. Dejanović", email = "igor.dejanovic@gmail.com"},
 7 | ]
 8 | maintainers = [
 9 |     {name = "Igor R. Dejanović", email = "igor.dejanovic@gmail.com"},
10 | ]
11 | readme = "README.rst"
12 | license = {text = "MIT"}
13 | keywords = ["parser", "lr", "glr"]
14 | classifiers = [
15 |     "Development Status :: 4 - Beta",
16 |     "Intended Audience :: Developers",
17 |     "Intended Audience :: Information Technology",
18 |     "Intended Audience :: Science/Research",
19 |     "Topic :: Software Development :: Interpreters",
20 |     "Topic :: Software Development :: Compilers",
21 |     "Topic :: Software Development :: Libraries :: Python Modules",
22 |     "License :: OSI Approved :: MIT License",
23 |     "Natural Language :: English",
24 |     "Programming Language :: Python :: 3",
25 |     "Programming Language :: Python :: 3.8",
26 |     "Programming Language :: Python :: 3.9",
27 |     "Programming Language :: Python :: 3.10",
28 |     "Programming Language :: Python :: 3.11",
29 |     "Programming Language :: Python :: 3.12",
30 |     "Operating System :: OS Independent",
31 | ]
32 | 
33 | requires-python = ">=3.8, <3.13"
34 | dependencies = [
35 |     "click >=7.0, <9.0"
36 | ]
37 | 
38 | [project.urls]
39 | Homepage = "https://github.com/igordejanovic/parglare"
40 | Repository = "https://github.com/igordejanovic/parglare"
41 | Changelog = "https://github.com/igordejanovic/parglare/blob/master/CHANGELOG.md"
42 | 
43 | [build-system]
44 | build-backend = "flit_core.buildapi"
45 | requires = ["flit_core >=3.8.0,<4"]
46 | 
47 | [tool.flit.module]
48 | name = "parglare"
49 | 
50 | [tool.flit.sdist]
51 | exclude = ["**/.*"]
52 | 
53 | [project.optional-dependencies]
54 | dev = [
55 |     "ruff",
56 |     "flit",
57 |     "mkdocs",
58 |     "mike",
59 | ]
60 | 
61 | test = [
62 |     "ruff",
63 |     "coverage",
64 |     "coveralls",
65 |     "pytest",
66 | ]
67 | 
68 | [project.scripts]
69 | pglr = "parglare.cli:pglr"
70 | 
71 | [tool.ruff]
72 | line-length = 90
73 | indent-width = 4
74 | 
75 | [tool.ruff.lint]
76 | # https://docs.astral.sh/ruff/linter/#rule-selection
77 | select = [
78 |     # pycodestyle
79 |     "E",
80 |     # Pyflakes
81 |     "F",
82 |     # pyupgrade
83 |     "UP",
84 |     # flake8-bugbear
85 |     "B",
86 |     # flake8-simplify
87 |     "SIM",
88 |     # isort
89 |     "I",
90 | ]
91 | 


--------------------------------------------------------------------------------
/runtests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Run all tests and generate coverage report
3 | 
4 | coverage run --omit parglare/cli.py --source parglare -m pytest tests/func || exit 1
5 | coverage report --fail-under 90 || exit 1
6 | # Run this to generate html report
7 | # coverage html --directory=coverage
8 | ruff check parglare/ tests/func examples/ || exit 1
9 | 


--------------------------------------------------------------------------------
/scripts/parglare_qtree.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Produce LaTex qtree output from the parglare parse trees.
 3 | 
 4 | from parglare import Grammar, GLRParser
 5 | 
 6 | INPUT = '1 + 2 * 3 + 4'
 7 | 
 8 | grammar = r'''
 9 | E: E '+' E
10 |  | E '*' E
11 |  | '(' E ')'
12 |  | number;
13 | 
14 | terminals
15 | number: /\d+/;
16 | '''
17 | 
18 | g = Grammar.from_string(grammar)
19 | parser = GLRParser(g, build_tree=True)
20 | 
21 | result = parser.parse(INPUT)
22 | 
23 | 
24 | def to_str(node, depth=0):
25 |     indent = '  ' * depth
26 |     if node.is_nonterm():
27 |         s = '\n{}[.{} {}\n{}]'.format(indent,
28 |                                       node.production.symbol,
29 |                                       ''.join([to_str(n, depth+1)
30 |                                                for n in node.children]),
31 |                                       indent)
32 |     else:
33 |         s = '\n{}[.{} ]'.format(indent, node.value)
34 |     return s
35 | 
36 | 
37 | with open('qtree_out.txt', 'w') as f:
38 |     f.write('\begin{{tabular}}{{{}}}\n'.format('c' * len(result)))
39 |     trees = '&\n'.join(['\\Tree {}'.format(to_str(tree)) for tree in result])
40 |     f.write(trees)
41 | 


--------------------------------------------------------------------------------
/tests/func/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/__init__.py


--------------------------------------------------------------------------------
/tests/func/actions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/actions/__init__.py


--------------------------------------------------------------------------------
/tests/func/actions/collector/grammar.pg:
--------------------------------------------------------------------------------
1 | Model: INT+ Rule1 INT;
2 | Rule1: a=STRING;
3 | 
4 | terminals
5 | INT: /\d+/;
6 | STRING: /'.*'/;
7 | 


--------------------------------------------------------------------------------
/tests/func/actions/collector/test_actions_get_collector.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar, Parser, get_collector
 4 | 
 5 | THIS_FOLDER = os.path.abspath(os.path.dirname(__file__))
 6 | 
 7 | 
 8 | def test_action_explicit_get_collector():
 9 |     """
10 |     Test the basic usage of `get_collector` API where we don't provide
11 |     actions in a separate python module.
12 |     """
13 | 
14 |     action = get_collector()
15 | 
16 |     @action
17 |     def INT(context, value):
18 |         return int(value)
19 | 
20 |     @action
21 |     def STRING(context, value):
22 |         return f"#{value}#"
23 | 
24 |     grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'))
25 |     Parser(grammar, actions=action.all)
26 | 
27 | 
28 | def test_action_explicit_get_collector_missing_action():
29 |     """
30 |     Test when `get_collector` has a terminal without defined action nothing
31 |     happens as the default implicit action will be used.
32 |     """
33 | 
34 |     action = get_collector()
35 | 
36 |     @action
37 |     def INT(context, value):
38 |         return int(value)
39 | 
40 |     grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'))
41 |     Parser(grammar, actions=action.all)
42 | 
43 | 
44 | def test_actions_explicit_get_collector_action_for_unexisting_terminal():
45 |     """
46 |     Test for situation when `get_collector` has an action for un-existing
47 |     terminal.
48 |     """
49 | 
50 |     action = get_collector()
51 | 
52 |     @action
53 |     def INT(context, value):
54 |         return int(value)
55 | 
56 |     @action
57 |     def STRING(context, value):
58 |         return f"#{value}#"
59 | 
60 |     @action
61 |     def STRING2(context, value):
62 |         return f"#{value}#"
63 | 
64 |     grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'))
65 |     Parser(grammar, actions=action.all)
66 | 


--------------------------------------------------------------------------------
/tests/func/grammar/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/grammar/__init__.py


--------------------------------------------------------------------------------
/tests/func/grammar/calc.pg:
--------------------------------------------------------------------------------
 1 | Calc: Assignments E;
 2 | Assignments: Assignment | Assignments Assignment | EMPTY;
 3 | Assignment: VariableName "=" Number;
 4 | 
 5 | E: E "+" E {left, 1}
 6 |  | E "-" E {left, 1}
 7 |  | E "*" E {left, 2}
 8 |  | E "/" E {left, 2}
 9 |  | "(" E ")"
10 |  | VariableRef
11 |  | Number
12 | ;
13 | 
14 | VariableRef: VariableName;
15 | 
16 | terminals
17 | VariableName: /[a-zA-Z_][_a-zA-Z0-9]*/;
18 | Number: /\d+(\.\d+)?/;
19 | 


--------------------------------------------------------------------------------
/tests/func/grammar/calcactions.py:
--------------------------------------------------------------------------------
 1 | def act_assignment(context, nodes):
 2 |     name = nodes[0]
 3 |     number = nodes[2]
 4 | 
 5 |     # Use context.extra to collect variables
 6 |     if context.extra is None:
 7 |         context.extra = {}
 8 | 
 9 |     context.extra[name] = number
10 | 
11 | 
12 | actions = {
13 |     "Calc": lambda _, nodes: nodes[1],
14 |     "Assignment": act_assignment,
15 |     "E": [lambda _, nodes: nodes[0] + nodes[2],
16 |           lambda _, nodes: nodes[0] - nodes[2],
17 |           lambda _, nodes: nodes[0] * nodes[2],
18 |           lambda _, nodes: nodes[0] / nodes[2],
19 |           lambda _, nodes: nodes[1],
20 |           lambda _, nodes: nodes[0],
21 |           lambda _, nodes: nodes[0]],
22 |     "Number": lambda _, value: float(value),
23 |     "VariableName": lambda _, value: value,
24 |     "VariableRef": lambda context, nodes: context.extra[nodes[0]],
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/func/grammar/expression_grammar.py:
--------------------------------------------------------------------------------
 1 | from parglare import Grammar, NonTerminal, Terminal
 2 | 
 3 | # Expression grammar
 4 | E, T, F = (NonTerminal(name) for name in ['E', 'T', 'F'])
 5 | PLUS, MULT, ID, OPEN, CLOSE = (
 6 |     Terminal(value) for value in ['+', '*', 'id', '(', ')'])
 7 | productions = [
 8 |     (E, (E, PLUS, T)),
 9 |     (E, (T, )),
10 |     (T, (T, MULT, F)),
11 |     (T, (F, )),
12 |     (F, (OPEN, E, CLOSE)),
13 |     (F, (ID,))
14 | ]
15 | 
16 | 
17 | def get_grammar():
18 |     return Grammar.from_struct(productions=productions, start_symbol=E)
19 | 


--------------------------------------------------------------------------------
/tests/func/grammar/expression_grammar_numbers.py:
--------------------------------------------------------------------------------
 1 | from parglare import Grammar, NonTerminal, RegExRecognizer, Terminal
 2 | 
 3 | 
 4 | def get_grammar():
 5 | 
 6 |     # Expression grammar with float numbers
 7 |     E, T, F = (NonTerminal(name) for name in ['E', 'T', 'F'])
 8 |     PLUS, MULT, OPEN, CLOSE = (
 9 |         Terminal(value) for value in ['+', '*', '(', ')'])
10 |     NUMBER = Terminal('number', RegExRecognizer(r'\d+(\.\d+)?'))
11 |     productions = [
12 |         (E, (E, PLUS, T)),
13 |         (E, (T, )),
14 |         (T, (T, MULT, F)),
15 |         (T, (F, )),
16 |         (F, (OPEN, E, CLOSE)),
17 |         (F, (NUMBER,))
18 |     ]
19 | 
20 |     return Grammar.from_struct(productions=productions, start_symbol=E)
21 | 


--------------------------------------------------------------------------------
/tests/func/grammar/test_groups.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Grammar rules can use groups in parentheses.
  3 | The group should be treated the same as any other rule reference,
  4 | it can be used in assignments, repetitions etc.
  5 | """
  6 | from parglare import GLRParser, Grammar
  7 | from parglare.grammar import ASSOC_LEFT, MULT_ONE
  8 | 
  9 | 
 10 | def test_group_with_sequence():
 11 |     grammar_str = r'''
 12 |     a: (b* c);
 13 |     b: c;
 14 |     terminals
 15 |     c: "c";
 16 |     '''
 17 |     grammar = Grammar.from_string(grammar_str)
 18 | 
 19 |     # Check initial rule
 20 |     assert grammar.productions[0].rhs[0].name == 'a'
 21 | 
 22 |     # Check that A references A_g1
 23 |     assert grammar.get_productions('a')[0].rhs[0].name == 'a_g1'
 24 | 
 25 |     # Check group rule
 26 |     assert grammar.get_nonterminal('a_g1')
 27 |     prods = grammar.get_productions('a_g1')
 28 |     assert len(prods) == 1
 29 |     assert len(prods[0].rhs) == 2
 30 |     assert prods[0].rhs[0].name == 'b_0'
 31 |     assert prods[0].rhs[1].name == 'c'
 32 | 
 33 | 
 34 | def test_group_with_choice():
 35 |     grammar_str = r'''
 36 |     a: c (b* c | b);
 37 |     b: c;
 38 |     terminals
 39 |     c: "c";
 40 |     '''
 41 |     grammar = Grammar.from_string(grammar_str)
 42 | 
 43 |     # Check initial rule
 44 |     assert grammar.productions[0].rhs[0].name == 'a'
 45 | 
 46 |     # Check that A references A_g1
 47 |     assert grammar.get_productions('a')[0].rhs[1].name == 'a_g1'
 48 | 
 49 |     assert grammar.get_nonterminal('a_g1')
 50 |     prods = grammar.get_productions('a_g1')
 51 |     assert len(prods) == 2
 52 |     assert len(prods[0].rhs) == 2
 53 |     assert prods[0].rhs[0].name == 'b_0'
 54 |     assert prods[0].rhs[1].name == 'c'
 55 |     assert len(prods[1].rhs) == 1
 56 |     assert prods[1].rhs[0].name == 'b'
 57 | 
 58 | 
 59 | def test_group_with_metadata():
 60 |     grammar_str = r'''
 61 |     a: (b* c {left} | c);
 62 |     b: c;
 63 |     terminals
 64 |     c: "c";
 65 |     '''
 66 |     grammar = Grammar.from_string(grammar_str)
 67 |     assert grammar.get_nonterminal('a_g1')
 68 |     prods = grammar.get_productions('a_g1')
 69 |     assert len(prods) == 2
 70 |     assert len(prods[0].rhs) == 2
 71 |     assert prods[0].rhs[0].name == 'b_0'
 72 |     assert prods[0].rhs[1].name == 'c'
 73 |     assert prods[0].assoc == ASSOC_LEFT
 74 | 
 75 | 
 76 | def test_group_with_assignment():
 77 |     grammar_str = r'''
 78 |     a: c c=(b* c);
 79 |     terminals
 80 |     b: "b";
 81 |     c: "c";
 82 |     '''
 83 |     grammar = Grammar.from_string(grammar_str)
 84 |     assert grammar.get_nonterminal('a_g1')
 85 |     prods = grammar.get_productions('a_g1')
 86 |     assert len(prods) == 1
 87 |     prods_a = grammar.get_productions('a')
 88 |     assert len(prods_a) == 1
 89 | 
 90 |     assert not prods[0].assignments
 91 |     assert prods_a[0].assignments
 92 |     assig_c = prods_a[0].assignments['c']
 93 |     assert assig_c.op == '='
 94 |     assert assig_c.multiplicity == MULT_ONE
 95 |     assert assig_c.symbol.name == 'a_g1'
 96 | 
 97 | 
 98 | def test_group_complex():
 99 |     grammar_str = r'''
100 |     @obj
101 |     s: (b c)*[comma];
102 |     s: (b c)*[comma] a=(a+ (b | c)*)+[comma];
103 |     terminals
104 |     a: "a";
105 |     b: "b";
106 |     c: "c";
107 |     comma: ",";
108 |     '''
109 |     grammar = Grammar.from_string(grammar_str)
110 | 
111 |     assert len(grammar.get_productions('s_g1')) == 1
112 |     # B | C
113 |     prods = grammar.get_productions('s_g3')
114 |     assert len(prods) == 2
115 |     assert prods[0].rhs[0].name == 'b'
116 |     assert prods[1].rhs[0].name == 'c'
117 | 
118 |     # Nesting
119 |     prods = grammar.get_productions('s_g2')
120 |     assert len(prods) == 1
121 |     assert prods[0].rhs[0].name == 'a_1'
122 |     assert prods[0].rhs[1].name == 's_g3_0'
123 |     assert grammar.get_productions('s')[1].rhs[1].name == 's_g2_1_comma'
124 | 
125 |     assert 's_g5' not in grammar
126 | 
127 |     parser = GLRParser(grammar)
128 | 
129 |     forest = parser.parse('b c, b c a a a b c c b, a b b')
130 |     result = parser.call_actions(forest[0])
131 |     assert result.a == [[['a', 'a', 'a'],
132 |                          ['b', 'c', 'c', 'b']], [['a'], ['b', 'b']]]
133 | 


--------------------------------------------------------------------------------
/tests/func/grammar/test_keywords.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Test special KEYWORD rule.
  3 | """
  4 | # -*- coding: utf-8 -*-
  5 | import pytest
  6 | 
  7 | from parglare import Grammar, Parser, RegExRecognizer, StringRecognizer
  8 | from parglare.exceptions import GrammarError, ParseError
  9 | 
 10 | 
 11 | def test_keyword_must_be_regex():
 12 |     grammar = r"""
 13 |     S: "for" name=ID "=" from=INT "to" to=INT;
 14 | 
 15 |     terminals
 16 |     KEYWORD: "id";
 17 |     ID: /\w+/;
 18 |     INT: /\d+/;
 19 |     """
 20 | 
 21 |     with pytest.raises(GrammarError) as e:
 22 |         Grammar.from_string(grammar)
 23 | 
 24 |     assert 'must have a regex recognizer defined' in str(e.value)
 25 | 
 26 | 
 27 | def test_keyword_grammar_init():
 28 |     grammar = r"""
 29 |     S: "for" name=ID "=" from=INT "to" to=INT;
 30 | 
 31 |     terminals
 32 |     KEYWORD: /\w+/;
 33 |     ID: /\w+/;
 34 |     INT: /\d+/;
 35 |     """
 36 | 
 37 |     g = Grammar.from_string(grammar)
 38 | 
 39 |     # 'for' term matches KEYWORD rule so it'll be replaced by
 40 |     # RegExRecognizer instance.
 41 |     for_term = g.get_terminal('for')
 42 |     assert type(for_term.recognizer) is RegExRecognizer
 43 |     assert for_term.recognizer._regex == r'\bfor\b'
 44 | 
 45 |     # '=' term doesn't match KEYWORD rule so it will not change
 46 |     eq_term = g.get_terminal('=')
 47 |     assert type(eq_term.recognizer) is StringRecognizer
 48 | 
 49 | 
 50 | def test_keyword_matches_on_word_boundary():
 51 |     grammar = r"""
 52 |     S: "for" name=ID "=" from=INT "to" to=INT;
 53 | 
 54 |     terminals
 55 |     ID: /\w+/;
 56 |     INT: /\d+/;
 57 |     """
 58 | 
 59 |     g = Grammar.from_string(grammar)
 60 | 
 61 |     parser = Parser(g)
 62 |     # This will not raise an error
 63 |     parser.parse('forid=10 to20')
 64 | 
 65 |     # We add KEYWORD rule to the grammar to match ID-like keywords.
 66 |     grammar += r"KEYWORD: /\w+/;"
 67 | 
 68 |     g = Grammar.from_string(grammar)
 69 |     parser = Parser(g)
 70 |     with pytest.raises(ParseError, match='forid=10 t" => Expected: for'):
 71 |         # This *will* raise an error
 72 |         parser.parse('forid=10 to20')
 73 |     with pytest.raises(ParseError, match='Expected: to'):
 74 |         # This *will* also raise an error
 75 |         parser.parse('for id=10 to20')
 76 | 
 77 |     # But this is OK
 78 |     parser.parse('for id=10 to 20')
 79 |     parser.parse('for for=10 to 20')
 80 | 
 81 | 
 82 | def test_keyword_preferred_over_regexes():
 83 |     """
 84 |     Test that keyword matches (internally converted to regex matches) are
 85 |     preferred over ordinary regex matches of the same length.
 86 |     """
 87 | 
 88 |     grammar = r"""
 89 |     S: "for"? name=ID? "=" from=INT "to" to=INT;
 90 | 
 91 |     terminals
 92 |     ID: /\w+/;
 93 |     INT: /\d+/;
 94 |     KEYWORD: /\w+/;
 95 |     """
 96 |     g = Grammar.from_string(grammar)
 97 | 
 98 |     parser = Parser(g)
 99 | 
100 |     # 'for' is ambiguous as it can be keyword or ID(name)
101 |     # ParseError could be thrown but parglare will prefer
102 |     # StringRecognizer and keywords over RegExRecognizer for
103 |     # the match of the same length (i.e. "more specific match")
104 |     parser.parse("for = 10 to 100")
105 | 


--------------------------------------------------------------------------------
/tests/func/grammar/test_load_from_file.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | import os
 3 | from parglare import Grammar, Parser
 4 | from .calcactions import actions
 5 | 
 6 | 
 7 | def test_load_from_file():
 8 | 
 9 |     grammar = Grammar.from_file(os.path.join(
10 |         os.path.dirname(__file__), 'calc.pg'))
11 |     parser = Parser(grammar, actions=actions, debug=True)
12 | 
13 |     res = parser.parse("""
14 |     a = 5
15 |     b = 10
16 | 
17 |     56.4 + a / 3 * 5 - b + 8 * 3
18 |     """)
19 | 
20 |     res2 = 56.4 + 5. / 3 * 5 - 10 + 8 * 3
21 |     print(res2, res)
22 |     assert res == 56.4 + 5. / 3 * 5 - 10 + 8 * 3
23 | 


--------------------------------------------------------------------------------
/tests/func/grammar/test_terminals.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | import re
 3 | from parglare import Parser, Grammar
 4 | 
 5 | 
 6 | def test_str_terminals():
 7 |     g = r"""
 8 |     A: "a" B C D 'b';
 9 | 
10 |     terminals
11 |     B: "b\"";
12 |     C: "\"c\" ";
13 |     D: '\'d\'';
14 |     """
15 |     grammar = Grammar.from_string(g)
16 |     p = Parser(grammar)
17 |     tree = p.parse(r''' a b" "c" 'd' b ''')
18 |     assert tree
19 | 
20 | 
21 | def test_regex_terminals():
22 |     g = r"""
23 |     A: Aterm B C D 'b';
24 |     C: 'c' Cterm;
25 | 
26 |     terminals
27 |     Aterm: /a\//;
28 |     Cterm: /a+/;
29 |     B: /a'b[^"]/;
30 |     D: /\d+\.\d+/;
31 |     """
32 |     grammar = Grammar.from_string(g)
33 |     p = Parser(grammar)
34 |     tree = p.parse(r''' a/ a'bc c aaaa 4.56 b ''')
35 |     assert tree
36 | 
37 |     # Test that re.VEROSE flag is the default for regex matches
38 |     assert grammar.get_terminal('Aterm').recognizer.regex.flags & re.VERBOSE == re.VERBOSE
39 | 


--------------------------------------------------------------------------------
/tests/func/grammar/test_whitespaces.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from parglare import Grammar, Parser
 4 | from parglare.exceptions import ParseError
 5 | 
 6 | from .expression_grammar import get_grammar
 7 | 
 8 | 
 9 | def test_default_whitespaces():
10 | 
11 |     grammar = get_grammar()
12 |     p = Parser(grammar)
13 | 
14 |     p.parse("""id+  id * (id
15 |     +id  )
16 |     """)
17 | 
18 | 
19 | def test_whitespace_redefinition():
20 | 
21 |     grammar = get_grammar()
22 | 
23 |     # Make newline treated as non-ws characted
24 |     p = Parser(grammar, ws=' \t')
25 | 
26 |     p.parse("""id+  id * (id +id  ) """)
27 | 
28 |     try:
29 |         p.parse("""id+  id * (id
30 |         +id  )
31 |         """)
32 |     except ParseError as e:
33 |         assert e.location.start_position == 13
34 | 
35 | 
36 | def test_whitespace_not_used_if_layout():
37 |     """
38 |     If LAYOUT rule is used, ws definition is ignored.
39 |     """
40 |     grammar = """
41 |     S: 'a' 'b';
42 |     LAYOUT: 'k' | EMPTY;
43 |     """
44 |     g = Grammar.from_string(grammar)
45 |     parser = Parser(g)
46 |     with pytest.raises(ParseError):
47 |         parser.parse('a b')
48 | 


--------------------------------------------------------------------------------
/tests/func/import/basic/first.pg:
--------------------------------------------------------------------------------
1 | import 'second.pg';
2 | import 'submodule/third.pg' as t;
3 | 
4 | FirstRule: INT+ second.SecondRule t.ThirdRule;
5 | 
6 | terminals
7 | INT: /\d+/;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/basic/second.pg:
--------------------------------------------------------------------------------
1 | SecondRule: STRING;
2 | 
3 | terminals
4 | STRING: /'[^']'/;
5 | 


--------------------------------------------------------------------------------
/tests/func/import/basic/submodule/third.pg:
--------------------------------------------------------------------------------
1 | ThirdRule: FLOAT;
2 | 
3 | terminals
4 | FLOAT: /\d+\.\d+/;
5 | 


--------------------------------------------------------------------------------
/tests/func/import/basic/test_import.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar
 4 | 
 5 | this_folder = os.path.dirname(__file__)
 6 | 
 7 | 
 8 | def test_import():
 9 |     g = Grammar.from_file(os.path.join(this_folder, 'first.pg'))
10 |     assert g
11 | 


--------------------------------------------------------------------------------
/tests/func/import/diamond/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | ID: /\w+/;
3 | FQN: /\w+(\.\w+)*/;
4 | COMMA: ',';
5 | 


--------------------------------------------------------------------------------
/tests/func/import/diamond/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | 
 3 | Component:
 4 |   'component' name=base.ID extends=ComponentExtends? '{'
 5 |       slots=Slot*
 6 |   '}'
 7 | ;
 8 | 
 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
10 | 
11 | Slot: SlotIn|SlotOut;
12 | 
13 | SlotIn: 'in' name=base.ID;
14 | SlotOut: 'out' name=base.ID;
15 | 


--------------------------------------------------------------------------------
/tests/func/import/diamond/model.pg:
--------------------------------------------------------------------------------
1 | import 'packages.pg';
2 | import 'modules.pg' as m;
3 | 
4 | Model:
5 |     packages=packages.Package*
6 |     modules=m.Module*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/diamond/modules.pg:
--------------------------------------------------------------------------------
1 | import 'components.pg' as c;
2 | import 'base.pg';
3 | 
4 | Module: 'module' name=base.ID '{'
5 |         components=c.Component*
6 | '}';
7 | 


--------------------------------------------------------------------------------
/tests/func/import/diamond/packages.pg:
--------------------------------------------------------------------------------
 1 | import 'components.pg';
 2 | import 'base.pg';
 3 | 
 4 | Package:
 5 |     'package' name=base.ID body=PackageBody?
 6 | ;
 7 | 
 8 | PackageBody:
 9 |     '{'
10 |               components=components.Component*
11 |     '}'
12 | ;
13 | 


--------------------------------------------------------------------------------
/tests/func/import/diamond/test_diamond.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar, Parser
 4 | 
 5 | this_folder = os.path.dirname(__file__)
 6 | 
 7 | 
 8 | def test_diamond_import_resolving_and_model_creation():
 9 |     g = Grammar.from_file(os.path.join(this_folder, 'model.pg'))
10 |     assert g
11 |     assert g.get_terminal('packages.components.base.COMMA')
12 |     assert g.get_nonterminal('Model')
13 | 
14 |     # First path used for import of Component is going
15 |     # packages->components->Component
16 |     component_nonterminal = g.get_nonterminal('packages.components.Component')
17 |     assert component_nonterminal
18 | 
19 |     input_str = '''
20 | 
21 |     package First
22 |     package Second {
23 |         component packageComponent {
24 | 
25 |         }
26 |     }
27 | 
28 |     module SomeModule {
29 | 
30 |         component myComponent {
31 |             in SomeInputSlot
32 |             out SomeOutputSlot
33 |         }
34 | 
35 | 
36 |     }
37 | 
38 |     '''
39 | 
40 |     model = Parser(g).parse(input_str)
41 |     assert model
42 |     assert model.__class__.__name__ == 'Model'
43 |     assert isinstance(model.packages, list)
44 |     assert len(model.packages) == 2
45 |     assert model.packages[0].name == 'First'
46 |     assert isinstance(model.modules, list)
47 |     assert len(model.modules) == 1
48 | 
49 |     packageComponent = model.packages[1].body.components[0]
50 |     assert packageComponent.name == 'packageComponent'
51 | 
52 |     module = model.modules[0]
53 |     assert module.__class__.__name__ == 'm.Module'
54 |     assert module.name == 'SomeModule'
55 |     assert len(module.components) == 1
56 | 
57 |     component = module.components[0]
58 |     assert type(component) is type(packageComponent)
59 |     assert component.name == 'myComponent'
60 |     assert len(component.slots) == 2
61 | 
62 |     slot = component.slots[1]
63 |     assert slot.__class__.__name__ == 'packages.components.SlotOut'
64 |     assert slot.name == 'SomeOutputSlot'
65 | 


--------------------------------------------------------------------------------
/tests/func/import/fqn/A.pg:
--------------------------------------------------------------------------------
1 | import 'B.pg';
2 | import 'C.pg';
3 | 
4 | ARule: C.CRule ATerm B.BTerm C.CTerm;
5 | 
6 | terminals
7 | ATerm: "ATerm";
8 | 


--------------------------------------------------------------------------------
/tests/func/import/fqn/B.pg:
--------------------------------------------------------------------------------
1 | import 'C.pg';
2 | 
3 | BRule: BTerm C.CTerm;
4 | 
5 | terminals
6 | BTerm: "dummy";
7 | 


--------------------------------------------------------------------------------
/tests/func/import/fqn/C.pg:
--------------------------------------------------------------------------------
1 | import 'D.pg';
2 | 
3 | CRule: CTerm;
4 | 
5 | terminals
6 | CTerm: "CTerm";
7 | 


--------------------------------------------------------------------------------
/tests/func/import/fqn/D.pg:
--------------------------------------------------------------------------------
1 | SomeRule: dummy;
2 | 
3 | terminals
4 | dummy: "dummy";
5 | 


--------------------------------------------------------------------------------
/tests/func/import/fqn/test_fqn.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar
 4 | 
 5 | this_folder = os.path.dirname(__file__)
 6 | 
 7 | 
 8 | def test_fqn_constructed_by_first_import_path():
 9 | 
10 |     g = Grammar.from_file(os.path.join(this_folder, 'A.pg'))
11 | 
12 |     assert g.get_terminal('B.C.CTerm')
13 |     assert not g.get_terminal('C.CTerm')
14 |     assert g.get_nonterminal('B.C.CRule')
15 |     assert not g.get_nonterminal('C.CRule')
16 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_action_name/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | ID: /\w+/;
3 | @number NUMERIC_ID: /\d+/;
4 | FQN: /\w+(\.\w+)*/;
5 | COMMA: ',';
6 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_action_name/base_actions.py:
--------------------------------------------------------------------------------
1 | from parglare import get_collector
2 | 
3 | action = get_collector()
4 | 
5 | 
6 | @action
7 | def number(_, value):
8 |     return float(value)
9 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_action_name/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | 
 3 | Component:
 4 |   'component' name=base.ID extends=ComponentExtends? '{'
 5 |       slots=Slot*
 6 |   '}'
 7 | ;
 8 | 
 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
10 | 
11 | Slot: SlotIn|SlotOut;
12 | 
13 | SlotIn: 'in' name=base.ID;
14 | SlotOut: 'out' name=base.ID;
15 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_action_name/model.pg:
--------------------------------------------------------------------------------
1 | import 'base.pg';
2 | import 'components.pg' as c;
3 | 
4 | Model:
5 |     'modelID' modelID = base.NUMERIC_ID
6 |     components=c.Component*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_decorator_action_name/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | ID: /\w+/;
3 | @number NUMERIC_ID: /\d+/;
4 | FQN: /\w+(\.\w+)*/;
5 | COMMA: ',';
6 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_decorator_action_name/base_actions.py:
--------------------------------------------------------------------------------
1 | from parglare import get_collector
2 | 
3 | action = get_collector()
4 | 
5 | 
6 | @action('number')
7 | def NUMERIC(_, value):
8 |     return float(value)
9 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_decorator_action_name/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | 
 3 | Component:
 4 |   'component' name=base.ID extends=ComponentExtends? '{'
 5 |       slots=Slot*
 6 |   '}'
 7 | ;
 8 | 
 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
10 | 
11 | Slot: SlotIn|SlotOut;
12 | 
13 | SlotIn: 'in' name=base.ID;
14 | SlotOut: 'out' name=base.ID;
15 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_decorator_action_name/model.pg:
--------------------------------------------------------------------------------
1 | import 'base.pg';
2 | import 'components.pg' as c;
3 | 
4 | Model:
5 |     'modelID' modelID = base.NUMERIC_ID
6 |     components=c.Component*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_symbol_name/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | ID: /\w+/;
3 | NUMERIC_ID: /\d+/;
4 | FQN: /\w+(\.\w+)*/;
5 | COMMA: ',';
6 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_symbol_name/base_actions.py:
--------------------------------------------------------------------------------
1 | from parglare import get_collector
2 | 
3 | action = get_collector()
4 | 
5 | 
6 | @action
7 | def NUMERIC_ID(_, value):
8 |     return float(value)
9 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_symbol_name/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | 
 3 | Component:
 4 |   'component' name=base.ID extends=ComponentExtends? '{'
 5 |       slots=Slot*
 6 |   '}'
 7 | ;
 8 | 
 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
10 | 
11 | Slot: SlotIn|SlotOut;
12 | 
13 | SlotIn: 'in' name=base.ID;
14 | SlotOut: 'out' name=base.ID;
15 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/by_symbol_name/model.pg:
--------------------------------------------------------------------------------
1 | import 'base.pg';
2 | import 'components.pg' as c;
3 | 
4 | Model:
5 |     'modelID' modelID = base.NUMERIC_ID
6 |     components=c.Component*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_action_name/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | ID: /\w+/;
3 | @numeric NUMERIC_ID: /\d+/;
4 | FQN: /\w+(\.\w+)*/;
5 | COMMA: ',';
6 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_action_name/base_actions.py:
--------------------------------------------------------------------------------
1 | from parglare import get_collector
2 | 
3 | action = get_collector()
4 | 
5 | 
6 | @action('numeric')
7 | def NUMERIC_ID(_, value):
8 |     return float(value)
9 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_action_name/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | 
 3 | Component:
 4 |   'component' name=base.ID extends=ComponentExtends? '{'
 5 |       slots=Slot*
 6 |   '}'
 7 | ;
 8 | 
 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
10 | 
11 | Slot: SlotIn|SlotOut;
12 | 
13 | SlotIn: 'in' name=base.ID;
14 | SlotOut: 'out' name=base.ID;
15 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_action_name/model.pg:
--------------------------------------------------------------------------------
1 | import 'base.pg';
2 | import 'components.pg' as c;
3 | 
4 | Model:
5 |     'modelID' modelID = base.NUMERIC_ID
6 |     components=c.Component*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_action_name/model_actions.py:
--------------------------------------------------------------------------------
 1 | from parglare import get_collector
 2 | 
 3 | action = get_collector()
 4 | 
 5 | 
 6 | @action('base.numeric')
 7 | def number(_, value):
 8 |     "This action is overriding by action name in 'base' module."
 9 |     return 43
10 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_symbol_name/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | ID: /\w+/;
3 | NUMERIC_ID: /\d+/;
4 | FQN: /\w+(\.\w+)*/;
5 | COMMA: ',';
6 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_symbol_name/base_actions.py:
--------------------------------------------------------------------------------
1 | from parglare import get_collector
2 | 
3 | action = get_collector()
4 | 
5 | 
6 | @action
7 | def NUMERIC_ID(_, value):
8 |     return float(value)
9 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_symbol_name/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | 
 3 | Component:
 4 |   'component' name=base.ID extends=ComponentExtends? '{'
 5 |       slots=Slot*
 6 |   '}'
 7 | ;
 8 | 
 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
10 | 
11 | Slot: SlotIn|SlotOut;
12 | 
13 | SlotIn: 'in' name=base.ID;
14 | SlotOut: 'out' name=base.ID;
15 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_symbol_name/model.pg:
--------------------------------------------------------------------------------
1 | import 'base.pg';
2 | import 'components.pg' as c;
3 | 
4 | Model:
5 |     'modelID' modelID = base.NUMERIC_ID
6 |     components=c.Component*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/in_grammar_by_symbol_name/model_actions.py:
--------------------------------------------------------------------------------
1 | from parglare import get_collector
2 | 
3 | action = get_collector()
4 | 
5 | 
6 | @action('base.NUMERIC_ID')
7 | def numeric(_, value):
8 |     return 43
9 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_actions/test_imported_actions.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar, Parser
 4 | 
 5 | this_folder = os.path.dirname(__file__)
 6 | 
 7 | model_str = '''
 8 | modelID 42
 9 | component myComponent {
10 |     in SomeInputSlot
11 |     out SomeOutputSlot
12 | }
13 | '''
14 | 
15 | 
16 | def test_imported_actions_connect_by_symbol_name():
17 |     g = Grammar.from_file(os.path.join(this_folder, 'by_symbol_name/model.pg'))
18 |     model = Parser(g).parse(model_str)
19 |     # Check that base.pg actions are properly loaded and triggered.
20 |     assert model.modelID == 42
21 | 
22 | 
23 | def test_imported_actions_connect_by_action_name():
24 |     g = Grammar.from_file(os.path.join(this_folder, 'by_action_name/model.pg'))
25 |     model = Parser(g).parse(model_str)
26 |     # Check that base.pg actions are properly loaded and triggered.
27 |     assert model.modelID == 42
28 | 
29 | 
30 | def test_imported_actions_connect_by_decorator_action_name():
31 |     g = Grammar.from_file(os.path.join(this_folder,
32 |                                        'by_decorator_action_name/model.pg'))
33 |     model = Parser(g).parse(model_str)
34 |     # Check that base.pg actions are properly loaded and triggered.
35 |     assert model.modelID == 42
36 | 
37 | 
38 | def test_imported_actions_override():
39 |     """
40 |     Test that actions loaded from `*_actions.py` files can be overriden by
41 |     users actions.
42 |     """
43 | 
44 |     # We can override either by fqn of symbol
45 |     g = Grammar.from_file(os.path.join(this_folder, 'by_symbol_name/model.pg'))
46 |     actions = {
47 |         'base.NUMERIC_ID': lambda _, value: 43
48 |     }
49 |     model = Parser(g, actions=actions).parse(model_str)
50 |     assert model.modelID == 43
51 | 
52 |     # Or by action name used in grammar for the given symbol
53 |     g = Grammar.from_file(os.path.join(this_folder, 'by_action_name/model.pg'))
54 |     actions = {
55 |         'base.number': lambda _, value: 43
56 |     }
57 |     model = Parser(g, actions=actions).parse(model_str)
58 |     assert model.modelID == 43
59 | 
60 |     # Override by FQN takes precendence
61 |     g = Grammar.from_file(os.path.join(this_folder, 'by_action_name/model.pg'))
62 |     actions = {
63 |         'base.NUMERIC_ID': lambda _, value: 43
64 |     }
65 |     model = Parser(g, actions=actions).parse(model_str)
66 |     assert model.modelID == 43
67 | 
68 | 
69 | def test_imported_actions_override_by_grammar_actions():
70 |     """
71 |     Test that actions loaded from `*_actions.py` files can override actions
72 |     imported from other grammar files.
73 |     """
74 | 
75 |     g = Grammar.from_file(os.path.join(this_folder,
76 |                                        'in_grammar_by_symbol_name/model.pg'))
77 |     model = Parser(g).parse(model_str)
78 |     assert model.modelID == 43
79 | 
80 |     g = Grammar.from_file(os.path.join(this_folder,
81 |                                        'in_grammar_by_action_name/model.pg'))
82 |     model = Parser(g).parse(model_str)
83 |     assert model.modelID == 43
84 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_recognizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/import/imported_recognizers/__init__.py


--------------------------------------------------------------------------------
/tests/func/import/imported_recognizers/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | // We will define some of the terminals in `*_recognizers.py` file
3 | ID: /\w+/;
4 | @number NUMERIC_ID: ;
5 | FQN: ;
6 | COMMA: ;
7 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_recognizers/base_recognizers.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from parglare import get_collector
 4 | 
 5 | recognizer = get_collector()
 6 | number_re = re.compile(r'\d+(\.\d+)*')
 7 | fqn_re = re.compile(r'\w+(\.\w+)*')
 8 | 
 9 | 
10 | @recognizer('NUMERIC_ID')
11 | def number(input, pos):
12 |     number_match = number_re.match(input[pos:])
13 |     if number_match:
14 |         return input[pos:pos + len(number_match.group())]
15 | 
16 | 
17 | @recognizer
18 | def FQN(input, pos):
19 |     fqn_match = fqn_re.match(input[pos:])
20 |     if fqn_match:
21 |         return input[pos:pos + len(fqn_match.group())]
22 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_recognizers/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | 
 3 | Component:
 4 |   'component' name=base.ID extends=ComponentExtends? '{'
 5 |       slots=Slot*
 6 |   '}'
 7 | ;
 8 | 
 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
10 | 
11 | Slot: SlotIn|SlotOut;
12 | 
13 | SlotIn: 'in' name=base.ID;
14 | SlotOut: 'out' name=base.ID;
15 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_recognizers/model.pg:
--------------------------------------------------------------------------------
1 | import 'base.pg';
2 | import 'components.pg' as c;
3 | 
4 | Model:
5 |     'modelID' modelID = base.NUMERIC_ID
6 |     components=c.Component*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_recognizers/model_override.pg:
--------------------------------------------------------------------------------
1 | import 'base.pg';
2 | import 'components.pg' as c;
3 | 
4 | Model:
5 |     'modelID' modelID = base.NUMERIC_ID
6 |     components=c.Component*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_recognizers/model_override_recognizers.py:
--------------------------------------------------------------------------------
 1 | from parglare import get_collector
 2 | 
 3 | recognizer = get_collector()
 4 | 
 5 | 
 6 | @recognizer('base.NUMERIC_ID')
 7 | def number(input, pos):
 8 |     '''Check override'''
 9 |     pass
10 | 
11 | 
12 | @recognizer('base.COMMA')
13 | def comma_recognizer(input, pos):
14 |     if input[pos] == ',':
15 |         return input[pos:pos + 1]
16 | 


--------------------------------------------------------------------------------
/tests/func/import/imported_recognizers/test_imported_recognizers.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from types import FunctionType
  3 | 
  4 | import pytest
  5 | 
  6 | from parglare import Grammar, GrammarError, ParseError, Parser
  7 | 
  8 | from .base_recognizers import number
  9 | 
 10 | this_folder = os.path.dirname(__file__)
 11 | 
 12 | model_str = '''
 13 | modelID 42.23.5
 14 | component myComponent extends some.fqn.name {
 15 |     in SomeInputSlot
 16 |     out SomeOutputSlot
 17 | }
 18 | '''
 19 | 
 20 | 
 21 | def comma_recognizer(input, pos):
 22 |     if input[pos] == ',':
 23 |         return input[pos:pos + 1]
 24 | 
 25 | 
 26 | def test_imported_recognizers_error_undefined_recognizer():
 27 | 
 28 |     with pytest.raises(GrammarError,
 29 |                        match=r'has no recognizer defined and no recognizers '
 30 |                        'are given'):
 31 |         Grammar.from_file(os.path.join(this_folder, 'model.pg'))
 32 | 
 33 |     # If we define COMMA recognizer grammar will construct without exceptions.
 34 |     g = Grammar.from_file(os.path.join(this_folder, 'model.pg'),
 35 |                           recognizers={'base.COMMA': comma_recognizer})
 36 |     assert g
 37 | 
 38 | 
 39 | def test_imported_recognizers_connect_from_external_file():
 40 |     g = Grammar.from_file(os.path.join(this_folder, 'model.pg'),
 41 |                           recognizers={'base.COMMA': comma_recognizer})
 42 | 
 43 |     # Check that recognizers are loaded and connected.
 44 |     rec_fqn = g.get_terminal('base.FQN')
 45 |     assert rec_fqn.recognizer
 46 |     assert type(rec_fqn.recognizer) is FunctionType
 47 |     assert rec_fqn.recognizer.__name__ == 'FQN'
 48 | 
 49 |     rec_fqn = g.get_terminal('base.NUMERIC_ID')
 50 |     assert rec_fqn.recognizer
 51 |     assert type(rec_fqn.recognizer) is FunctionType
 52 |     assert rec_fqn.recognizer.__name__ == 'number'
 53 | 
 54 | 
 55 | def test_imported_recognizers_override():
 56 |     """
 57 |     Test that recognizers loaded from `*_recognizers.py` files can be
 58 |     overriden by users provided recognizers.
 59 |     """
 60 | 
 61 |     called = [False, False]
 62 | 
 63 |     def numeric_id(input, pos):
 64 |         called[0] = True
 65 | 
 66 |     def fqn(input, pos):
 67 |         called[0] = True
 68 | 
 69 |     recognizers = {
 70 |         'base.COMMA': comma_recognizer,
 71 |         'base.NUMERIC_ID': numeric_id,
 72 |         'base.FQN': fqn
 73 |     }
 74 | 
 75 |     g = Grammar.from_file(os.path.join(this_folder, 'model.pg'),
 76 |                           recognizers=recognizers)
 77 |     assert g
 78 |     with pytest.raises(ParseError):
 79 |         Parser(g).parse(model_str)
 80 |     assert any(called)
 81 | 
 82 |     called = [False]
 83 | 
 84 |     def numeric_id(input, pos):
 85 |         called[0] = True
 86 |         return number(input, pos)
 87 | 
 88 |     recognizers = {
 89 |         'base.COMMA': comma_recognizer,
 90 |         'base.NUMERIC_ID': numeric_id,
 91 |     }
 92 | 
 93 |     g = Grammar.from_file(os.path.join(this_folder, 'model.pg'),
 94 |                           recognizers=recognizers)
 95 |     assert g
 96 |     Parser(g).parse(model_str)
 97 |     assert called[0]
 98 | 
 99 | 
100 | def test_imported_recognizers_override_by_importing_grammar_file():
101 |     """
102 |     Test that recognizers loaded from `*_recognizers.py` files can be
103 |     overriden in importing grammar `*_recognizers.py` file by providing
104 |     FQN of the imported terminal relative from the importing grammar file.
105 |     """
106 | 
107 |     g = Grammar.from_file(os.path.join(this_folder, 'model_override.pg'))
108 |     assert g
109 | 
110 |     t = g.get_terminal('base.NUMERIC_ID')
111 |     assert t is not None
112 | 
113 |     assert t.recognizer.__doc__ == 'Check override'
114 | 


--------------------------------------------------------------------------------
/tests/func/import/override/base.pg:
--------------------------------------------------------------------------------
 1 | import 'first.pg' as f;
 2 | 
 3 | S: f.s.A;
 4 | 
 5 | // This rule overrides imported rule C from the second grammar
 6 | // Each rule that reference old rule C should now
 7 | // reference overriden rule.
 8 | f.s.C: 'k' f.s.B;
 9 | 
10 | terminals
11 | f.s.B: 'bb';  // also all rules referencing terminal B now use overriden version
12 | 


--------------------------------------------------------------------------------
/tests/func/import/override/first.pg:
--------------------------------------------------------------------------------
1 | import 'second.pg' as s;
2 | S: s.A s.C;
3 | 
4 | terminals
5 | // This grammar override terminal match;
6 | s.B: 'bf';
7 | 


--------------------------------------------------------------------------------
/tests/func/import/override/nonexisting.pg:
--------------------------------------------------------------------------------
1 | import 'first.pg' as f;
2 | 
3 | S: f.S;
4 | 
5 | f.NonExisting: f.s.B;
6 | 


--------------------------------------------------------------------------------
/tests/func/import/override/second.pg:
--------------------------------------------------------------------------------
1 | A: B+ C;
2 | C: 'sec' B;
3 | 
4 | terminals
5 | B: 'bs';
6 | 


--------------------------------------------------------------------------------
/tests/func/import/override/test_override.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import pytest
 4 | 
 5 | from parglare import Grammar, GrammarError, Parser
 6 | 
 7 | this_folder = os.path.dirname(__file__)
 8 | 
 9 | 
10 | def test_override_base():
11 |     """
12 |     Test overrides with two level of nesting.
13 |     """
14 |     g = Grammar.from_file(os.path.join(this_folder, 'base.pg'))
15 |     p = Parser(g)
16 |     result = p.parse('bb bb k bb')
17 |     assert result
18 | 
19 | 
20 | def test_override_first():
21 |     """
22 |     Loading grammar from the lower level of import hierarchy works correctly
23 |     also.
24 |     """
25 |     g = Grammar.from_file(os.path.join(this_folder, 'first.pg'))
26 |     p = Parser(g)
27 |     result = p.parse('bf bf sec bf sec bf')
28 |     assert result
29 | 
30 | 
31 | def test_override_nonexisting_symbol():
32 |     """
33 |     Test override that doesn't exist. By default it could go unnoticed and
34 |     the intended rule would not be overriden. This verifies that typo errors
35 |     would not go unnoticed.
36 |     """
37 |     with pytest.raises(GrammarError,
38 |                        match='Unexisting name for symbol override f.NonExisting'):
39 |         Grammar.from_file(os.path.join(this_folder, 'nonexisting.pg'))
40 | 


--------------------------------------------------------------------------------
/tests/func/import/recursion/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | ID: /\w+/;
3 | FQN: /\w+(\.\w+)*/;
4 | COMMA: ',';
5 | 


--------------------------------------------------------------------------------
/tests/func/import/recursion/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | import 'modules.pg';   // Recursive import of 'modules'
 3 | 
 4 | Component:
 5 |   'component' name=base.ID extends=ComponentExtends? '{'
 6 |       slots=Slot*
 7 |       modules=modules.Module*   // recursive reference to Module
 8 |   '}'
 9 | ;
10 | 
11 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
12 | 
13 | Slot: SlotIn|SlotOut;
14 | 
15 | SlotIn: 'in' name=base.ID;
16 | SlotOut: 'out' name=base.ID;
17 | 


--------------------------------------------------------------------------------
/tests/func/import/recursion/model.pg:
--------------------------------------------------------------------------------
1 | import 'packages.pg';
2 | import 'modules.pg' as m;
3 | 
4 | Model:
5 |     packages=packages.Package*
6 |     modules=m.Module*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/import/recursion/modules.pg:
--------------------------------------------------------------------------------
1 | import 'components.pg' as c;   // recursive import of 'components'
2 | import 'base.pg';
3 | 
4 | Module: 'module' name=base.ID '{'
5 |         components=c.Component*
6 | '}';
7 | 


--------------------------------------------------------------------------------
/tests/func/import/recursion/packages.pg:
--------------------------------------------------------------------------------
1 | import 'components.pg';
2 | import 'base.pg';
3 | 
4 | Package:
5 |     'package' name=base.ID
6 | ;
7 | 


--------------------------------------------------------------------------------
/tests/func/import/recursion/test_recursion.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar, Parser
 4 | 
 5 | this_folder = os.path.dirname(__file__)
 6 | 
 7 | 
 8 | def test_recursive_grammar_import():
 9 |     g = Grammar.from_file(os.path.join(this_folder, 'model.pg'))
10 |     assert g
11 | 
12 |     input_str = '''
13 | 
14 |     package First
15 |     package Second
16 | 
17 |     module SomeModule {
18 | 
19 |         component myComponent {
20 |             in SomeInputSlot
21 |             out SomeOutputSlot
22 |         }
23 | 
24 | 
25 |     }
26 | 
27 |     '''
28 | 
29 |     result = Parser(g).parse(input_str)
30 |     assert result
31 | 


--------------------------------------------------------------------------------
/tests/func/parsing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/parsing/__init__.py


--------------------------------------------------------------------------------
/tests/func/parsing/parsing_errors.txt:
--------------------------------------------------------------------------------
1 | id + id * + id
2 | 


--------------------------------------------------------------------------------
/tests/func/parsing/parsing_from_file.txt:
--------------------------------------------------------------------------------
1 | id + id + id
2 | 
3 | + id
4 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_build_tree.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | from parglare import Grammar, Parser
 3 | 
 4 | 
 5 | def test_call_actions_during_tree_build():
 6 |     grammar = """
 7 |     Program: "begin" MoveCommand* "end";
 8 |     MoveCommand: "move" Direction;
 9 |     Direction: "up" | "down" | "left" | "right";
10 |     """
11 | 
12 |     g = Grammar.from_string(grammar)
13 | 
14 |     code = """
15 |     begin
16 |         move left
17 |         move left
18 |         move up
19 |         move down
20 |     end
21 |     """
22 | 
23 |     left_moves = []
24 | 
25 |     def left_dir_collector(_, nodes):
26 |         """Finds all 'left' moves and adds them into a list."""
27 |         term = nodes[0]
28 |         if term.value == "left":
29 |             left_moves.append(term)
30 | 
31 |     parser = Parser(g, build_tree=True,
32 |                     actions={"Direction": left_dir_collector})
33 |     parser.parse(code)
34 | 
35 |     # call_actions_during_tree_build is False by default, so left_dir_collector
36 |     # will not be called.
37 |     assert len(left_moves) == 0
38 | 
39 |     parser.call_actions_during_tree_build = True
40 |     parser.parse(code)
41 | 
42 |     assert len(left_moves) == 2
43 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_conflicts.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | from parglare import Grammar, Parser
 3 | from parglare.exceptions import SRConflicts, RRConflicts
 4 | 
 5 | 
 6 | def test_sr_conflict():
 7 |     grammar = """
 8 |     S: As A A;
 9 |     As: As A | A;
10 | 
11 |     terminals
12 |     A:"a";
13 |     """
14 |     g = Grammar.from_string(grammar, _no_check_recognizers=True)
15 |     with pytest.raises(SRConflicts) as e:
16 |         Parser(g, prefer_shifts=False)
17 |     assert "whether to shift or reduce by production(s) '2: As = As A'" in \
18 |         str(e.value.conflicts[0])
19 | 
20 | 
21 | def test_rr_empty_conflict():
22 |     grammar = """
23 |     S: A B C | A D C;
24 |     B: B1 | EMPTY;
25 |     D: D1 | EMPTY;
26 | 
27 |     terminals
28 |     A:;
29 |     C:;
30 |     B1:;
31 |     D1:;
32 |     """
33 |     g = Grammar.from_string(grammar, _no_check_recognizers=True)
34 |     with pytest.raises(RRConflicts) as e:
35 |         Parser(g)
36 | 
37 |     # For B and D empty reductions both "A B C" and "A D C" can reduce to S
38 |     assert "'6: D = EMPTY' or '4: B = EMPTY'" \
39 |         in str(e.value.conflicts[0])
40 | 
41 | 
42 | def test_rr_nonempty_conflict():
43 |     grammar = """
44 |     S: A | B;
45 |     A: A1 B1;
46 |     B: A1 B1;
47 | 
48 |     terminals
49 |     A1: ;
50 |     B1: ;
51 |     """
52 |     g = Grammar.from_string(grammar, _no_check_recognizers=True)
53 |     with pytest.raises(RRConflicts) as e:
54 |         Parser(g)
55 | 
56 |     # "A1 B1" can reduce to both A and B
57 |     assert "'4: B = A1 B1' or '3: A = A1 B1'" \
58 |         in str(e.value.conflicts[0])
59 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_dynamic_disambiguation_filters.py:
--------------------------------------------------------------------------------
  1 | import pytest  # noqa
  2 | from parglare import GLRParser, Grammar, Parser, SHIFT, REDUCE
  3 | from parglare.exceptions import SRConflicts
  4 | 
  5 | 
  6 | grammar = r"""
  7 | E: E op_sum E {dynamic}
  8 |  | E op_mul E {dynamic}
  9 |  | number;
 10 | 
 11 | terminals
 12 | number: /\d+/;
 13 | op_sum: '+' {dynamic};
 14 | op_mul: '*' {dynamic};
 15 | """
 16 | instr1 = '1 + 2 * 5 + 3'
 17 | instr2 = '1 * 2 + 5 * 3'
 18 | 
 19 | actions = {
 20 |     'E': [lambda _, nodes: nodes[0] + nodes[2],
 21 |           lambda _, nodes: nodes[0] * nodes[2],
 22 |           lambda _, nodes: float(nodes[0])]
 23 | }
 24 | 
 25 | 
 26 | g = Grammar.from_string(grammar)
 27 | 
 28 | 
 29 | operations = []
 30 | 
 31 | 
 32 | def custom_disambiguation_filter(context, from_state, to_state, action,
 33 |                                  production, subresults):
 34 |     """
 35 |     Make first operation that appears in the input as lower priority.
 36 |     This demonstrates how priority rule can change dynamically depending
 37 |     on the input or how disambiguation can be decided during parsing.
 38 |     """
 39 |     global operations
 40 | 
 41 |     # At the start of parsing this function is called with actions set to None
 42 |     # to give a chance for the strategy to initialize.
 43 |     if action is None:
 44 |         operations = []
 45 |         return
 46 | 
 47 |     assert subresults is None or isinstance(subresults, list)
 48 | 
 49 |     operation = context.token.symbol if action is SHIFT else context.token_ahead.symbol
 50 | 
 51 |     actions = from_state.actions[operation]
 52 |     if operation not in operations and operation.name != 'STOP':
 53 |         operations.append(operation)
 54 | 
 55 |     if action is SHIFT:
 56 |         shifts = [a for a in actions if a.action is SHIFT]
 57 |         if not shifts:
 58 |             return False
 59 | 
 60 |         reductions = [a for a in actions if a.action is REDUCE]
 61 |         if not reductions:
 62 |             return True
 63 | 
 64 |         red_op = reductions[0].prod.rhs[1]
 65 |         return operations.index(operation) > operations.index(red_op)
 66 | 
 67 |     elif action is REDUCE:
 68 | 
 69 |         # Current reduction operation
 70 |         red_op = production.rhs[1]
 71 | 
 72 |         # If operation ahead is STOP or is of less or equal priority -> reduce.
 73 |         return ((operation not in operations)
 74 |                 or (operations.index(operation)
 75 |                     <= operations.index(red_op)))
 76 | 
 77 | 
 78 | def test_dynamic_disambiguation():
 79 |     """
 80 |     Test disambiguation determined at run-time based on the input.
 81 |     This tests LR parsing.
 82 |     """
 83 | 
 84 |     # This grammar is ambiguous if no prefer_shift strategy is used.
 85 |     with pytest.raises(SRConflicts):
 86 |         Parser(g, prefer_shifts=False)
 87 | 
 88 |     # But if we provide dynamic disambiguation filter
 89 |     # the conflicts can be handled at run-time.
 90 |     p = Parser(g, actions=actions, prefer_shifts=False,
 91 |                dynamic_filter=custom_disambiguation_filter)
 92 | 
 93 |     # * operation will be of higher priority as it appears later in the stream.
 94 |     result1 = p.parse(instr1)
 95 |     assert result1 == 1 + (2 * 5) + 3
 96 | 
 97 |     # + operation will be of higher priority here.
 98 |     result2 = p.parse(instr2)
 99 |     assert result2 == 1 * (2 + 5) * 3
100 | 
101 | 
102 | def test_dynamic_disambiguation_glr():
103 |     """
104 |     Test disambiguation determined at run-time based on the input.
105 |     This tests GLR parsing.
106 |     """
107 |     p = GLRParser(g, actions=actions,
108 |                   dynamic_filter=custom_disambiguation_filter)
109 | 
110 |     # * operation will be of higher priority as it appears later in the stream.
111 |     result1 = p.parse(instr1)
112 |     assert len(result1) == 1
113 |     assert p.call_actions(result1[0]) == 1 + (2 * 5) + 3
114 | 
115 |     # + operation will be of higher priority here.
116 |     result2 = p.parse(instr2)
117 |     assert len(result2) == 1
118 |     assert p.call_actions(result2[0]) == 1 * (2 + 5) * 3
119 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_glr_error_recovery.py:
--------------------------------------------------------------------------------
  1 | import pytest  # noqa
  2 | from parglare import GLRParser, Grammar, ParseError
  3 | from parglare.parser import Token
  4 | from parglare.actions import pass_single, pass_inner
  5 | 
  6 | grammar = r"""
  7 | E: E '+' E
  8 |  | E '-' E
  9 |  | E '*' E
 10 |  | E '/' E
 11 |  | E '^' E
 12 |  | '(' E ')'
 13 |  | number;
 14 | 
 15 | terminals
 16 | number: /\d+(\.\d+)?/;
 17 | """
 18 | 
 19 | actions = {
 20 |     "E": [lambda _, nodes: nodes[0] + nodes[2],
 21 |           lambda _, nodes: nodes[0] - nodes[2],
 22 |           lambda _, nodes: nodes[0] * nodes[2],
 23 |           lambda _, nodes: nodes[0] / nodes[2],
 24 |           lambda _, nodes: nodes[0] ** nodes[2],
 25 |           pass_inner,
 26 |           pass_single],
 27 |     "number": lambda _, value: float(value),
 28 | }
 29 | 
 30 | g = Grammar.from_string(grammar)
 31 | 
 32 | 
 33 | def test_glr_recovery_default():
 34 |     """
 35 |     Test default error recovery in GLR parsing. Default recovery should report
 36 |     the error, drop current input at position and try to recover.
 37 |     In case of multiple subsequent errouneous chars only one error should be
 38 |     reported.
 39 |     """
 40 |     parser = GLRParser(g, actions=actions, error_recovery=True)
 41 | 
 42 |     results = parser.parse('1 + 2 + * 3 & 89 - 5')
 43 | 
 44 |     assert len(parser.errors) == 2
 45 |     e1, e2 = parser.errors
 46 | 
 47 |     # First errors is '*' at position 8 and of length 2
 48 |     assert e1.location.start_position == 8
 49 |     assert e1.location.end_position == 10
 50 | 
 51 |     # Second error is '& 89' at position 12 and length 5
 52 |     assert e2.location.start_position == 12
 53 |     assert e2.location.end_position == 17
 54 | 
 55 |     # There are 5 trees for '1 + 2 + 3 - 5'
 56 |     # All results are the same
 57 |     assert len(results) == 5
 58 |     result_set = set([parser.call_actions(tree) for tree in results])
 59 |     assert len(result_set) == 1
 60 |     assert 1 in set(result_set)
 61 | 
 62 | 
 63 | def test_glr_recovery_custom_new_position():
 64 |     """
 65 |     Test that custom recovery that increment position works.
 66 |     """
 67 | 
 68 |     def custom_recovery(head, error):
 69 |         # This recovery will just skip over erroneous part of input '& 89'.
 70 |         head.position += 4
 71 |         return head.parser.default_error_recovery(head)
 72 | 
 73 |     parser = GLRParser(g, actions=actions, error_recovery=custom_recovery)
 74 | 
 75 |     results = parser.parse('1 + 5 & 89 - 2')
 76 | 
 77 |     assert len(parser.errors) == 1
 78 |     assert len(results) == 2
 79 |     result_set = set([parser.call_actions(tree) for tree in results])
 80 |     assert len(result_set) == 1
 81 |     # Calculated result should be '1 + 5 - 2'
 82 |     assert result_set.pop() == 4
 83 | 
 84 | 
 85 | def test_glr_recovery_custom_new_token():
 86 |     """
 87 |     Test that custom recovery that introduces new token works.
 88 |     """
 89 | 
 90 |     def custom_recovery(head, error):
 91 |         # Here we will introduce missing operation token
 92 |         head.token_ahead = Token(g.get_terminal('-'), '-', head.position, length=0)
 93 |         return True
 94 | 
 95 |     parser = GLRParser(g, actions=actions, error_recovery=custom_recovery)
 96 | 
 97 |     results = parser.parse('1 + 5 8 - 2')
 98 | 
 99 |     assert len(parser.errors) == 1
100 |     assert len(results) == 5
101 |     result_set = set([parser.call_actions(tree) for tree in results])
102 |     assert len(result_set) == 2
103 |     assert -4 in result_set
104 |     assert 0 in result_set
105 | 
106 | 
107 | def test_glr_recovery_custom_unsuccessful():
108 |     """
109 |     Test unsuccessful error recovery.
110 |     """
111 | 
112 |     def custom_recovery(head, error):
113 |         return False
114 | 
115 |     parser = GLRParser(g, actions=actions, error_recovery=custom_recovery)
116 | 
117 |     with pytest.raises(ParseError) as e:
118 |         parser.parse('1 + 5 8 - 2')
119 | 
120 |     error = e.value
121 |     assert error.location.start_position == 6
122 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_parse_context.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | from parglare import Grammar, Parser
 3 | from parglare.actions import pass_single
 4 | 
 5 | 
 6 | grammar = r"""
 7 | E: E '+' E  {left}
 8 |  | number;
 9 | 
10 | terminals
11 | number: /\d+(\.\d+)?/;
12 | """
13 | 
14 | called = [False, False]
15 | 
16 | 
17 | def act_sum(is_tree):
18 |     def act_sum(context, nodes):
19 |         called[0] = True
20 |         assert context.parser
21 |         assert context.state.symbol.name == 'E'
22 |         assert context.production.symbol.name == 'E'
23 |         assert len(context.production.rhs) == 3
24 |         assert context.layout_content == '   '
25 |         assert context.start_position == 3
26 |         assert context.end_position == 8
27 |         if is_tree:
28 |             # If parse tree is constructed `node` is available on
29 |             # the context.
30 |             assert context.node.is_nonterm() \
31 |                 and context.node.symbol.name == 'E'
32 |         else:
33 |             assert context.node is None
34 | 
35 |     return act_sum
36 | 
37 | 
38 | def act_number(context, value):
39 |     called[1] = True
40 |     value = float(value)
41 |     assert context.symbol.name == 'number'
42 |     if value == 1:
43 |         assert context.start_position == 3
44 |         assert context.end_position == 4
45 |         assert context.layout_content == '   '
46 |     else:
47 |         assert context.start_position == 7
48 |         assert context.end_position == 8
49 |         assert context.layout_content == ' '
50 |     return value
51 | 
52 | 
53 | actions = {
54 |     "Result": pass_single,
55 |     "E": [None, pass_single],
56 |     "number": act_number,
57 | }
58 | 
59 | g = Grammar.from_string(grammar)
60 | 
61 | 
62 | def test_parse_context():
63 |     global called
64 |     called = [False, False]
65 | 
66 |     actions["E"][0] = act_sum(is_tree=False)
67 |     parser = Parser(g, actions=actions)
68 | 
69 |     parser.parse("   1 + 2  ")
70 | 
71 |     assert all(called)
72 | 
73 | 
74 | def test_parse_context_call_actions():
75 |     """
76 |     Test that valid context attributes are available when calling
77 |     actions using `call_actions`.
78 |     """
79 |     global called
80 |     called = [False, False]
81 | 
82 |     actions["E"][0] = act_sum(is_tree=True)
83 |     parser = Parser(g, build_tree=True, actions=actions)
84 | 
85 |     tree = parser.parse("   1 + 2  ")
86 | 
87 |     parser.call_actions(tree)
88 | 
89 |     assert all(called)
90 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_parsing.py:
--------------------------------------------------------------------------------
 1 | from os.path import dirname, join
 2 | 
 3 | import pytest
 4 | 
 5 | from parglare import Grammar, Parser
 6 | from parglare.exceptions import ParseError
 7 | 
 8 | from ..grammar.expression_grammar import get_grammar
 9 | 
10 | 
11 | def test_parsing():
12 |     grammar = get_grammar()
13 |     p = Parser(grammar)
14 |     assert p.parse("id+id+id")
15 | 
16 | 
17 | def test_parsing_from_file():
18 |     grammar = get_grammar()
19 |     p = Parser(grammar)
20 |     assert p.parse_file(join(dirname(__file__), 'parsing_from_file.txt'))
21 | 
22 | 
23 | def test_partial_parse():
24 |     """
25 |     Test `consume_input` parser parameter.
26 |     """
27 |     grammar = """
28 |     S: 'a' B;
29 |     B: 'b';
30 |     """
31 |     g = Grammar.from_string(grammar)
32 |     parser = Parser(g, consume_input=False)
33 | 
34 |     # Parser should succesfuly parse 'ab' at the beginning.
35 |     parser.parse('abc')
36 | 
37 |     # But if `consume_input` is not set to `False` it should be `True` by
38 |     # default and the parser will not accept partial parses.
39 |     grammar = """
40 |     S: 'a' B;
41 |     B: 'b';
42 |     """
43 |     g = Grammar.from_string(grammar)
44 |     parser = Parser(g)
45 |     parser.parse('a b')
46 |     with pytest.raises(ParseError):
47 |         parser.parse('a b c')
48 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_parsing_errors.py:
--------------------------------------------------------------------------------
  1 | import pytest  # noqa
  2 | import os
  3 | from parglare import Grammar, Parser, GLRParser, ParseError
  4 | from ..grammar.expression_grammar import get_grammar
  5 | 
  6 | 
  7 | parsers = pytest.mark.parametrize("parser_class", [Parser, GLRParser])
  8 | 
  9 | 
 10 | @parsers
 11 | def test_grammar_in_error(parser_class):
 12 | 
 13 |     grammar = get_grammar()
 14 |     p = parser_class(grammar)
 15 | 
 16 |     with pytest.raises(ParseError) as e:
 17 |         p.parse("id+id*+id")
 18 | 
 19 |     assert e.value.grammar is grammar
 20 | 
 21 | 
 22 | def test_glr_last_heads_in_error():
 23 | 
 24 |     grammar = get_grammar()
 25 |     p = GLRParser(grammar)
 26 | 
 27 |     with pytest.raises(ParseError) as e:
 28 |         p.parse("id+id*+id")
 29 | 
 30 |     assert len(e.value.last_heads) == 1
 31 | 
 32 | 
 33 | @parsers
 34 | def test_invalid_input(parser_class):
 35 | 
 36 |     grammar = get_grammar()
 37 |     p = parser_class(grammar)
 38 | 
 39 |     with pytest.raises(ParseError) as e:
 40 |         p.parse("id+id*+id")
 41 | 
 42 |     assert e.value.location.start_position == 6
 43 |     assert "(" in str(e.value)
 44 |     assert "id" in str(e.value)
 45 |     assert '*' in [s.name for s in e.value.symbols_before]
 46 |     assert '+' in [t.value for t in e.value.tokens_ahead]
 47 |     expected_names = [s.name for s in e.value.symbols_expected]
 48 |     assert 'id' in expected_names
 49 |     assert '(' in expected_names
 50 | 
 51 | 
 52 | @parsers
 53 | def test_premature_end(parser_class):
 54 | 
 55 |     grammar = get_grammar()
 56 |     p = parser_class(grammar)
 57 | 
 58 |     with pytest.raises(ParseError) as e:
 59 |         p.parse("id+id*")
 60 | 
 61 |     assert e.value.location.start_position == 6
 62 |     expected_names = [s.name for s in e.value.symbols_expected]
 63 |     assert 'id' in expected_names
 64 |     assert '(' in expected_names
 65 |     assert '*' in [s.name for s in e.value.symbols_before]
 66 |     assert e.value.tokens_ahead == []
 67 | 
 68 | 
 69 | def test_ambiguous_glr():
 70 |     grammar = r"""
 71 |     E: E '+' E
 72 |      | E '*' E
 73 |      | number;
 74 | 
 75 |     terminals
 76 |     number: /\d+(\.\d+)?/;
 77 |     """
 78 |     g = Grammar.from_string(grammar)
 79 |     parser = GLRParser(g)
 80 | 
 81 |     with pytest.raises(ParseError) as e:
 82 |         parser.parse("1 + 2 * 3 / 5")
 83 | 
 84 |     assert e.value.location.start_position == 10
 85 |     assert 'number' in [s.name for s in e.value.symbols_before]
 86 | 
 87 | 
 88 | @parsers
 89 | def test_line_column(parser_class):
 90 |     grammar = get_grammar()
 91 |     p = parser_class(grammar)
 92 | 
 93 |     with pytest.raises(ParseError) as e:
 94 |         p.parse("""id + id * id + id + error * id""")
 95 | 
 96 |     loc = e.value.location
 97 |     assert loc.start_position == 20
 98 |     assert loc.line == 1
 99 |     assert loc.column == 20
100 | 
101 |     with pytest.raises(ParseError) as e:
102 |         p.parse("""id + id * id + id + error * id
103 | 
104 |         """)
105 |     loc = e.value.location
106 |     assert loc.start_position == 20
107 |     assert loc.line == 1
108 |     assert loc.column == 20
109 | 
110 |     with pytest.raises(ParseError) as e:
111 |         p.parse("""
112 | 
113 | id + id * id + id + error * id""")
114 |     loc = e.value.location
115 |     assert loc.start_position == 22
116 |     assert loc.line == 3
117 |     assert loc.column == 20
118 | 
119 |     with pytest.raises(ParseError) as e:
120 |         p.parse("""
121 | 
122 | id + id * id + id + error * id
123 | 
124 |         """)
125 |     loc = e.value.location
126 |     assert loc.start_position == 22
127 |     assert loc.line == 3
128 |     assert loc.column == 20
129 | 
130 | 
131 | @parsers
132 | def test_file_name(parser_class):
133 |     "Test that file name is given in the error string when parsing file."
134 |     grammar = get_grammar()
135 |     p = parser_class(grammar)
136 | 
137 |     input_file = os.path.join(os.path.dirname(__file__),
138 |                               'parsing_errors.txt')
139 | 
140 |     with pytest.raises(ParseError) as e:
141 |         p.parse_file(input_file)
142 | 
143 |     assert 'parsing_errors.txt' in str(e.value)
144 |     assert 'parsing_errors.txt' in e.value.location.file_name
145 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_to_dot.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | from parglare import Parser, GLRParser, Grammar
 3 | from ..grammar.expression_grammar import get_grammar
 4 | 
 5 | 
 6 | def test_to_dot():
 7 | 
 8 |     grammar = get_grammar()
 9 |     p = Parser(grammar, build_tree=True)
10 | 
11 |     res = p.parse("""id+  id * (id
12 |     +id  )
13 |     """)
14 | 
15 |     ts = res.to_dot()
16 | 
17 |     assert '[label="T[11-13]"];' in ts
18 |     assert '[label="+[2-3]"];' in ts
19 | 
20 | 
21 | def test_forest_to_dot():
22 | 
23 |     grammar = Grammar.from_string(r'''
24 |     E: E "+" E | E "-" E | "(" E ")" | "id";
25 |     ''')
26 |     p = GLRParser(grammar)
27 | 
28 |     forest = p.parse("""id+  id - (id
29 |     +id  )
30 |     """)
31 | 
32 |     ts = forest.to_dot()
33 | 
34 |     assert '[label="+[18-19]"];' in ts
35 |     assert '[label="E[5-7]"];' in ts
36 |     assert '[label="Amb(E[0-24],2)" shape=box];' in ts
37 | 


--------------------------------------------------------------------------------
/tests/func/parsing/test_to_str.py:
--------------------------------------------------------------------------------
  1 | import pytest  # noqa
  2 | from parglare import Parser, GLRParser, Grammar
  3 | from ..grammar.expression_grammar import get_grammar
  4 | 
  5 | 
  6 | def test_parse_tree_to_str():
  7 | 
  8 |     grammar = get_grammar()
  9 |     p = Parser(grammar, build_tree=True)
 10 | 
 11 |     res = p.parse("""id+  id * (id
 12 |     +id  )
 13 |     """)
 14 | 
 15 |     ts = res.to_str()
 16 | 
 17 |     assert '+[18->19, "+"]' in ts
 18 |     assert ')[23->24, ")"]' in ts
 19 |     assert 'F[10->24]' in ts
 20 | 
 21 | 
 22 | def test_forest_to_str():
 23 | 
 24 |     grammar = Grammar.from_string(r'''
 25 |     E: E "+" E | E "-" E | "(" E ")" | "id";
 26 |     ''')
 27 |     p = GLRParser(grammar)
 28 | 
 29 |     forest = p.parse("""id+  id - (id
 30 |     +id  )
 31 |     """)
 32 | 
 33 |     ts = forest.to_str()
 34 | 
 35 |     assert 'E - ambiguity[2]' in ts
 36 |     assert 'E[10->24]' in ts
 37 |     assert '      E[11->21]' in ts
 38 |     assert '        +[18->19, "+"]' in ts
 39 | 
 40 | 
 41 | def test_ast_to_str():
 42 |     """
 43 |     Test produced str tree from dynamically constructed AST object.
 44 |     """
 45 |     grammar = r"""
 46 |     S: "1" second=Second third=Third+ fourth=Fourth;
 47 |     Second: val="2";
 48 |     Third: "3";
 49 |     Fourth: "4" val=Second;
 50 |     """
 51 | 
 52 |     g = Grammar.from_string(grammar)
 53 |     parser = Parser(g)
 54 | 
 55 |     result = parser.parse('1 2 3 3 3 4 2')
 56 |     print(result.to_str())
 57 |     assert result.to_str().strip() == """
 58 | S [0->13]
 59 |   second=Second [2->3]
 60 |     val='2'
 61 |   third=    [
 62 |     '3'
 63 |     '3'
 64 |     '3'
 65 |     ]
 66 |   fourth=Fourth [10->13]
 67 |     val=Second [12->13]
 68 |       val='2'""".strip()
 69 | 
 70 | 
 71 | def test_ast_to_str_with_bnf_extensions():
 72 |     """
 73 |     Tests `to_str` with lists returned by BNF extensions.
 74 |     """
 75 |     grammar = r"""
 76 |     S: "1" second=Second third=Third+ fourth=Fourth;
 77 |     Second: val="2";
 78 |     Third: val="3";
 79 |     Fourth: "4" val=Second;
 80 |     """
 81 | 
 82 |     g = Grammar.from_string(grammar)
 83 |     parser = Parser(g)
 84 | 
 85 |     result = parser.parse('1 2 3 3 3 4 2')
 86 |     print(result.to_str())
 87 |     assert result.to_str().strip() == """
 88 | S [0->13]
 89 |   second=Second [2->3]
 90 |     val='2'
 91 |   third=    [
 92 |     Third [4->5]
 93 |       val='3'
 94 |     Third [6->7]
 95 |       val='3'
 96 |     Third [8->9]
 97 |       val='3'
 98 |     ]
 99 |   fourth=Fourth [10->13]
100 |     val=Second [12->13]
101 |       val='2'""".strip()
102 | 


--------------------------------------------------------------------------------
/tests/func/persistence/calc_with_actions/calc.pg:
--------------------------------------------------------------------------------
 1 | import 'variable.pg' as v;
 2 | 
 3 | Calc: Assignments E;
 4 | @collect_optional
 5 | Assignments: Assignments Assignment | Assignment | EMPTY;
 6 | Assignment: v.VariableName "=" Number;
 7 | 
 8 | E: E "+" E {left, 1}
 9 |  | E "-" E {left, 1}
10 |  | E "*" E {left, 2}
11 |  | E "/" E {left, 2}
12 |  | "(" E ")"
13 |  | v.VariableRef
14 |  | Number
15 | ;
16 | 
17 | terminals
18 | Number: /\d+(\.\d+)?/;
19 | 


--------------------------------------------------------------------------------
/tests/func/persistence/calc_with_actions/calc_actions.py:
--------------------------------------------------------------------------------
 1 | from parglare import get_collector
 2 | 
 3 | action = get_collector()
 4 | 
 5 | 
 6 | @action
 7 | def Calc(_, nodes):
 8 |     return nodes[-1]
 9 | 
10 | 
11 | @action
12 | def Assignment(context, nodes):
13 |     var_name, _, value = nodes
14 |     context.extra[var_name] = float(value)
15 | 
16 | 
17 | E = [
18 |     lambda _, n: n[0] + n[2],
19 |     lambda _, n: n[0] - n[2],
20 |     lambda _, n: n[0] * n[2],
21 |     lambda _, n: n[0] / n[2],
22 |     lambda _, n: n[1],
23 |     lambda context, n: context.extra[n[0]],
24 |     lambda _, n: float(n[0])
25 | ]
26 | action('E')(E)
27 | 


--------------------------------------------------------------------------------
/tests/func/persistence/calc_with_actions/test_table_persistance.py:
--------------------------------------------------------------------------------
 1 | import contextlib
 2 | import os
 3 | import time
 4 | 
 5 | from parglare import Grammar, Parser
 6 | 
 7 | this_folder = os.path.dirname(__file__)
 8 | 
 9 | 
10 | def test_save_load_table():
11 |     """
12 |     Test basic table save/load cycle with table file creation.
13 |     """
14 |     calc_file = os.path.join(this_folder, 'calc.pg')
15 |     variable_file = os.path.join(this_folder, 'variable.pg')
16 |     input_str = 'a = 5   1 + 2 * a - 7'
17 |     input_str_result = 1 + 2 * 5 - 7
18 |     grammar = Grammar.from_file(calc_file)
19 | 
20 |     table_file = os.path.join(this_folder, 'calc.pgt')
21 |     # remove table file if exists
22 |     with contextlib.suppress(OSError):
23 |         os.remove(table_file)
24 | 
25 |     parser = Parser(grammar)
26 |     assert parser.parse(input_str) == input_str_result
27 | 
28 |     # Table file must be produced by parser construction.
29 |     assert os.path.exists(table_file)
30 | 
31 |     last_mtime = os.path.getmtime(table_file)
32 |     time.sleep(1)
33 | 
34 |     parser = Parser(grammar)
35 | 
36 |     # Last generated table should be used during parser construction.
37 |     # Currently, it is hard to check this so we'll only check if
38 |     # table_file is not regenerated.
39 |     assert last_mtime == os.path.getmtime(table_file)
40 |     # Parser constructed from persisted table should produce the same result.
41 |     assert parser.parse(input_str) == input_str_result
42 | 
43 |     # We are now touching variable.pg file
44 |     # This should trigger table file regeneration
45 |     with open(variable_file, 'a'):
46 |         os.utime(variable_file, None)
47 |     parser = Parser(grammar)
48 |     assert parser.parse(input_str) == input_str_result
49 |     # We verify that the table file is newer.
50 |     assert last_mtime < os.path.getmtime(table_file)
51 | 
52 |     # Now we test that force_load_table will load table even if not
53 |     # newer than the grammar.
54 |     time.sleep(1)
55 |     with open(variable_file, 'a'):
56 |         os.utime(variable_file, None)
57 |     last_mtime = os.path.getmtime(table_file)
58 |     parser = Parser(grammar, force_load_table=True)
59 |     assert last_mtime == os.path.getmtime(table_file)
60 |     parser = Parser(grammar)
61 |     assert last_mtime < os.path.getmtime(table_file)
62 | 


--------------------------------------------------------------------------------
/tests/func/persistence/calc_with_actions/variable.pg:
--------------------------------------------------------------------------------
1 | VariableRef: VariableName;
2 | 
3 | terminals
4 | VariableName: /[a-zA-Z_][_a-zA-Z0-9]*/;
5 | Number: /\d+(\.\d+)?/;
6 | 


--------------------------------------------------------------------------------
/tests/func/persistence/compare_table/base.pg:
--------------------------------------------------------------------------------
1 | terminals
2 | ID: /\w+/;
3 | FQN: /\w+(\.\w+)*/;
4 | COMMA: ',';
5 | 


--------------------------------------------------------------------------------
/tests/func/persistence/compare_table/components.pg:
--------------------------------------------------------------------------------
 1 | import 'base.pg';
 2 | 
 3 | Component:
 4 |   'component' name=base.ID extends=ComponentExtends? '{'
 5 |       slots=Slot*
 6 |   '}'
 7 | ;
 8 | 
 9 | ComponentExtends: 'extends' extends=base.FQN+[base.COMMA];
10 | 
11 | Slot: SlotIn|SlotOut;
12 | 
13 | SlotIn: 'in' name=base.ID;
14 | SlotOut: 'out' name=base.ID;
15 | 


--------------------------------------------------------------------------------
/tests/func/persistence/compare_table/model.pg:
--------------------------------------------------------------------------------
1 | import 'packages.pg';
2 | import 'modules.pg' as m;
3 | 
4 | Model:
5 |     packages=packages.Package*
6 |     modules=m.Module*
7 | ;
8 | 


--------------------------------------------------------------------------------
/tests/func/persistence/compare_table/modules.pg:
--------------------------------------------------------------------------------
1 | import 'components.pg' as c;
2 | import 'base.pg';
3 | 
4 | Module: 'module' name=base.ID '{'
5 |         components=c.Component*
6 | '}';
7 | 


--------------------------------------------------------------------------------
/tests/func/persistence/compare_table/packages.pg:
--------------------------------------------------------------------------------
 1 | import 'components.pg';
 2 | import 'base.pg';
 3 | 
 4 | Package:
 5 |     'package' name=base.ID body=PackageBody?
 6 | ;
 7 | 
 8 | PackageBody:
 9 |     '{'
10 |               components=components.Component*
11 |     '}'
12 | ;
13 | 


--------------------------------------------------------------------------------
/tests/func/persistence/compare_table/test_compare_table.py:
--------------------------------------------------------------------------------
 1 | import contextlib
 2 | import filecmp
 3 | import os
 4 | 
 5 | from parglare import Grammar, Parser
 6 | 
 7 | this_folder = os.path.dirname(__file__)
 8 | 
 9 | input_str = '''
10 | 
11 | package First
12 | package Second {
13 |     component packageComponent {
14 | 
15 |     }
16 | }
17 | 
18 | module SomeModule {
19 | 
20 |     component myComponent {
21 |         in SomeInputSlot
22 |         out SomeOutputSlot
23 |     }
24 | 
25 | 
26 | }
27 | 
28 | '''
29 | 
30 | 
31 | def test_diamond_import_resolving_and_model_creation():
32 | 
33 |     grammar_file = os.path.join(this_folder, 'model.pg')
34 |     table_file = os.path.join(this_folder, 'model.pgt')
35 |     table_cmp_file = os.path.join(this_folder, 'model_compare.pgt')
36 | 
37 |     g = Grammar.from_file(grammar_file)
38 |     with contextlib.suppress(Exception):
39 |         os.remove(table_file)
40 | 
41 |     parser = Parser(g)
42 | 
43 |     # Check generated table file.
44 |     assert filecmp.cmp(table_file, table_cmp_file, shallow=False)
45 | 
46 |     # Check that parser loaded from the table will correctly parse
47 |     parser = Parser(g, force_load_table=True)
48 | 
49 |     model = parser.parse(input_str)
50 |     assert model
51 |     assert model.__class__.__name__ == 'Model'
52 |     assert isinstance(model.packages, list)
53 |     assert len(model.packages) == 2
54 |     assert model.packages[0].name == 'First'
55 |     assert isinstance(model.modules, list)
56 |     assert len(model.modules) == 1
57 | 
58 |     packageComponent = model.packages[1].body.components[0]
59 |     assert packageComponent.name == 'packageComponent'
60 | 
61 |     module = model.modules[0]
62 |     assert module.__class__.__name__ == 'm.Module'
63 |     assert module.name == 'SomeModule'
64 |     assert len(module.components) == 1
65 | 
66 |     component = module.components[0]
67 |     assert type(component) is type(packageComponent)
68 |     assert component.name == 'myComponent'
69 |     assert len(component.slots) == 2
70 | 
71 |     slot = component.slots[1]
72 |     assert slot.__class__.__name__ == 'packages.components.SlotOut'
73 |     assert slot.name == 'SomeOutputSlot'
74 | 


--------------------------------------------------------------------------------
/tests/func/pglr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/pglr/__init__.py


--------------------------------------------------------------------------------
/tests/func/pglr/grammar.pg:
--------------------------------------------------------------------------------
1 | A: B C D;
2 | B: C D;
3 | 
4 | // Terminal rules without recognizers
5 | terminals
6 | C:;
7 | D:;
8 | 


--------------------------------------------------------------------------------
/tests/func/pglr/test_pglr.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | import subprocess
 3 | import os
 4 | import contextlib
 5 | 
 6 | CURRENT_DIR = os.path.abspath(os.path.dirname(__file__))
 7 | GRAMMAR_FILE = os.path.join(CURRENT_DIR, 'grammar.pg')
 8 | 
 9 | 
10 | @pytest.mark.skipif("TRAVIS" in os.environ
11 |                     and os.environ["TRAVIS"] == "true",
12 |                     reason="Test fails under TRAVIS")
13 | def test_pglr_check():
14 |     """
15 |     Test pglr command for grammar checking.
16 |     """
17 |     result = subprocess.call(['pglr', 'compile', GRAMMAR_FILE])
18 |     assert result == 0
19 | 
20 | 
21 | @pytest.mark.skipif("TRAVIS" in os.environ
22 |                     and os.environ["TRAVIS"] == "true",
23 |                     reason="Test fails under TRAVIS")
24 | def test_pglr_viz():
25 |     """
26 |     Test pglr command for PDA visualization.
27 |     """
28 |     DOT_FILE = os.path.join(CURRENT_DIR, f'{GRAMMAR_FILE}.dot')
29 |     with contextlib.suppress(Exception):
30 |         os.remove(DOT_FILE)
31 |     assert not os.path.exists(DOT_FILE)
32 |     result = subprocess.call(['pglr', '--no-colors', 'viz', GRAMMAR_FILE])
33 |     assert result == 0
34 |     assert os.path.exists(DOT_FILE)
35 |     with open(DOT_FILE) as f:
36 |         assert 'digraph grammar' in f.read()
37 | 


--------------------------------------------------------------------------------
/tests/func/recognizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/igordejanovic/parglare/982de3f5cf657a2c49874c04aaa2b4fb605a7e53/tests/func/recognizers/__init__.py


--------------------------------------------------------------------------------
/tests/func/recognizers/collector/grammar.pg:
--------------------------------------------------------------------------------
1 | Model: INT+ Rule1 INT;
2 | Rule1: a=STRING;
3 | 
4 | terminals
5 | INT:;
6 | STRING:;
7 | 


--------------------------------------------------------------------------------
/tests/func/recognizers/collector/test_recognizers_get_collector.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import re
 3 | 
 4 | import pytest
 5 | 
 6 | from parglare import Grammar, GrammarError, Parser, get_collector
 7 | 
 8 | THIS_FOLDER = os.path.abspath(os.path.dirname(__file__))
 9 | 
10 | 
11 | def test_recognizer_explicit_get_collector():
12 |     """
13 |     Test the basic usage of `get_collector` API where we don't provide
14 |     recognizers in a separate python module.
15 |     """
16 | 
17 |     recognizer = get_collector()
18 | 
19 |     @recognizer
20 |     def INT(input, pos):
21 |         return re.compile(r'\d+').match(input[pos:])
22 | 
23 |     @recognizer
24 |     def STRING(input, pos):
25 |         return re.compile(r'\d+').match(input[pos:])
26 | 
27 |     grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'),
28 |                                 recognizers=recognizer.all)
29 |     parser = Parser(grammar)
30 |     assert parser
31 | 
32 | 
33 | def test_recognizer_explicit_get_collector_missing_recognizer():
34 |     """
35 |     Test when `get_collector` has a terminal without defined recognizer an
36 |     exception is raised.
37 |     """
38 | 
39 |     recognizer = get_collector()
40 | 
41 |     @recognizer
42 |     def INT(input, pos):
43 |         return re.compile(r'\d+').match(input[pos:])
44 | 
45 |     with pytest.raises(GrammarError,
46 |                        match=r'Terminal "STRING" has no recognizer defined.'):
47 |         Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'),
48 |                           recognizers=recognizer.all)
49 | 
50 | 
51 | def test_recognizer_explicit_get_collector_recognizer_for_unexisting_terminal():  # noqa
52 |     """
53 |     Test for situation when `get_collector` has a recognizer for un-existing
54 |     terminal.
55 |     """
56 | 
57 |     recognizer = get_collector()
58 | 
59 |     @recognizer
60 |     def INT(input, pos):
61 |         return re.compile(r'\d+').match(input[pos:])
62 | 
63 |     @recognizer
64 |     def STRING(input, pos):
65 |         return re.compile(r'\d+').match(input[pos:])
66 | 
67 |     @recognizer
68 |     def STRING2(input, pos):
69 |         return re.compile(r'\d+').match(input[pos:])
70 | 
71 |     grammar = Grammar.from_file(os.path.join(THIS_FOLDER, 'grammar.pg'),
72 |                                 recognizers=recognizer.all)
73 |     parser = Parser(grammar)
74 |     assert parser
75 | 


--------------------------------------------------------------------------------
/tests/func/recognizers/test_recognizer_context.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | from parglare import Grammar, Parser
 3 | import re
 4 | 
 5 | 
 6 | def test_recognizer_context():
 7 |     grammar = """
 8 |     program: expression+[semicolon];
 9 |     expression: term+;
10 | 
11 |     terminals
12 |     semicolon: ";";
13 |     term:;
14 |     """
15 | 
16 |     term_re = re.compile(r"[a-zA-Z_]+")
17 | 
18 |     def term(context, input, pos):
19 |         match = term_re.match(input, pos)
20 |         if match is None:
21 |             return None
22 |         return input[pos:match.end()]
23 | 
24 |     g = Grammar.from_string(grammar, recognizers={'term': term})
25 |     parser = Parser(g)
26 |     assert parser.parse("a bb cc; d ee f; g hh i")
27 | 


--------------------------------------------------------------------------------
/tests/func/regressions/issue38/names.pg:
--------------------------------------------------------------------------------
1 | LINE: FIO|SYMBOL;
2 | 
3 | terminals
4 | FIO: /МИША|САША/ {15};
5 | SYMBOL: /\w+/;
6 | 


--------------------------------------------------------------------------------
/tests/func/regressions/issue38/test_issue_38_unicode_py2.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from parglare import Grammar, Parser
 4 | 
 5 | 
 6 | def test_grammar_with_unicode():
 7 |     this_folder = os.path.dirname(__file__)
 8 |     grammar = Grammar.from_file(os.path.join(this_folder, "names.pg"))
 9 |     parser = Parser(grammar, consume_input=False)
10 |     inp = 'МИША МЫЛ РАМУ'
11 |     result = parser.parse(inp)
12 |     assert result
13 | 


--------------------------------------------------------------------------------
/tests/func/regressions/issue52/grammar.pg:
--------------------------------------------------------------------------------
 1 | PROG: EXPR+;
 2 | 
 3 | ///// SIMPLE EXPRESSIONS /////
 4 | 
 5 | EXPR: BUILTIN '(' EXPR* ')' {800}
 6 |     | 'if' '(' EXPR_PAIR+ EXPR ')' {800}
 7 |     | 'switch' '(' EXPR EXPR_PAIR+ EXPR ')' {800}
 8 |     | '[' EXPR 'for' KVP+ ']'
 9 |     | OOO_PAREN EXPR ')'
10 |     | EXPR F_PAREN EXPR+ KVP* ')' {left, 700}
11 |     | EXPR DOT POST_DOT {left, 700}
12 |     | EXPR '%' EXPR {left, 600}
13 |     | EXPR '^' EXPR {left, 600}
14 |     | EXPR '+' EXPR {left, 500}
15 |     | EXPR '/' EXPR {left, 500}
16 |     | EXPR '\\' EXPR {left, 500}
17 |     // logical operators on string expressions
18 |     // lower precedence than string-string operators,
19 |     // higher than pure logic operators
20 |     | EXPR '==' EXPR {left, 450}
21 |     | EXPR '=~' EXPR {left, 450}
22 |     // logical operators, which can't really be chained with string operators
23 |     // (even if a nonzero-length string can evaluate to "true"):
24 |     // !a.b.c+e.f makes no sense as !(a.b.c)+e.f -> (false)+e.f
25 |     // so they will have lower precedence
26 |     | '!' EXPR {right, 400}
27 |     | EXPR '&&' EXPR {left, 300}
28 |     | EXPR '||' EXPR {left, 250}
29 |     | LITERAL {200}
30 |     | VAR {100};
31 | 
32 | POST_DOT: MODIFIER '(' EXPR* ')' {1000}
33 |         | VAR {100};
34 | 
35 | ////// COMPOUND EXPRESSIONS //////
36 | 
37 | KVP: VAR '=' EXPR;
38 | EXPR_PAIR: EXPR ':' EXPR;
39 | 
40 | ///// MODIFIERS /////
41 | 
42 | MODIFIER: 'strip' | 'lower' | 'pre' | 'post' | 'regex' | 'wrap';
43 | 
44 | ///// BUILTINS /////
45 | 
46 | BUILTIN: 'chmod' | 'dynamic' | 'table' | 'sha256' | 'cat';
47 | 
48 | ///// LITERALS /////
49 | 
50 | LITERAL: FUNC | DICT | LIST | STRING | INT | BOOL | PUKE | NULL;
51 | FUNC: '{' VAR+ KVP* '->' EXPR '}';
52 | DICT: '<' KVP* '>';
53 | BOOL: 'true' | 'false';
54 | STRING: DQSTRING | SQSTRING;
55 | LIST: '[' LIST_ELEM* ']';
56 | LIST_ELEM: EXPR | EXPAND | SKIP;
57 | EXPAND: '*' EXPR;
58 | 
59 | ///// TERMINALS /////
60 | 
61 | terminals
62 | // F_PAREN is a paren that must be a function call
63 | F_PAREN: /(?<=[a-zA-Z_0-9)}])\(/;
64 | // OOO_PAREN is a parent that must not be a function call
65 | OOO_PAREN: /(?<![a-zA-Z_0-9)}])\(/;
66 | VAR: /[a-zA-Z_][a-zA-Z_0-9]*/;
67 | INT: /\d+/;
68 | DOT: '.';
69 | PUKE: 'puke';
70 | NULL: 'null';
71 | SKIP: 'skip';
72 | DQSTRING: /"[^"\\]*(?:\\.[^"\\]*)*"/;
73 | SQSTRING: /'[^'\\]*(?:\\.[^'\\]*)*'/;
74 | 


--------------------------------------------------------------------------------
/tests/func/regressions/issue52/test_issue52_table_caching.py:
--------------------------------------------------------------------------------
 1 | import contextlib
 2 | import os
 3 | 
 4 | from parglare import Grammar, Parser
 5 | 
 6 | 
 7 | def test_table_from_cache_different_than_calculated():
 8 |     this_folder = os.path.dirname(__file__)
 9 |     grammar_file = os.path.join(this_folder, 'grammar.pg')
10 |     table_file = os.path.join(this_folder, 'grammar.pgt')
11 |     with contextlib.suppress(Exception):
12 |         os.remove(table_file)
13 | 
14 |     g = Grammar.from_file(grammar_file)
15 | 
16 |     p = Parser(g)
17 |     without_cache = p.parse('dynamic("OS")')
18 | 
19 |     p = Parser(g)
20 |     with_cache = p.parse('dynamic("OS")')
21 | 
22 |     assert without_cache == with_cache
23 | 


--------------------------------------------------------------------------------
/tests/func/regressions/issue97/first.pg:
--------------------------------------------------------------------------------
1 | import 'second.pg' as s;
2 | 
3 | S: A s.E*;
4 | A: s.E+;
5 | 


--------------------------------------------------------------------------------
/tests/func/regressions/issue97/second.pg:
--------------------------------------------------------------------------------
1 | A: E+;
2 | 
3 | terminals
4 | E: 'e';
5 | 


--------------------------------------------------------------------------------
/tests/func/regressions/issue97/test_issue97.py:
--------------------------------------------------------------------------------
 1 | from os.path import dirname, join
 2 | 
 3 | from parglare import GLRParser, Grammar
 4 | 
 5 | 
 6 | def test_import_plus_raises_keyerror():
 7 | 
 8 |     this_folder = dirname(__file__)
 9 |     grammar = Grammar.from_file(join(this_folder, 'first.pg'))
10 |     GLRParser(grammar)
11 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_glr_list_building_bug.py:
--------------------------------------------------------------------------------
 1 | from parglare import GLRParser, Grammar
 2 | 
 3 | 
 4 | def test_glr_list_building_bug():
 5 |     """Test regression for a bug in building lists from default `collect` actions.
 6 | 
 7 |     """
 8 |     grammar = r"""
 9 |         S: B+;
10 |         B: "b"? A+;
11 |         A: "a";
12 |     """
13 |     g = Grammar.from_string(grammar)
14 |     parser = GLRParser(g, prefer_shifts=True)
15 |     result = parser.parse('b a b a a a')
16 |     assert len(result) == 1
17 |     assert parser.call_actions(result[0]) == [['b', ['a']], ['b', ['a', 'a', 'a']]]
18 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue147.py:
--------------------------------------------------------------------------------
 1 | from parglare import GLRParser, Grammar
 2 | 
 3 | 
 4 | def test_tostr_failure_with_ambiguous_grammar():
 5 |     '''
 6 |     Test by LVrecar@GitHub
 7 |     See: https://github.com/igordejanovic/parglare/issues/147
 8 |     '''
 9 | 
10 |     grammar = r'''
11 |     term: var | app;
12 |     app: term " "? term;
13 | 
14 |     terminals
15 |     var: /[a-z]+?/;
16 |     '''
17 | 
18 |     g = Grammar.from_string(grammar, ignore_case=True)
19 |     parser = GLRParser(g)
20 | 
21 |     result = parser.parse("x y z w")
22 |     for _, tree in enumerate(result):
23 |         print(tree.to_str())
24 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue31_glr_drop_parses_on_lexical_ambiguity.py:
--------------------------------------------------------------------------------
 1 | from parglare import GLRParser, Grammar
 2 | 
 3 | 
 4 | def test_issue31_glr_drop_parses_on_lexical_ambiguity():
 5 |     grammar = """
 6 |     model: element+;
 7 |     element: title
 8 |            | table_with_note
 9 |            | table_with_title;
10 |     table_with_title: table_title table_with_note;
11 |     table_with_note: table note*;
12 | 
13 |     terminals
14 |     title: /title/;   // <-- This is lexically ambiguous with the next.
15 |     table_title: /title/;
16 |     table: "table";
17 |     note: "note";
18 |     """
19 | 
20 |     # this input should yield 4 parse trees.
21 |     input = "title table title table"
22 | 
23 |     g = Grammar.from_string(grammar)
24 |     parser = GLRParser(g, debug=True, debug_colors=True)
25 |     results = parser.parse(input)
26 | 
27 |     # We should have 4 solutions for the input.
28 |     assert len(results) == 4
29 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue32.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from parglare import Grammar, GrammarError
 4 | 
 5 | 
 6 | def test_conflict_between_string_match_and_rule_with_same_name():
 7 | 
 8 |     with pytest.raises(GrammarError, match=r'.*already defined.*'):
 9 |         Grammar.from_string('''
10 |             Terminals:
11 |                 "Terminals" ":" terminal_list=Terminal* ";"
12 |             ;
13 |         ''')
14 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue64.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from parglare import Grammar, Parser
 4 | 
 5 | 
 6 | def test_issue64():
 7 |     g = Grammar.from_string(
 8 |         'T: NL* L+ NL* | NL*; terminals L: /.+/; NL: "\n";',
 9 |         re_flags=re.MULTILINE)
10 |     Parser(g, consume_input=False, ws=None).parse('\nL1\nL2\n\n')
11 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue73.py:
--------------------------------------------------------------------------------
 1 | # Tests for issue #73 (see https://github.com/igordejanovic/parglare/issues/73)
 2 | from parglare import Grammar, Parser
 3 | 
 4 | 
 5 | def test_recursive_rule():
 6 |     grammar = r"""
 7 |     s: as;
 8 |     as: as "a" | EMPTY;
 9 |     """
10 | 
11 |     g = Grammar.from_string(grammar)
12 |     parser = Parser(g)
13 |     parser.parse('aaa')
14 | 
15 | 
16 | def test_recursive_rule_other_way():
17 |     grammar = r"""
18 |     s: as;
19 |     as: EMPTY | as "a";
20 |     """
21 | 
22 |     g = Grammar.from_string(grammar)
23 |     parser = Parser(g)
24 |     parser.parse('aaa')
25 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue_110_wrong_positions_with_glr_and_empty.py:
--------------------------------------------------------------------------------
 1 | from parglare import GLRParser, Grammar, Parser
 2 | 
 3 | grammar = r"""
 4 | S: A A B;
 5 | A: letter;
 6 | B: letter | EMPTY;
 7 | 
 8 | terminals
 9 | letter: /\w/;
10 | """
11 | 
12 | expression = "ab"
13 | 
14 | 
15 | def test_positions():
16 |     """
17 |     See https://github.com/igordejanovic/parglare/issues/110
18 |     """
19 |     g = Grammar.from_string(grammar)
20 |     parser = Parser(g, build_tree=True)
21 |     result = parser.parse(expression)
22 | 
23 |     assert result.start_position == 0
24 |     assert result.end_position == 2
25 | 
26 | 
27 | def test_positions_glr():
28 |     """
29 |     See https://github.com/igordejanovic/parglare/issues/110
30 |     """
31 |     g = Grammar.from_string(grammar)
32 |     parser = GLRParser(g, build_tree=True)
33 |     result = parser.parse(expression)
34 | 
35 |     assert result[0].start_position == 0
36 |     assert result[0].end_position == 2
37 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue_22_wrong_associativity_behaviour.py:
--------------------------------------------------------------------------------
  1 | from parglare import Grammar, Parser, get_collector
  2 | 
  3 | grammar_1 = r'''
  4 | STMT : STMT ADDOP STMT {left, 1}
  5 |      | STMT MULOP STMT {left, 1}
  6 |      | "(" STMT ")" | NUMBER;
  7 | ADDOP : "+" {1}
  8 |       | "-" {1};
  9 | MULOP : "*" {2}
 10 |       | "/" {2};
 11 | 
 12 | terminals
 13 | NUMBER: /\d+(.\d+)?/;
 14 | '''
 15 | 
 16 | grammar_2 = r'''
 17 | STMT : STMT "+" STMT {left, 1}
 18 |      | STMT "-" STMT {left, 1}
 19 |      | STMT "*" STMT {left, 2}
 20 |      | STMT "/" STMT {left, 2}
 21 |      | "(" STMT ")" | NUMBER;
 22 | 
 23 | terminals
 24 | NUMBER: /\d+(.\d+)?/;
 25 | '''
 26 | 
 27 | # Grammar could be simplified a bit
 28 | # with https://github.com/igordejanovic/parglare/issues/17
 29 | grammar_3 = r'''
 30 | STMT {left}: STMT ADDOP STMT {1}
 31 |            | STMT MULOP STMT {2};
 32 | STMT: "(" STMT ")" | NUMBER;
 33 | ADDOP {1}: "+" | "-";
 34 | MULOP {2}: "*" | "/";
 35 | 
 36 | terminals
 37 | NUMBER: /\d+(.\d+)?/;
 38 | '''
 39 | 
 40 | expression = '1 - 2 / (3 - 1 + 5 / 6 - 8 + 8 * 2 - 5)'
 41 | result = 1 - 2 / (3 - 1 + 5 / 6 - 8 + 8 * 2 - 5)
 42 | 
 43 | action = get_collector()
 44 | 
 45 | 
 46 | @action
 47 | def NUMBER(_, value):
 48 |     return int(value)
 49 | 
 50 | 
 51 | @action
 52 | def STMT(_, nodes):
 53 |     if len(nodes) == 1:
 54 |         return nodes[0]
 55 |     elif nodes[0] == '(':
 56 |         return nodes[1]
 57 |     else:
 58 |         left, op, right = nodes
 59 | 
 60 |     if op == '+':
 61 |         return left + right
 62 |     elif op == '-':
 63 |         return left - right
 64 |     elif op == '*':
 65 |         return left * right
 66 |     else:
 67 |         return left / right
 68 | 
 69 | 
 70 | def test_associativity_variant_1():
 71 |     """
 72 |     See https://github.com/igordejanovic/parglare/issues/22
 73 |     """
 74 |     g = Grammar.from_string(grammar_1)
 75 |     parser = Parser(g, actions=action.all)
 76 |     r = parser.parse(expression)
 77 | 
 78 |     assert r == result
 79 | 
 80 | 
 81 | def test_associativity_variant_2():
 82 |     """
 83 |     See https://github.com/igordejanovic/parglare/issues/22
 84 |     """
 85 |     g = Grammar.from_string(grammar_2)
 86 |     parser = Parser(g, actions=action.all)
 87 |     r = parser.parse(expression)
 88 | 
 89 |     assert r == result
 90 | 
 91 | 
 92 | def test_associativity_variant_3():
 93 |     """
 94 |     See https://github.com/igordejanovic/parglare/issues/22
 95 |     Using https://github.com/igordejanovic/parglare/issues/17
 96 |     """
 97 |     g = Grammar.from_string(grammar_3)
 98 |     parser = Parser(g, actions=action.all)
 99 |     r = parser.parse(expression)
100 | 
101 |     assert r == result
102 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue_23_multiple_rules_with_assignments.py:
--------------------------------------------------------------------------------
 1 | from parglare import Grammar, Parser
 2 | 
 3 | 
 4 | def test_multiple_rules_with_assignments():
 5 |     '''
 6 |     See https://github.com/igordejanovic/parglare/issues/23
 7 |     '''
 8 | 
 9 |     text = "a"
10 | 
11 |     grammar_str = """\
12 |     A : "a" ;
13 |     A : "b" ;
14 |     """
15 | 
16 |     grammar = Grammar.from_string(grammar_str)
17 |     parser = Parser(grammar)
18 |     result = parser.parse(text)
19 |     assert result == 'a'
20 | 
21 |     grammar_str = """\
22 |     A : t="a" | t="b" ;
23 |     """
24 | 
25 |     grammar = Grammar.from_string(grammar_str)
26 |     parser = Parser(grammar)
27 |     result = parser.parse(text)
28 |     assert type(result).__name__ == 'A'
29 |     assert result.t == 'a'
30 | 
31 |     # Must be equvalent with the previous
32 |     grammar_str = """\
33 |     A : t="a" ;
34 |     A : t="b" ;
35 |     """
36 | 
37 |     grammar = Grammar.from_string(grammar_str)
38 |     parser = Parser(grammar)
39 |     result = parser.parse(text)
40 |     assert type(result).__name__ == 'A'
41 |     assert result.t == 'a'
42 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_issue_54_collector_unicode.py:
--------------------------------------------------------------------------------
 1 | from parglare import get_collector
 2 | 
 3 | 
 4 | def test_collector_can_use_unicode_in_python_2():
 5 | 
 6 |     action = get_collector()
 7 | 
 8 |     def f(context, node):
 9 |         return node
10 | 
11 |     action('f_action')(f)
12 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_recognizer_nonexisting_terminal/grammar.pg:
--------------------------------------------------------------------------------
1 | S: A;
2 | 
3 | terminals
4 | A: 'a';
5 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_recognizer_nonexisting_terminal/grammar_recognizers.py:
--------------------------------------------------------------------------------
 1 | from parglare import get_collector
 2 | 
 3 | recognizer = get_collector()
 4 | 
 5 | 
 6 | @recognizer
 7 | def A(input, pos):
 8 |     return [input[pos]]
 9 | 
10 | 
11 | # This should raise an exception as there is no `B` terminal in the grammar
12 | @recognizer
13 | def B(input, pos):
14 |     return [input[pos]]
15 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_recognizer_nonexisting_terminal/test_recognizer_nonexisting_terminal.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | from os.path import join, dirname
 3 | from parglare import Grammar, GrammarError
 4 | 
 5 | 
 6 | def test_recognizer_for_unexisting_terminal_raises_exception():
 7 |     """
 8 |     If a recognizer is given for a terminal that can't be found in the grammar
 9 |     raise an exception.
10 |     """
11 | 
12 |     with pytest.raises(GrammarError,
13 |                        match=r'.*given for unknown terminal "B".'):
14 |         Grammar.from_file(join(dirname(__file__), 'grammar.pg'))
15 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_regex_alternative_match_bug.py:
--------------------------------------------------------------------------------
 1 | from parglare import GLRParser, Grammar
 2 | 
 3 | 
 4 | def test_regex_alternative_match_bug():
 5 |     """
 6 |     """
 7 | 
 8 |     grammar = """
 9 |     A: "Begin" Eq "End";
10 | 
11 |     terminals
12 |     Eq: /=|EQ/;
13 |     """
14 |     g = Grammar.from_string(grammar)
15 |     parser = GLRParser(g)
16 |     parser.parse('Begin EQ End')
17 | 


--------------------------------------------------------------------------------
/tests/func/regressions/test_terminal_exists_noerror_on_terminal_definition_before.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from parglare import Grammar, GrammarError
 4 | 
 5 | 
 6 | def test_terminal_exists_noerror_on_terminal_definition_before():
 7 |     """
 8 |     Test the situation where we have inline terminal used but the
 9 |     same terminal is defined in the `terminals` section.
10 |     """
11 |     grammar = """
12 |     Program: "begin"
13 |              statements=Statement*
14 |              ProgramEnd;
15 |     Statement: "end" "transaction"  // here we are using inline terminal `end`
16 |                                     // instead of using a name reference
17 |              | "command";
18 | 
19 |     terminals
20 |     ProgramEnd: "end";
21 |     """
22 | 
23 |     with pytest.raises(GrammarError) as e:
24 |         Grammar.from_string(grammar)
25 | 
26 |     assert 'match the same string' in str(e.value)
27 | 


--------------------------------------------------------------------------------
/tests/func/test_common_errors.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | from parglare import Grammar, Parser
 3 | from parglare.exceptions import GrammarError
 4 | 
 5 | 
 6 | def test_infinite_recursions():
 7 |     """
 8 |     If rule have no recursion termination alternative as for example:
 9 | 
10 |     Elements: Elements Element;
11 | 
12 |     instead of:
13 |     Elements: Elements Element | Element;
14 | 
15 |     first set of "Elements" will be empty. GrammarError will be produced during
16 |     parser construction.
17 |     """
18 | 
19 |     grammar = """
20 |     Elements: Elements Element;
21 |     Element: "a" | "b";
22 |     """
23 | 
24 |     g = Grammar.from_string(grammar)
25 | 
26 |     with pytest.raises(GrammarError) as e:
27 |         Parser(g)
28 | 
29 |     assert 'First set empty for grammar symbol "Elements"' in str(e.value)
30 |     assert 'infinite recursion' in str(e.value)
31 | 
32 | 
33 | def test_terminals_with_different_names():
34 |     """Test that all terminals with same string match have the same name.
35 |     """
36 | 
37 |     # In this grammar we have 'd' terminal in S production and B terminal with
38 |     # the same 'd' recognizer.
39 |     grammar = """
40 |     S: 'a' A 'd' | 'b' A B;
41 |     A: 'c' A | 'c';
42 |     terminals
43 |     B: 'd';
44 |     """
45 | 
46 |     with pytest.raises(GrammarError) as e:
47 |         Grammar.from_string(grammar)
48 | 
49 |     assert 'B' in str(e.value)
50 |     assert 'd' in str(e.value)
51 |     assert 'match the same string' in str(e.value)
52 | 
53 | 
54 | def todo_test_grammar_without_valid_inputs():
55 |     """
56 |     TODO: There is no valid input for this grammar. This should be detected by
57 |     the parser.
58 |    """
59 |     grammar = """
60 |     S: A | B;
61 |     A: '1' S '1';
62 |     B: '0' S '0';
63 |     """
64 | 
65 |     g = Grammar.from_string(grammar)
66 |     p = Parser(g)
67 |     p.parse('0101000110001010')
68 | 


--------------------------------------------------------------------------------
/tests/func/test_examples.py:
--------------------------------------------------------------------------------
 1 | import pytest   # noqa
 2 | import os
 3 | import sys
 4 | import glob
 5 | import importlib
 6 | from itertools import chain
 7 | 
 8 | 
 9 | skip_examples = [
10 |     'molecular_formulas',
11 |     'custom_table_caching',
12 |     'c/'
13 | ]
14 | 
15 | examples_pat = os.path.join(os.path.abspath(os.path.dirname(__file__)),
16 |                             '../../examples/*/*.py')
17 | examples = [f for f in glob.glob(examples_pat)
18 |             if not any([x in f
19 |                         for x in chain(['__init__.py'],
20 |                                        (f'examples/{example}'
21 |                                         for example in skip_examples))])]
22 | 
23 | 
24 | @pytest.mark.parametrize("example", examples)
25 | def test_examples(example):
26 |     example_dir = os.path.dirname(example)
27 |     sys.path.insert(0, example_dir)
28 |     (module_name, _) = os.path.splitext(os.path.basename(example))
29 |     m = importlib.import_module(module_name)
30 | 
31 |     if hasattr(m, 'main'):
32 |         m.main(debug=False)
33 | 


--------------------------------------------------------------------------------
/tests/func/test_export.py:
--------------------------------------------------------------------------------
 1 | import pytest  # noqa
 2 | import os
 3 | import tempfile
 4 | from parglare import Grammar
 5 | from parglare.tables import create_table
 6 | from parglare.export import grammar_pda_export
 7 | 
 8 | 
 9 | def test_dot_export():
10 |     grammar = 'S: S S | S S S | "b";'
11 |     g = Grammar.from_string(grammar)
12 | 
13 |     table = create_table(g)
14 | 
15 |     tmp_dir = tempfile.mkdtemp()
16 |     file_name = os.path.join(tmp_dir, 'testexport.dot')
17 | 
18 |     grammar_pda_export(table, file_name)
19 | 
20 |     with open(file_name) as f:
21 |         assert 'label' in f.read()
22 | 
23 |     os.remove(file_name)
24 |     os.rmdir(tmp_dir)
25 | 


--------------------------------------------------------------------------------
/tests/perf/profile.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # pip install pyprof2calltree
3 | 
4 | rm glr.pstats
5 | python -m cProfile -o cpu.pstats -s time test_cpu.py
6 | pyprof2calltree -k -i cpu.pstats
7 | 


--------------------------------------------------------------------------------
/tests/perf/report_grammar.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | from os.path import dirname, join
 3 | from parglare import Grammar, GLRParser
 4 | from tests import TESTS
 5 | 
 6 | 
 7 | class Result:
 8 |     def __init__(self, name, productions, nonterminals, lr_states):
 9 |         self.name = name
10 |         self.productions = productions
11 |         self.nonterminals = nonterminals
12 |         self.lr_states = lr_states
13 | 
14 | 
15 | def grammar_sizes():
16 |     results = []
17 |     for test_idx, test in enumerate(TESTS):
18 |         test_root = join(dirname(__file__), f'test{test_idx+1}')
19 |         g = Grammar.from_file(join(test_root, 'g.pg'))
20 |         parser = GLRParser(g)
21 |         productions = len(g.productions)
22 |         nonterminals = len(g.nonterminals)
23 |         states = len(parser.table.states)
24 | 
25 |         results.append(Result(test.name, productions, nonterminals, states))
26 | 
27 |     with open(join(dirname(__file__), 'reports', 'grammar-sizes.txt'), 'w') as f:
28 |         f.write('|  Grammar  | productions | non-terminals | LALR automaton size |\n')
29 |         for result in results:
30 |             sizes_str = f'{result.productions:^13,d}|'\
31 |                         f'{result.nonterminals:^15,d}|{result.lr_states:^21,d}'
32 |             title = f'{result.name:^11s}'
33 |             f.write(f'|{title}|{sizes_str}|\n')
34 | 
35 | 
36 | if __name__ == '__main__':
37 |     grammar_sizes()
38 | 


--------------------------------------------------------------------------------
/tests/perf/reports/cpu-report.txt:
--------------------------------------------------------------------------------
 1 | |               |    I1   |    I2   |    I3   |    I4   |    I5   |    I6   |
 2 | |JSON sizes     | 12,447  | 24,892  | 49,782  | 99,562  | 199,122 | 398,243 |
 3 | |JSON LR        | 746,728 | 770,605 | 766,252 | 771,167 | 761,376 | 759,707 |
 4 | |JSON GLR       | 413,838 | 412,225 | 411,039 | 408,479 | 408,531 | 401,523 |
 5 | |JSON ambig     |    0    |    0    |    0    |    0    |    0    |    0    |
 6 | |BibTeX sizes   |  1,727  |  3,455  |  6,911  | 13,823  | 27,647  | 55,295  |
 7 | |BibTeX LR      | 456,845 | 525,634 | 530,546 | 517,580 | 515,125 | 512,579 |
 8 | |BibTeX GLR     | 302,313 | 300,401 | 299,262 | 292,926 | 290,300 | 288,395 |
 9 | |BibTeX ambig   |    0    |    0    |    0    |    0    |    0    |    0    |
10 | |Java  sizes    |  2,189  |  8,018  | 15,471  | 27,600  | 52,802  | 70,084  |
11 | |Java GLR       | 51,994  | 41,574  | 36,845  | 53,070  | 55,404  | 38,258  |
12 | |Java ambig     |   53    |   239   |   366   |   629   |  1,065  |  1,987  |
13 | 


--------------------------------------------------------------------------------
/tests/perf/reports/grammar-sizes.txt:
--------------------------------------------------------------------------------
1 | |  Grammar  | productions | non-terminals | LALR automaton size |
2 | |   JSON    |     19      |       9       |         26          |
3 | |  BibTeX   |     29      |      17       |         51          |
4 | |   Java    |     795     |      405      |        1,190        |
5 | 


--------------------------------------------------------------------------------
/tests/perf/reports/mem-report.txt:
--------------------------------------------------------------------------------
 1 | |               |    I1   |    I2   |    I3   |    I4   |    I5   |    I6   |
 2 | |JSON sizes     | 12,447  | 24,892  | 49,782  | 99,562  | 199,122 | 398,243 |
 3 | |JSON LR        |   116   |   210   |   404   |   792   |  1,567  |  3,118  |
 4 | |JSON GLR       |  2,485  |  4,945  |  9,891  | 19,786  | 39,593  | 79,206  |
 5 | |JSON ambig     |    0    |    0    |    0    |    0    |    0    |    0    |
 6 | |BibTeX sizes   |  1,727  |  3,455  |  6,911  | 13,823  | 27,647  | 55,295  |
 7 | |BibTeX LR      |   36    |   64    |   120   |   233   |   458   |   909   |
 8 | |BibTeX GLR     |   521   |  1,044  |  2,089  |  4,182  |  8,367  | 16,739  |
 9 | |BibTeX ambig   |    0    |    0    |    0    |    0    |    0    |    0    |
10 | |Java  sizes    |  2,189  |  8,018  | 15,471  | 27,600  | 52,802  | 70,084  |
11 | |Java GLR       |  1,466  |  5,919  | 11,013  | 13,811  | 26,031  | 56,343  |
12 | |Java ambig     |   53    |   239   |   366   |   629   |  1,065  |  1,987  |
13 | 


--------------------------------------------------------------------------------
/tests/perf/requirements.txt:
--------------------------------------------------------------------------------
1 | psutil
2 | pyprof2calltree
3 | 


--------------------------------------------------------------------------------
/tests/perf/runall.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python report_grammar.py
3 | python test_cpu.py
4 | python test_mem.py
5 | 


--------------------------------------------------------------------------------
/tests/perf/test1/g.pg:
--------------------------------------------------------------------------------
 1 | value: FALSE | TRUE | NULL | object | array | number | string;
 2 | object: "{" member*[COMMA] "}";
 3 | member: string ":" value;
 4 | array: "[" value*[COMMA] "]";
 5 | 
 6 | terminals
 7 | FALSE: 'false';
 8 | TRUE: 'true';
 9 | NULL: 'null';
10 | COMMA: ',';
11 | number: /-?\d+(\.\d+)?(e|E[-+]?\d+)?/;
12 | string: /"((\\")|[^"])*"/;
13 | 


--------------------------------------------------------------------------------
/tests/perf/test2/g.pg:
--------------------------------------------------------------------------------
 1 | BibFile:
 2 |     entries=BibEntry+
 3 | ;
 4 | 
 5 | BibEntry: BibLineComment | BibComment | BibPreamble | BibString | BibRefEntry;
 6 | 
 7 | BibLineComment:
 8 |     text=BibCommentLine
 9 | ;
10 | 
11 | BibComment: '@' 'comment' '{'
12 |         text=BlockCommentBody
13 |     '}'
14 | ;
15 | 
16 | BibPreamble: '@' 'preamble' '{'
17 |         value=Value
18 |     '}'
19 | ;
20 | 
21 | BibString: '@' type='string' '{'
22 |         fields=BibField*[Comma]
23 |     '}'
24 | ;
25 | 
26 | BibRefEntry: '@' type=BibType '{' key=BibKey Comma
27 |         fields=BibField*[Comma]
28 |         //Comma?
29 |     '}'
30 | ;
31 | 
32 | BibField: name=Ident '=' value=Value;
33 | 
34 | 
35 | Value: '"' Piece+[Hash] '"' | Piece+[Hash];
36 | Piece: '{' Piece* '}' | InBraces;
37 | 
38 | 
39 | terminals
40 | 
41 | Comma: ',';
42 | Hash: '#';
43 | //BibType: /(?!(string|comment|preamble))\w+/;
44 | BibType: /\w+/;
45 | BibKey: /[^, =\t\}\n]+/;
46 | Ident:  /[^\d]([^ \t\"#%\'\(\),={}])+/;
47 | InBraces: /[^{}]+/;
48 | 
49 | BibCommentLine: /[^@][^\n]+/;
50 | BlockCommentBody: /[^\}]*/;
51 | 


--------------------------------------------------------------------------------
/tests/perf/test2/input1:
--------------------------------------------------------------------------------
 1 | @article{knuth1965,
 2 |     title = {On the translation of languages from left to right},
 3 |     author = {Knuth, Donald E},
 4 |     year = {1965},
 5 |     journal = {Information and control},
 6 |     number = {6},
 7 |     pages = {607–639},
 8 |     publisher = {Elsevier},
 9 |     volume = {8},
10 |     x-stars = {94.89},
11 |     keywords = {parsing, LR, dslbook}
12 | }
13 | 
14 | @inproceedings{tomita1984,
15 |     title = {LR parsers for natural languages},
16 |     author = {Tomita, Masaru},
17 |     booktitle = {10th International Conference on Computational Linguistics and 22nd Annual Meeting of the Association for Computational Linguistics},
18 |     pages = {354--357},
19 |     year = {1984}
20 | }
21 | 
22 | @inproceedings{tomita1985,
23 |     title = {{An Efficient Context-Free Parsing Algorithm for Natural Languages.}},
24 |     author = {Tomita, Masaru},
25 |     year = {1985},
26 |     booktitle = {{IJCAI}},
27 |     pages = {756–764},
28 |     volume = {2}
29 | }
30 | 
31 | @incollection{nozohoor1991,
32 |     title = {GLR Parsing for $\varepsilon$-Grammers},
33 |     author = {Nozohoor-Farshi, Rahman},
34 |     booktitle = {Generalized LR parsing},
35 |     file = {docs/tomita2012.pdf},
36 |     pages = {61--75},
37 |     year = {1991},
38 |     publisher = {Springer}
39 | }
40 | 
41 | @article{scott2007,
42 |     title = {BRNGLR: a cubic Tomita-style GLR parsing algorithm},
43 |     author = {Scott, Elizabeth and Johnstone, Adrian and Economopoulos, Rob},
44 |     journal = {Acta informatica},
45 |     volume = {44},
46 |     number = {6},
47 |     pages = {427--461},
48 |     year = {2007},
49 |     publisher = {Springer}
50 | }
51 | 
52 | @inproceedings{mcpeak2004,
53 |     title = {Elkhound: A fast, practical GLR parser generator},
54 |     author = {McPeak, Scott and Necula, George C},
55 |     booktitle = {International Conference on Compiler Construction},
56 |     pages = {73--88},
57 |     year = {2004},
58 |     organization = {Springer}
59 | }
60 | 


--------------------------------------------------------------------------------
/tests/perf/test2/input2:
--------------------------------------------------------------------------------
  1 | @article{knuth1965,
  2 |     title = {On the translation of languages from left to right},
  3 |     author = {Knuth, Donald E},
  4 |     year = {1965},
  5 |     journal = {Information and control},
  6 |     number = {6},
  7 |     pages = {607–639},
  8 |     publisher = {Elsevier},
  9 |     volume = {8},
 10 |     x-stars = {94.89},
 11 |     keywords = {parsing, LR, dslbook}
 12 | }
 13 | 
 14 | @inproceedings{tomita1984,
 15 |     title = {LR parsers for natural languages},
 16 |     author = {Tomita, Masaru},
 17 |     booktitle = {10th International Conference on Computational Linguistics and 22nd Annual Meeting of the Association for Computational Linguistics},
 18 |     pages = {354--357},
 19 |     year = {1984}
 20 | }
 21 | 
 22 | @inproceedings{tomita1985,
 23 |     title = {{An Efficient Context-Free Parsing Algorithm for Natural Languages.}},
 24 |     author = {Tomita, Masaru},
 25 |     year = {1985},
 26 |     booktitle = {{IJCAI}},
 27 |     pages = {756–764},
 28 |     volume = {2}
 29 | }
 30 | 
 31 | @incollection{nozohoor1991,
 32 |     title = {GLR Parsing for $\varepsilon$-Grammers},
 33 |     author = {Nozohoor-Farshi, Rahman},
 34 |     booktitle = {Generalized LR parsing},
 35 |     file = {docs/tomita2012.pdf},
 36 |     pages = {61--75},
 37 |     year = {1991},
 38 |     publisher = {Springer}
 39 | }
 40 | 
 41 | @article{scott2007,
 42 |     title = {BRNGLR: a cubic Tomita-style GLR parsing algorithm},
 43 |     author = {Scott, Elizabeth and Johnstone, Adrian and Economopoulos, Rob},
 44 |     journal = {Acta informatica},
 45 |     volume = {44},
 46 |     number = {6},
 47 |     pages = {427--461},
 48 |     year = {2007},
 49 |     publisher = {Springer}
 50 | }
 51 | 
 52 | @inproceedings{mcpeak2004,
 53 |     title = {Elkhound: A fast, practical GLR parser generator},
 54 |     author = {McPeak, Scott and Necula, George C},
 55 |     booktitle = {International Conference on Compiler Construction},
 56 |     pages = {73--88},
 57 |     year = {2004},
 58 |     organization = {Springer}
 59 | }
 60 | 
 61 | @article{knuth1965,
 62 |     title = {On the translation of languages from left to right},
 63 |     author = {Knuth, Donald E},
 64 |     year = {1965},
 65 |     journal = {Information and control},
 66 |     number = {6},
 67 |     pages = {607–639},
 68 |     publisher = {Elsevier},
 69 |     volume = {8},
 70 |     x-stars = {94.89},
 71 |     keywords = {parsing, LR, dslbook}
 72 | }
 73 | 
 74 | @inproceedings{tomita1984,
 75 |     title = {LR parsers for natural languages},
 76 |     author = {Tomita, Masaru},
 77 |     booktitle = {10th International Conference on Computational Linguistics and 22nd Annual Meeting of the Association for Computational Linguistics},
 78 |     pages = {354--357},
 79 |     year = {1984}
 80 | }
 81 | 
 82 | @inproceedings{tomita1985,
 83 |     title = {{An Efficient Context-Free Parsing Algorithm for Natural Languages.}},
 84 |     author = {Tomita, Masaru},
 85 |     year = {1985},
 86 |     booktitle = {{IJCAI}},
 87 |     pages = {756–764},
 88 |     volume = {2}
 89 | }
 90 | 
 91 | @incollection{nozohoor1991,
 92 |     title = {GLR Parsing for $\varepsilon$-Grammers},
 93 |     author = {Nozohoor-Farshi, Rahman},
 94 |     booktitle = {Generalized LR parsing},
 95 |     file = {docs/tomita2012.pdf},
 96 |     pages = {61--75},
 97 |     year = {1991},
 98 |     publisher = {Springer}
 99 | }
100 | 
101 | @article{scott2007,
102 |     title = {BRNGLR: a cubic Tomita-style GLR parsing algorithm},
103 |     author = {Scott, Elizabeth and Johnstone, Adrian and Economopoulos, Rob},
104 |     journal = {Acta informatica},
105 |     volume = {44},
106 |     number = {6},
107 |     pages = {427--461},
108 |     year = {2007},
109 |     publisher = {Springer}
110 | }
111 | 
112 | @inproceedings{mcpeak2004,
113 |     title = {Elkhound: A fast, practical GLR parser generator},
114 |     author = {McPeak, Scott and Necula, George C},
115 |     booktitle = {International Conference on Compiler Construction},
116 |     pages = {73--88},
117 |     year = {2004},
118 |     organization = {Springer}
119 | }
120 | 


--------------------------------------------------------------------------------
/tests/perf/test3/input1:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2012-2019 the original author or authors.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *      https://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | package org.springframework.boot.convert;
18 | 
19 | import java.time.Duration;
20 | import java.time.temporal.ChronoUnit;
21 | import java.util.Collections;
22 | import java.util.Set;
23 | 
24 | import org.springframework.core.convert.TypeDescriptor;
25 | import org.springframework.core.convert.converter.Converter;
26 | import org.springframework.core.convert.converter.GenericConverter;
27 | 
28 | /**
29 |  * {@link Converter} to convert from a {@link Duration} to a {@link String}.
30 |  *
31 |  * @author Phillip Webb
32 |  * @see DurationFormat
33 |  * @see DurationUnit
34 |  */
35 | final class DurationToStringConverter implements GenericConverter {
36 | 
37 | 	@Override
38 | 	public Set<ConvertiblePair> getConvertibleTypes() {
39 | 		return Collections.singleton(new ConvertiblePair(Duration.class, String.class));
40 | 	}
41 | 
42 | 	@Override
43 | 	public Object convert(Object source, TypeDescriptor sourceType, TypeDescriptor targetType) {
44 | 		if (source == null) {
45 | 			return null;
46 | 		}
47 | 		return convert((Duration) source, getDurationStyle(sourceType), getDurationUnit(sourceType));
48 | 	}
49 | 
50 | 	private ChronoUnit getDurationUnit(TypeDescriptor sourceType) {
51 | 		DurationUnit annotation = sourceType.getAnnotation(DurationUnit.class);
52 | 		return (annotation != null) ? annotation.value() : null;
53 | 	}
54 | 
55 | 	private DurationStyle getDurationStyle(TypeDescriptor sourceType) {
56 | 		DurationFormat annotation = sourceType.getAnnotation(DurationFormat.class);
57 | 		return (annotation != null) ? annotation.value() : null;
58 | 	}
59 | 
60 | 	private String convert(Duration source, DurationStyle style, ChronoUnit unit) {
61 | 		style = (style != null) ? style : DurationStyle.ISO8601;
62 | 		return style.print(source, unit);
63 | 	}
64 | 
65 | }
66 | 


--------------------------------------------------------------------------------
/tests/perf/test_cpu.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #######################################################################
 3 | # Testing parsing speed. This is used for the purpose of testing
 4 | #   of performance gains/loses for various approaches.
 5 | # Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
 6 | # Copyright: (c) 2017-2021 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
 7 | # License: MIT License
 8 | #######################################################################
 9 | import time
10 | import io
11 | import gc
12 | from itertools import groupby
13 | from os.path import dirname, join, getsize
14 | from parglare import Grammar, Parser, GLRParser
15 | from tests import TESTS
16 | 
17 | INPUTS = 6
18 | REPEAT = 5
19 | 
20 | 
21 | class TestResult:
22 |     def __init__(self, name):
23 |         self.name = name
24 |         self.input_idx = None
25 |         self.size = None
26 |         self.ambig = None
27 |         self.time = None
28 |         self.speed = None
29 | 
30 |         # Grammar/Table sizes
31 |         self.nonterminals = None
32 |         self.productions = None
33 |         self.states = None
34 | 
35 | 
36 | def cpu_tests():
37 |     results = []
38 |     for test_idx, test in enumerate(TESTS):
39 |         for parsing in ['LR', 'GLR']:
40 |             if ((not test.lr and parsing == 'LR') or
41 |                     (not test.glr and parsing == 'GLR')):
42 |                 continue
43 | 
44 |             parser_class = Parser if parsing == 'LR' else GLRParser
45 |             for input_idx in range(INPUTS):
46 |                 result = TestResult(f'{test.name} {parsing}')
47 |                 result.input_idx = input_idx + 1
48 |                 test_root = join(dirname(__file__), f'test{test_idx+1}')
49 |                 file_name = join(test_root, f'input{input_idx+1}')
50 |                 result.size = getsize(file_name)
51 | 
52 |                 g = Grammar.from_file(join(test_root, 'g.pg'))
53 |                 parser = parser_class(g)
54 |                 result.nonterminals = len(g.nonterminals)
55 |                 result.productions = len(g.productions)
56 |                 result.states = len(parser.table.states)
57 | 
58 |                 with io.open(file_name, 'r', encoding='utf-8') as f:
59 |                     content = f.read()
60 |                 gcold = gc.isenabled()
61 |                 gc.disable()
62 |                 try:
63 |                     t_start = time.time()
64 |                     for i in range(REPEAT):
65 |                         forest = parser.parse(content)
66 |                     t_end = time.time()
67 |                 finally:
68 |                     if gcold:
69 |                         gc.enable()
70 | 
71 |                 result.time = t_end - t_start
72 |                 result.speed = int(result.size / (t_end - t_start)*REPEAT)
73 |                 if parsing == 'GLR':
74 |                     result.ambig = forest.ambiguities
75 | 
76 |                 results.append(result)
77 | 
78 |     with open(join(dirname(__file__), 'reports', 'cpu-report.txt'), 'w') as f:
79 |         inputs = '|'.join(f'    I{i+1}   ' for i in range(INPUTS))
80 |         f.write(f'|               |{inputs}|\n')
81 |         previous_name = 'None'
82 |         for name, results in groupby(results, lambda r: r.name):
83 |             results = list(results)
84 |             if not name.startswith(previous_name):
85 |                 sizes_str = '|'.join(f'{r.size:^9,d}' for r in results)
86 |                 title = '{:15s}'.format(name[:-3] + ' sizes')
87 |                 f.write(f'|{title}|{sizes_str}|\n')
88 |             results_str = '|'.join(f'{r.speed:^9,d}' for r in results)
89 |             f.write(f'|{name:15s}|{results_str}|\n')
90 |             if name.endswith('GLR'):
91 |                 ambig_str = '|'.join(f'{r.ambig:^9,d}' for r in results)
92 |                 title = '{:15s}'.format(name[:-4] + ' ambig')
93 |                 f.write(f'|{title}|{ambig_str}|\n')
94 |             previous_name = ''.join(name.split()[:-1])
95 | 
96 | 
97 | if __name__ == '__main__':
98 |     cpu_tests()
99 | 


--------------------------------------------------------------------------------
/tests/perf/test_mem.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #######################################################################
 3 | # Testing memory utilization. This is used for the purpose of testing
 4 | #   of performance gains/loses for various approaches.
 5 | # Author: Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
 6 | # Copyright: (c) 2021 Igor R. Dejanovic <igor DOT dejanovic AT gmail DOT com>
 7 | # License: MIT License
 8 | #######################################################################
 9 | import io
10 | import tracemalloc
11 | import gc
12 | from itertools import groupby
13 | from os.path import dirname, join, getsize
14 | from parglare import Grammar, Parser, GLRParser
15 | from tests import TESTS
16 | 
17 | INPUTS = 6
18 | REPEAT = 5
19 | 
20 | 
21 | class TestResult:
22 |     def __init__(self, name):
23 |         self.name = name
24 |         self.input_idx = None
25 |         self.size = None
26 |         self.mem = None
27 |         self.ambig = None
28 | 
29 | 
30 | def mem_tests():
31 |     results = []
32 |     for test_idx, test in enumerate(TESTS):
33 |         for parsing in ['LR', 'GLR']:
34 |             if ((not test.lr and parsing == 'LR') or
35 |                     (not test.glr and parsing == 'GLR')):
36 |                 continue
37 | 
38 |             parser_class = Parser if parsing == 'LR' else GLRParser
39 |             for input_idx in range(INPUTS):
40 |                 result = TestResult(f'{test.name} {parsing}')
41 |                 result.input_idx = input_idx + 1
42 |                 test_root = join(dirname(__file__), f'test{test_idx+1}')
43 |                 file_name = join(test_root, f'input{input_idx+1}')
44 |                 result.size = getsize(file_name)
45 | 
46 |                 g = Grammar.from_file(join(test_root, 'g.pg'))
47 |                 parser = parser_class(g)
48 | 
49 |                 with io.open(file_name, 'r', encoding='utf-8') as f:
50 |                     content = f.read()
51 | 
52 |                 gc.collect()
53 |                 tracemalloc.start()
54 |                 forest = parser.parse(content)
55 |                 _, peak = tracemalloc.get_traced_memory()
56 |                 result.mem = peak // 1000
57 |                 tracemalloc.stop()
58 | 
59 |                 if parsing == 'GLR':
60 |                     result.ambig = forest.ambiguities
61 | 
62 |                 results.append(result)
63 | 
64 |     with open(join(dirname(__file__), 'reports', 'mem-report.txt'), 'w') as f:
65 |         inputs = '|'.join(f'    I{i+1}   ' for i in range(INPUTS))
66 |         f.write(f'|               |{inputs}|\n')
67 |         previous_name = 'None'
68 |         for name, results in groupby(results, lambda r: r.name):
69 |             results = list(results)
70 |             if not name.startswith(previous_name):
71 |                 sizes_str = '|'.join(f'{r.size:^9,d}' for r in results)
72 |                 title = '{:15s}'.format(name[:-3] + ' sizes')
73 |                 f.write(f'|{title}|{sizes_str}|\n')
74 |             results_str = '|'.join(f'{r.mem:^9,d}' for r in results)
75 |             f.write(f'|{name:15s}|{results_str}|\n')
76 |             if name.endswith('GLR'):
77 |                 ambig_str = '|'.join(f'{r.ambig:^9,d}' for r in results)
78 |                 title = '{:15s}'.format(name[:-4] + ' ambig')
79 |                 f.write(f'|{title}|{ambig_str}|\n')
80 |             previous_name = ''.join(name.split()[:-1])
81 | 
82 | 
83 | if __name__ == '__main__':
84 |     mem_tests()
85 | 


--------------------------------------------------------------------------------
/tests/perf/tests.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
 3 | Test = namedtuple('Test', 'name lr glr')
 4 | 
 5 | TESTS = [
 6 |     # Name, LR, GLR
 7 |     Test('JSON', True, True),
 8 |     Test('BibTeX', True, True),
 9 |     Test('Java', False, True)
10 | ]
11 | 


--------------------------------------------------------------------------------