├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ ├── other.md │ └── question.md └── workflows │ ├── codecov.yml │ ├── mypy.yml │ └── tests.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── _static │ ├── comparison_memory.png │ ├── comparison_runtime.png │ ├── lark_cheatsheet.pdf │ └── sppf │ │ ├── sppf.html │ │ ├── sppf_111.svg │ │ ├── sppf_abcd.svg │ │ ├── sppf_abcd_noint.svg │ │ └── sppf_cycle.svg ├── classes.rst ├── conf.py ├── features.md ├── forest.rst ├── grammar.md ├── how_to_develop.md ├── how_to_use.md ├── ide │ ├── app.html │ ├── app.js │ ├── app │ │ ├── app.py │ │ ├── core.py │ │ ├── examples.py │ │ ├── ext.py │ │ ├── files.json │ │ ├── html5.py │ │ ├── ignite.py │ │ └── utils.py │ ├── is-loading.gif │ └── lark-logo.png ├── index.rst ├── json_tutorial.md ├── make.bat ├── parsers.md ├── philosophy.md ├── recipes.md ├── requirements.txt ├── tools.md ├── tree_construction.md └── visitors.rst ├── examples ├── README.rst ├── __init__.py ├── advanced │ ├── README.rst │ ├── _json_parser.py │ ├── conf_earley.py │ ├── conf_lalr.py │ ├── create_ast.py │ ├── custom_lexer.py │ ├── dynamic_complete.py │ ├── error_handling.py │ ├── error_reporting_earley.py │ ├── error_reporting_lalr.py │ ├── prioritizer.py │ ├── py3to2.py │ ├── python2.lark │ ├── python_parser.py │ ├── qscintilla_json.py │ ├── reconstruct_json.py │ ├── reconstruct_python.py │ ├── template_lark.lark │ ├── templates.py │ └── tree_forest_transformer.py ├── calc.py ├── composition │ ├── README.rst │ ├── combined_csv_and_json.txt │ ├── csv.lark │ ├── eval_csv.py │ ├── eval_json.py │ ├── json.lark │ ├── main.py │ └── storage.lark ├── fruitflies.png ├── fruitflies.py ├── grammars │ ├── README.rst │ └── verilog.lark ├── indented_tree.py ├── json_parser.py ├── lark_grammar.py ├── relative-imports │ ├── multiple2.lark │ ├── multiple3.lark │ ├── multiples.lark │ └── multiples.py ├── standalone │ ├── README.rst │ ├── create_standalone.sh │ ├── json.lark │ └── json_parser_main.py ├── tests │ ├── negative_priority.lark │ └── no_newline_at_end.lark └── turtle_dsl.py ├── lark ├── __init__.py ├── __pyinstaller │ ├── __init__.py │ └── hook-lark.py ├── ast_utils.py ├── common.py ├── exceptions.py ├── grammar.py ├── grammars │ ├── __init__.py │ ├── common.lark │ ├── lark.lark │ ├── python.lark │ └── unicode.lark ├── indenter.py ├── lark.py ├── lexer.py ├── load_grammar.py ├── parse_tree_builder.py ├── parser_frontends.py ├── parsers │ ├── __init__.py │ ├── cyk.py │ ├── earley.py │ ├── earley_common.py │ ├── earley_forest.py │ ├── grammar_analysis.py │ ├── lalr_analysis.py │ ├── lalr_interactive_parser.py │ ├── lalr_parser.py │ ├── lalr_parser_state.py │ └── xearley.py ├── py.typed ├── reconstruct.py ├── tools │ ├── __init__.py │ ├── nearley.py │ ├── serialize.py │ └── standalone.py ├── tree.py ├── tree_matcher.py ├── tree_templates.py ├── utils.py └── visitors.py ├── pyproject.toml ├── readthedocs.yml ├── test-requirements.txt ├── tests ├── __init__.py ├── __main__.py ├── grammars │ ├── ab.lark │ ├── leading_underscore_grammar.lark │ ├── templates.lark │ ├── test.lark │ ├── test_relative_import_of_nested_grammar.lark │ ├── test_relative_import_of_nested_grammar__grammar_to_import.lark │ ├── test_relative_import_of_nested_grammar__nested_grammar.lark │ ├── test_unicode.lark │ └── three_rules_using_same_token.lark ├── test_cache.py ├── test_grammar.py ├── test_lexer.py ├── test_logger.py ├── test_nearley │ 
├── __init__.py │ ├── grammars │ │ ├── include_unicode.ne │ │ └── unicode.ne │ └── test_nearley.py ├── test_parser.py ├── test_pattern_matching.py ├── test_python_grammar.py ├── test_reconstructor.py ├── test_relative_import.lark ├── test_relative_import_preserves_leading_underscore.lark ├── test_relative_import_rename.lark ├── test_relative_import_rules_dependencies_imported_only_once.lark ├── test_relative_import_unicode.lark ├── test_relative_multi_import.lark ├── test_relative_rule_import.lark ├── test_relative_rule_import_drop_ignore.lark ├── test_relative_rule_import_rename.lark ├── test_relative_rule_import_subrule.lark ├── test_relative_rule_import_subrule_no_conflict.lark ├── test_templates_import.lark ├── test_tools.py ├── test_tree_forest_transformer.py ├── test_tree_templates.py └── test_trees.py └── tox.ini /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: lark-parser 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | A clear and concise description of what the bug is, and what you expected to happen. 13 | 14 | **To Reproduce** 15 | 16 | Provide a short script that reproduces the erroneous behavior. 17 | 18 | If that is impossible, provide clear steps to reproduce the behavior. 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Suggestion** 11 | Provide a clear and concise description of what the problem is, and what you would like to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/other.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Other 3 | about: For any discussion that doesn't fit the templates 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask a question about Lark or request help 4 | title: '' 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What is your question?** 11 | 12 | Try to be accurate and concise. 13 | 14 | **If you're having trouble with your code or grammar** 15 | 16 | Provide a small script that encapsulates your issue. 17 | 18 | Explain what you're trying to do, and what is obstructing your progress. 19 | -------------------------------------------------------------------------------- /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | name: Compute coverage and push to Codecov 2 | on: [push] 3 | jobs: 4 | run: 5 | runs-on: ${{ matrix.os }} 6 | strategy: 7 | matrix: 8 | os: [ubuntu-latest, macos-latest, windows-latest] 9 | env: 10 | OS: ${{ matrix.os }} 11 | PYTHON: '3.8' 12 | steps: 13 | - uses: actions/checkout@v3 14 | name: Download with submodules 15 | with: 16 | submodules: recursive 17 | - name: Setup Python 18 | uses: actions/setup-python@v3 19 | with: 20 | python-version: "3.8" 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install -r test-requirements.txt 25 | - name: Generate coverage report 26 | run: | 27 | pip install pytest 28 | pip install pytest-cov 29 | pytest --cov=./ --cov-report=xml 30 | - name: Upload coverage to Codecov 31 | uses: codecov/codecov-action@v1 32 | with: 33 | token: ${{ secrets.CODECOV_TOKEN }} 34 | files: ./coverage.xml 35 | flags: unittests 36 | env_vars: OS,PYTHON 37 | name: codecov-umbrella 38 | fail_ci_if_error: false 39 | path_to_write_report: ./coverage/codecov_report.txt 40 | verbose: true 41 | -------------------------------------------------------------------------------- /.github/workflows/mypy.yml: -------------------------------------------------------------------------------- 1 | name: Python type check 2 | on: [push, pull_request] 3 | jobs: 4 | type: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v3 8 | with: 9 | submodules: recursive 10 | - name: Lint with mypy 11 | run: pipx run tox -e type 12 | 13 | pre-commit: 14 | name: Format 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: actions/setup-python@v3 19 | - uses: pre-commit/action@v3.0.1 20 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | build: 6 | runs-on: ubuntu-latest 7 | strategy: 8 | matrix: 9 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13-dev", "pypy-3.10"] 10 | 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | submodules: recursive 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install dependencies 
20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install -r test-requirements.txt 23 | - name: Run tests 24 | run: | 25 | python -m tests 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | /.tox 4 | /lark.egg-info/** 5 | /lark_parser.egg-info/** 6 | tags 7 | .vscode 8 | .idea 9 | .ropeproject 10 | .cache 11 | .mypy_cache 12 | /dist 13 | /build 14 | docs/_build 15 | docs/examples 16 | docs/sg_execution_times.rst 17 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tests/test_nearley/nearley"] 2 | path = tests/test_nearley/nearley 3 | url = https://github.com/Hardmath123/nearley 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # To use: 2 | # 3 | # pre-commit run -a 4 | # 5 | # Or: 6 | # 7 | # pre-commit install # (runs every time you commit in git) 8 | # 9 | # To update this file: 10 | # 11 | # pre-commit autoupdate 12 | # 13 | # See https://github.com/pre-commit/pre-commit 14 | 15 | repos: 16 | # Standard hooks 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: "v4.4.0" 19 | hooks: 20 | - id: check-added-large-files 21 | - id: check-case-conflict 22 | - id: check-merge-conflict 23 | - id: check-symlinks 24 | - id: check-toml 25 | - id: check-yaml 26 | - id: debug-statements 27 | - id: end-of-file-fixer 28 | exclude: '(^tests/.*\.lark|\.svg)$' 29 | - id: mixed-line-ending 30 | - id: requirements-txt-fixer 31 | - id: trailing-whitespace 32 | exclude: '(^tests/.*\.lark|\.svg)$' 33 | 34 | - repo: https://github.com/codespell-project/codespell 35 | rev: v2.2.2 36 | hooks: 37 | - id: codespell 38 | args: ["-L", "nd,iif,ot,datas"] 39 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | v1.0 2 | 3 | - `maybe_placeholders` is now True by default 4 | 5 | - Renamed TraditionalLexer to BasicLexer, and 'standard' lexer option to 'basic' 6 | 7 | - Default priority is now 0, for both terminals and rules (used to be 1 for terminals) 8 | 9 | - Discard mechanism is now done by returning Discard, instead of raising it as an exception. 10 | 11 | - `use_accepts` in `UnexpectedInput.match_examples()` is now True by default 12 | 13 | - `v_args(meta=True)` now gives meta as the first argument. i.e. `(meta, children)` 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2017 Erez Shinan 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE docs/* examples/*.py examples/*.png examples/*.lark tests/*.py tests/*.lark tests/grammars/* tests/test_nearley/*.py tests/test_nearley/grammars/* 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Lark 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/comparison_memory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/docs/_static/comparison_memory.png -------------------------------------------------------------------------------- /docs/_static/comparison_runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/docs/_static/comparison_runtime.png -------------------------------------------------------------------------------- /docs/_static/lark_cheatsheet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/docs/_static/lark_cheatsheet.pdf -------------------------------------------------------------------------------- /docs/classes.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | Lark 5 | ---- 6 | 7 | .. autoclass:: lark.Lark 8 | :members: open, parse, parse_interactive, lex, save, load, get_terminal, open_from_package 9 | 10 | 11 | Using Unicode character classes with ``regex`` 12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 13 | 14 | Python's builtin ``re`` module has a few persistent known bugs and also doesn't support 15 | advanced regex features such as Unicode character classes (``\p{...}``). 16 | With ``pip install lark[regex]``, the ``regex`` module will be 17 | installed alongside lark and can act as a drop-in replacement for ``re``. 
18 | 19 | Any instance of Lark instantiated with ``regex=True`` will use the ``regex`` module instead of ``re``. 20 | 21 | For example, we can use character classes to match PEP-3131 compliant Python identifiers: 22 | 23 | :: 24 | 25 | from lark import Lark 26 | >>> g = Lark(r""" 27 | ?start: NAME 28 | NAME: ID_START ID_CONTINUE* 29 | ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/ 30 | ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}·]+/ 31 | """, regex=True) 32 | 33 | >>> g.parse('வணக்கம்') 34 | 'வணக்கம்' 35 | 36 | 37 | Tree 38 | ---- 39 | 40 | .. autoclass:: lark.Tree 41 | :members: pretty, find_pred, find_data, iter_subtrees, scan_values, 42 | iter_subtrees_topdown, __rich__ 43 | 44 | Token 45 | ----- 46 | 47 | .. autoclass:: lark.Token 48 | 49 | Transformer, Visitor & Interpreter 50 | ---------------------------------- 51 | 52 | See :doc:`visitors`. 53 | 54 | ForestVisitor, ForestTransformer, & TreeForestTransformer 55 | ----------------------------------------------------------- 56 | 57 | See :doc:`forest`. 58 | 59 | UnexpectedInput 60 | --------------- 61 | 62 | .. autoclass:: lark.exceptions.UnexpectedInput 63 | :members: get_context, match_examples 64 | 65 | .. autoclass:: lark.exceptions.UnexpectedToken 66 | 67 | .. autoclass:: lark.exceptions.UnexpectedCharacters 68 | 69 | .. autoclass:: lark.exceptions.UnexpectedEOF 70 | 71 | InteractiveParser 72 | ----------------- 73 | 74 | .. autoclass:: lark.parsers.lalr_interactive_parser.InteractiveParser 75 | :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts, as_immutable 76 | 77 | .. autoclass:: lark.parsers.lalr_interactive_parser.ImmutableInteractiveParser 78 | :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts, as_mutable 79 | 80 | 81 | ast_utils 82 | --------- 83 | 84 | For an example of using ``ast_utils``, see `/examples/advanced/create_ast.py`_ 85 | 86 | .. autoclass:: lark.ast_utils.Ast 87 | 88 | .. autoclass:: lark.ast_utils.AsList 89 | 90 | .. autofunction:: lark.ast_utils.create_transformer 91 | 92 | .. _/examples/advanced/create_ast.py: examples/advanced/create_ast.html 93 | 94 | Indenter 95 | -------- 96 | 97 | .. autoclass:: lark.indenter.Indenter 98 | .. autoclass:: lark.indenter.PythonIndenter 99 | 100 | TextSlice 101 | --------- 102 | 103 | .. autoclass:: lark.utils.TextSlice 104 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Lark documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Aug 16 13:09:41 2020. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
19 | # 20 | import os 21 | import sys 22 | sys.path.insert(0, os.path.abspath('..')) 23 | autodoc_member_order = 'bysource' 24 | 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = [ 36 | 'sphinx.ext.autodoc', 37 | 'sphinx.ext.napoleon', 38 | 'sphinx.ext.coverage', 39 | 'recommonmark', 40 | 'sphinx_markdown_tables', 41 | 'sphinx_gallery.gen_gallery' 42 | ] 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ['_templates'] 46 | 47 | # The suffix(es) of source filenames. 48 | # You can specify multiple suffix as a list of string: 49 | # 50 | # source_suffix = ['.rst', '.md'] 51 | source_suffix = { 52 | '.rst': 'restructuredtext', 53 | '.md': 'markdown' 54 | } 55 | 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # General information about the project. 61 | project = 'Lark' 62 | copyright = '2020, Erez Shinan' 63 | author = 'Erez Shinan' 64 | 65 | # The version info for the project you're documenting, acts as replacement for 66 | # |version| and |release|, also used in various other places throughout the 67 | # built documents. 68 | # 69 | # The short X.Y version. 70 | version = '' 71 | # The full version, including alpha/beta/rc tags. 72 | release = '' 73 | 74 | # The language for content autogenerated by Sphinx. Refer to documentation 75 | # for a list of supported languages. 76 | # 77 | # This is also used if you do content translation via gettext catalogs. 78 | # Usually you set "language" from the command line for these cases. 79 | language = 'en' 80 | 81 | # List of patterns, relative to source directory, that match files and 82 | # directories to ignore when looking for source files. 83 | # This patterns also effect to html_static_path and html_extra_path 84 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 85 | 86 | # The name of the Pygments (syntax highlighting) style to use. 87 | pygments_style = 'sphinx' 88 | 89 | # If true, `todo` and `todoList` produce output, else they produce nothing. 90 | todo_include_todos = False 91 | 92 | 93 | # -- Options for HTML output ---------------------------------------------- 94 | 95 | # The theme to use for HTML and HTML Help pages. See the documentation for 96 | # a list of builtin themes. 97 | # 98 | html_theme = 'sphinx_rtd_theme' 99 | 100 | # Theme options are theme-specific and customize the look and feel of a theme 101 | # further. For a list of options available for each theme, see the 102 | # documentation. 103 | # 104 | html_theme_options = { 105 | 'prev_next_buttons_location': 'both' 106 | } 107 | 108 | # Add any paths that contain custom static files (such as style sheets) here, 109 | # relative to this directory. They are copied after the builtin static files, 110 | # so a file named "default.css" will overwrite the builtin "default.css". 111 | html_static_path = ['_static'] 112 | 113 | # Custom sidebar templates, must be a dictionary that maps document names 114 | # to template names. 
115 | # 116 | # This is required for the alabaster theme 117 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 118 | html_sidebars = { 119 | '**': [ 120 | 'relations.html', # needs 'show_related': True theme option to display 121 | 'searchbox.html', 122 | ] 123 | } 124 | 125 | 126 | # -- Options for HTMLHelp output ------------------------------------------ 127 | 128 | # Output file base name for HTML help builder. 129 | htmlhelp_basename = 'Larkdoc' 130 | 131 | 132 | # -- Options for LaTeX output --------------------------------------------- 133 | 134 | latex_elements = { 135 | # The paper size ('letterpaper' or 'a4paper'). 136 | # 137 | # 'papersize': 'letterpaper', 138 | 139 | # The font size ('10pt', '11pt' or '12pt'). 140 | # 141 | # 'pointsize': '10pt', 142 | 143 | # Additional stuff for the LaTeX preamble. 144 | # 145 | # 'preamble': '', 146 | 147 | # Latex figure (float) alignment 148 | # 149 | # 'figure_align': 'htbp', 150 | } 151 | 152 | # Grouping the document tree into LaTeX files. List of tuples 153 | # (source start file, target name, title, 154 | # author, documentclass [howto, manual, or own class]). 155 | latex_documents = [ 156 | (master_doc, 'Lark.tex', 'Lark Documentation', 157 | 'Erez Shinan', 'manual'), 158 | ] 159 | 160 | 161 | # -- Options for manual page output --------------------------------------- 162 | 163 | # One entry per manual page. List of tuples 164 | # (source start file, name, description, authors, manual section). 165 | man_pages = [ 166 | (master_doc, 'lark', 'Lark Documentation', 167 | [author], 7) 168 | ] 169 | 170 | 171 | # -- Options for Texinfo output ------------------------------------------- 172 | 173 | # Grouping the document tree into Texinfo files. List of tuples 174 | # (source start file, target name, title, author, 175 | # dir menu entry, description, category) 176 | texinfo_documents = [ 177 | (master_doc, 'Lark', 'Lark Documentation', 178 | author, 'Lark', 'One line description of project.', 179 | 'Miscellaneous'), 180 | ] 181 | 182 | # -- Sphinx gallery config ------------------------------------------- 183 | 184 | sphinx_gallery_conf = { 185 | 'examples_dirs': ['../examples'], 186 | 'gallery_dirs': ['examples'], 187 | } 188 | -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | # Features 2 | 3 | ## Main Features 4 | - Earley parser, capable of parsing any context-free grammar 5 | - Implements SPPF, for efficient parsing and storing of ambiguous grammars. 6 | - LALR(1) parser, limited in power of expression, but very efficient in space and performance (O(n)). 7 | - Implements a parse-aware lexer that provides a better power of expression than traditional LALR implementations (such as ply). 8 | - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) 9 | - Builds a parse-tree (AST) automagically based on the grammar 10 | - Stand-alone parser generator - create a small independent parser to embed in your project. ([read more](tools.html#stand-alone-parser)) 11 | - Flexible error handling by using an interactive parser interface (LALR only) 12 | - Automatic line & column tracking (for both tokens and matched rules) 13 | - Automatic terminal collision resolution 14 | - Warns on regex collisions using the optional `interegular` library. 
([read more](how_to_use.html#regex-collisions)) 15 | - Grammar composition - Import terminals and rules from other grammars (see [example](https://github.com/lark-parser/lark/tree/master/examples/composition)). 16 | - Standard library of terminals (strings, numbers, names, etc.) 17 | - Unicode fully supported 18 | - Extensive test suite 19 | - Type annotations (MyPy support) 20 | - Pure-Python implementation 21 | 22 | [Read more about the parsers](parsers.md) 23 | 24 | ## Extra features 25 | - Support for external regex module ([see here](classes.html#using-unicode-character-classes-with-regex)) 26 | - Import grammars from Nearley.js ([read more](tools.html#importing-grammars-from-nearleyjs)) 27 | - CYK parser 28 | - Visualize your parse trees as dot or png files ([see_example](https://github.com/lark-parser/lark/blob/master/examples/fruitflies.py)) 29 | - Automatic reconstruction of input from parse-tree (see [example](https://github.com/lark-parser/lark/blob/master/examples/advanced/reconstruct_json.py) and [another example](https://github.com/lark-parser/lark/blob/master/examples/advanced/reconstruct_python.py)) 30 | - Use Lark grammars in [Julia](https://github.com/jamesrhester/Lerche.jl) and [Javascript](https://github.com/lark-parser/Lark.js). 31 | -------------------------------------------------------------------------------- /docs/forest.rst: -------------------------------------------------------------------------------- 1 | Working with the SPPF 2 | ===================== 3 | 4 | When parsing with Earley, Lark provides the ``ambiguity='forest'`` option 5 | to obtain the shared packed parse forest (SPPF) produced by the parser as 6 | an alternative to it being automatically converted to a tree. 7 | 8 | Lark provides a few tools to facilitate working with the SPPF. Here are some 9 | things to consider when deciding whether or not to use the SPPF. 10 | 11 | **Pros** 12 | 13 | - Efficient storage of highly ambiguous parses 14 | - Precise handling of ambiguities 15 | - Custom rule prioritizers 16 | - Ability to handle infinite ambiguities 17 | - Directly transform forest -> object instead of forest -> tree -> object 18 | 19 | **Cons** 20 | 21 | - More complex than working with a tree 22 | - SPPF may contain nodes corresponding to rules generated internally 23 | - Loss of Lark grammar features: 24 | 25 | - Rules starting with '_' are not inlined in the SPPF 26 | - Rules starting with '?' are never inlined in the SPPF 27 | - All tokens will appear in the SPPF 28 | 29 | SymbolNode 30 | ---------- 31 | 32 | .. autoclass:: lark.parsers.earley_forest.SymbolNode 33 | :members: is_ambiguous, children 34 | 35 | PackedNode 36 | ---------- 37 | 38 | .. autoclass:: lark.parsers.earley_forest.PackedNode 39 | :members: children 40 | 41 | ForestVisitor 42 | ------------- 43 | 44 | .. autoclass:: lark.parsers.earley_forest.ForestVisitor 45 | :members: visit, visit_symbol_node_in, visit_symbol_node_out, 46 | visit_packed_node_in, visit_packed_node_out, 47 | visit_token_node, on_cycle, get_cycle_in_path 48 | 49 | ForestTransformer 50 | ----------------- 51 | 52 | .. autoclass:: lark.parsers.earley_forest.ForestTransformer 53 | :members: transform, transform_symbol_node, transform_intermediate_node, 54 | transform_packed_node, transform_token_node 55 | 56 | TreeForestTransformer 57 | --------------------- 58 | 59 | .. 
autoclass:: lark.parsers.earley_forest.TreeForestTransformer 60 | :members: __default__, __default_token__, __default_ambig__ 61 | 62 | handles_ambiguity 63 | ----------------- 64 | 65 | .. autofunction:: lark.parsers.earley_forest.handles_ambiguity 66 | -------------------------------------------------------------------------------- /docs/how_to_develop.md: -------------------------------------------------------------------------------- 1 | # How to develop Lark - Guide 2 | 3 | There are many ways you can help the project: 4 | 5 | * Help solve issues 6 | * Improve the documentation 7 | * Write new grammars for Lark's library 8 | * Write a blog post introducing Lark to your audience 9 | * Port Lark to another language 10 | * Help with code development 11 | 12 | If you're interested in taking one of these on, contact us on [Gitter](https://gitter.im/lark-parser/Lobby) or [Github Discussion](https://github.com/lark-parser/lark/discussions), and we will provide more details and assist you in the process. 13 | 14 | ## Code Style 15 | 16 | Lark does not follow a predefined code style. 17 | We accept any code style that makes sense, as long as it's Pythonic and easy to read. 18 | 19 | ## Unit Tests 20 | 21 | Lark comes with an extensive set of tests. Many of the tests will run several times, once for each parser configuration. 22 | 23 | To run the tests, just go to the lark project root, and run the command: 24 | ```bash 25 | python -m tests 26 | ``` 27 | 28 | or 29 | 30 | ```bash 31 | pypy -m tests 32 | ``` 33 | 34 | For a list of supported interpreters, you can consult the `tox.ini` file. 35 | 36 | You can also run a single unittest using its class and method name, for example: 37 | ```bash 38 | ## test_package test_class_name.test_function_name 39 | python -m tests TestLalrBasic.test_keep_all_tokens 40 | ``` 41 | 42 | ### tox 43 | 44 | To run all Unit Tests with tox, 45 | install tox and every supported Python interpreter, from Python 2.7 up to the latest (consult the file tox.ini). 46 | Then, 47 | run the command `tox` in the root of this project (where the main setup.py file is). 48 | 49 | For example, 50 | if you would like to run the Unit Tests only for Python 2.7, 51 | you can run the command `tox -e py27`. 52 | 53 | ### pytest 54 | 55 | You can also run the tests using pytest: 56 | 57 | ```bash 58 | pytest tests 59 | ``` 60 | 61 | ### Using setup.py 62 | 63 | Another way to run the tests is using setup.py: 64 | 65 | ```bash 66 | python setup.py test 67 | ``` 68 | 69 | ## Building the Documentation 70 | 71 | To build the documentation: 72 | 73 | ```sh 74 | cd docs/ 75 | pip install -r requirements.txt 76 | make html 77 | ``` 78 | 79 | To review the result, open the built HTML files under `_build/html/` in your browser. 80 | -------------------------------------------------------------------------------- /docs/ide/app.html: -------------------------------------------------------------------------------- 1 | <!-- page markup lost in extraction --> 102 | -------------------------------------------------------------------------------- /docs/ide/app.js: -------------------------------------------------------------------------------- 1 | class app { 2 | 3 | constructor(modules, invocation){ 4 | languagePluginLoader.then(() => { 5 | // If you don't require pre-loaded Python packages, remove this promise below. 
6 | window.pyodide.runPythonAsync("import setuptools, micropip").then(()=>{ 7 | window.pyodide.runPythonAsync("micropip.install('lark-parser')").then(()=>{ 8 | this.fetchSources(modules).then(() => { 9 | window.pyodide.runPythonAsync("import " + Object.keys(modules).join("\nimport ") + "\n" + invocation + "\n").then(() => this.initializingComplete()); 10 | }); 11 | }); 12 | }); 13 | }); 14 | } 15 | 16 | loadSources(module, baseURL, files) { 17 | let promises = []; 18 | 19 | for (let f in files) { 20 | promises.push( 21 | new Promise((resolve, reject) => { 22 | let file = files[f]; 23 | let url = (baseURL ? baseURL + "/" : "") + file; 24 | 25 | fetch(url, {}).then((response) => { 26 | if (response.status === 200) 27 | return response.text().then((code) => { 28 | let path = ("/lib/python3.7/site-packages/" + module + "/" + file).split("/"); 29 | let lookup = ""; 30 | 31 | for (let i in path) { 32 | if (!path[i]) { 33 | continue; 34 | } 35 | 36 | lookup += (lookup ? "/" : "") + path[i]; 37 | 38 | if (parseInt(i) === path.length - 1) { 39 | window.pyodide._module.FS.writeFile(lookup, code); 40 | console.debug(`fetched ${lookup}`); 41 | } else { 42 | try { 43 | window.pyodide._module.FS.lookupPath(lookup); 44 | } catch { 45 | window.pyodide._module.FS.mkdir(lookup); 46 | console.debug(`created ${lookup}`); 47 | } 48 | } 49 | } 50 | 51 | resolve(); 52 | }); 53 | else 54 | reject(); 55 | }); 56 | }) 57 | ); 58 | } 59 | 60 | return Promise.all(promises); 61 | } 62 | 63 | fetchSources(modules) { 64 | let promises = []; 65 | 66 | for( let module of Object.keys(modules) ) 67 | { 68 | promises.push( 69 | new Promise((resolve, reject) => { 70 | fetch(`${modules[module]}/files.json`, {}).then((response) => { 71 | if (response.status === 200) { 72 | response.text().then((list) => { 73 | let files = JSON.parse(list); 74 | 75 | this.loadSources(module, modules[module], files).then(() => { 76 | resolve(); 77 | }) 78 | }) 79 | } else { 80 | reject(); 81 | } 82 | }) 83 | })); 84 | } 85 | 86 | return Promise.all(promises).then(() => { 87 | for( let module of Object.keys(modules) ) { 88 | window.pyodide.loadedPackages[module] = "default channel"; 89 | } 90 | 91 | window.pyodide.runPython( 92 | 'import importlib as _importlib\n' + 93 | '_importlib.invalidate_caches()\n' 94 | ); 95 | }); 96 | } 97 | 98 | initializingComplete() { 99 | document.body.classList.remove("is-loading") 100 | } 101 | } 102 | 103 | (function () { 104 | window.top.app = new app({"app": "app"}, "import app.app; app.app.start()"); 105 | })(); 106 | -------------------------------------------------------------------------------- /docs/ide/app/app.py: -------------------------------------------------------------------------------- 1 | from . import html5 2 | from .examples import examples 3 | 4 | from lark import Lark 5 | from lark.tree import Tree 6 | 7 | 8 | class App(html5.Div): 9 | def __init__(self): 10 | super().__init__(""" 11 |

12 | <!-- template markup lost in extraction: an "IDE" header, example and parser selectors, "Grammar:" and "Input:" textareas, and an output area (the widgets the code below references as self.examples, self.parser, self.grammar, self.input and self.ast) --> 39 |
40 | """) 41 | self.sinkEvent("onKeyUp", "onChange") 42 | 43 | self.parser = "earley" 44 | 45 | # Pre-load examples 46 | for name, (grammar, input) in examples.items(): 47 | option = html5.Option(name) 48 | option.grammar = grammar 49 | option.input = input 50 | 51 | self.examples.appendChild(option) 52 | 53 | def onChange(self, e): 54 | if html5.utils.doesEventHitWidgetOrChildren(e, self.examples): 55 | example = self.examples.children(self.examples["selectedIndex"]) 56 | self.grammar["value"] = example.grammar.strip() 57 | self.input["value"] = example.input.strip() 58 | self.onKeyUp() 59 | 60 | elif html5.utils.doesEventHitWidgetOrChildren(e, self.parser): 61 | self.parser = self.parser.children(self.parser["selectedIndex"])["value"] 62 | self.onKeyUp() 63 | 64 | def onKeyUp(self, e=None): 65 | try: 66 | l = Lark(self.grammar["value"], parser=self.parser) # building the parser may fail too 67 | ast = l.parse(self.input["value"]) 68 | except Exception as e: 69 | self.ast.appendChild( 70 | html5.Li(str(e)), replace=True # show the error in the output area 71 | ) 72 | return 73 | 74 | print(ast) 75 | traverse = lambda node: html5.Li([node.data, html5.Ul([traverse(c) for c in node.children])] if isinstance(node, Tree) else node) 76 | self.ast.appendChild(traverse(ast), replace=True) 77 | 78 | 79 | def start(): 80 | html5.Body().appendChild( 81 | App() 82 | ) 83 | -------------------------------------------------------------------------------- /docs/ide/app/examples.py: -------------------------------------------------------------------------------- 1 | 2 | # Examples formatted this way: 3 | # "name": ("grammar", "demo-input") 4 | 5 | examples = { 6 | 7 | # --- hello.lark --- 8 | "hello.lark": (""" 9 | start: WORD "," WORD "!" 10 | 11 | %import common.WORD // imports from terminal library 12 | %ignore " " // Disregard spaces in text 13 | """, "Hello, World!"), 14 | 15 | # --- calc.lark --- 16 | "calc.lark": (""" 17 | ?start: sum 18 | | NAME "=" sum -> assign_var 19 | 20 | ?sum: product 21 | | sum "+" product -> add 22 | | sum "-" product -> sub 23 | 24 | ?product: atom 25 | | product "*" atom -> mul 26 | | product "/" atom -> div 27 | 28 | ?atom: NUMBER -> number 29 | | "-" atom -> neg 30 | | NAME -> var 31 | | "(" sum ")" 32 | 33 | %import common.CNAME -> NAME 34 | %import common.NUMBER 35 | %import common.WS_INLINE 36 | %ignore WS_INLINE""", 37 | "1 + 2 * 3 + 4"), 38 | 39 | # --- json.lark --- 40 | "json.lark": (""" 41 | ?start: value 42 | ?value: object 43 | | array 44 | | string 45 | | SIGNED_NUMBER -> number 46 | | "true" -> true 47 | | "false" -> false 48 | | "null" -> null 49 | array : "[" [value ("," value)*] "]" 50 | object : "{" [pair ("," pair)*] "}" 51 | pair : string ":" value 52 | string : ESCAPED_STRING 53 | %import common.ESCAPED_STRING 54 | %import common.SIGNED_NUMBER 55 | %import common.WS 56 | %ignore WS""", 57 | """ 58 | [ 59 | { 60 | "_id": "5edb875cf3d764da55602437", 61 | "index": 0, 62 | "guid": "3dae2206-5d4d-41fe-b81d-dc8cdba7acaa", 63 | "isActive": false, 64 | "balance": "$2,872.54", 65 | "picture": "http://placehold.it/32x32", 66 | "age": 24, 67 | "eyeColor": "blue", 68 | "name": "Theresa Vargas", 69 | "gender": "female", 70 | "company": "GEEKOL", 71 | "email": "theresavargas@geekol.com", 72 | "phone": "+1 (930) 450-3445", 73 | "address": "418 Herbert Street, Sexton, Florida, 1375", 74 | "about": "Id minim deserunt laborum enim. Veniam commodo incididunt amet aute esse duis veniam occaecat nulla esse aute et deserunt eiusmod. Anim elit ullamco minim magna sint laboris. Est consequat quis deserunt excepteur in magna pariatur laborum quis eu. 
Ex quis tempor elit qui qui et culpa sunt sit esse mollit cupidatat. Fugiat cillum deserunt enim minim irure reprehenderit est. Voluptate nisi quis amet quis incididunt pariatur nostrud Lorem consectetur adipisicing voluptate.\\r\\n", 75 | "registered": "2016-11-19T01:02:42 -01:00", 76 | "latitude": -25.65267, 77 | "longitude": 104.19531, 78 | "tags": [ 79 | "eiusmod", 80 | "reprehenderit", 81 | "anim", 82 | "sunt", 83 | "esse", 84 | "proident", 85 | "esse" 86 | ], 87 | "friends": [ 88 | { 89 | "id": 0, 90 | "name": "Roth Herrera" 91 | }, 92 | { 93 | "id": 1, 94 | "name": "Callie Christian" 95 | }, 96 | { 97 | "id": 2, 98 | "name": "Gracie Whitfield" 99 | } 100 | ], 101 | "greeting": "Hello, Theresa Vargas! You have 6 unread messages.", 102 | "favoriteFruit": "banana" 103 | }, 104 | { 105 | "_id": "5edb875c845eb08161a83e64", 106 | "index": 1, 107 | "guid": "a8ada2c1-e2c7-40d3-96b4-52c93baff7f0", 108 | "isActive": false, 109 | "balance": "$2,717.04", 110 | "picture": "http://placehold.it/32x32", 111 | "age": 23, 112 | "eyeColor": "green", 113 | "name": "Lily Ross", 114 | "gender": "female", 115 | "company": "RODEOMAD", 116 | "email": "lilyross@rodeomad.com", 117 | "phone": "+1 (941) 465-3561", 118 | "address": "525 Beekman Place, Blodgett, Marshall Islands, 3173", 119 | "about": "Aliquip duis proident excepteur eiusmod in quis officia consequat culpa eu et ut. Occaecat reprehenderit tempor mollit do eu magna qui et magna exercitation aliqua. Incididunt exercitation dolor proident eiusmod minim occaecat. Sunt et minim mollit et veniam sint ex. Duis ullamco elit aute eu excepteur reprehenderit officia.\\r\\n", 120 | "registered": "2019-11-02T04:06:42 -01:00", 121 | "latitude": 17.031701, 122 | "longitude": -42.657106, 123 | "tags": [ 124 | "id", 125 | "non", 126 | "culpa", 127 | "reprehenderit", 128 | "esse", 129 | "elit", 130 | "sit" 131 | ], 132 | "friends": [ 133 | { 134 | "id": 0, 135 | "name": "Ursula Maldonado" 136 | }, 137 | { 138 | "id": 1, 139 | "name": "Traci Huff" 140 | }, 141 | { 142 | "id": 2, 143 | "name": "Taylor Holt" 144 | } 145 | ], 146 | "greeting": "Hello, Lily Ross! You have 3 unread messages.", 147 | "favoriteFruit": "strawberry" 148 | } 149 | ]""") 150 | } 151 | -------------------------------------------------------------------------------- /docs/ide/app/files.json: -------------------------------------------------------------------------------- 1 | [ 2 | "app.py", 3 | "examples.py", 4 | "html5.py", 5 | "core.py", 6 | "ext.py", 7 | "ignite.py", 8 | "utils.py" 9 | ] 10 | -------------------------------------------------------------------------------- /docs/ide/app/html5.py: -------------------------------------------------------------------------------- 1 | #-*- coding: utf-8 -*- 2 | 3 | from .core import * 4 | from . import ext, utils, ignite 5 | -------------------------------------------------------------------------------- /docs/ide/app/ignite.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from . 
import core as html5 3 | 4 | 5 | @html5.tag 6 | class Label(html5.Label): 7 | _parserTagName = "ignite-label" 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(Label, self).__init__(style="label ignt-label", *args, **kwargs) 11 | 12 | 13 | @html5.tag 14 | class Input(html5.Input): 15 | _parserTagName = "ignite-input" 16 | 17 | def __init__(self, *args, **kwargs): 18 | super(Input, self).__init__(style="input ignt-input", *args, **kwargs) 19 | 20 | 21 | @html5.tag 22 | class Switch(html5.Div): 23 | _parserTagName = "ignite-switch" 24 | 25 | def __init__(self, *args, **kwargs): 26 | super(Switch, self).__init__(style="switch ignt-switch", *args, **kwargs) 27 | 28 | self.input = html5.Input(style="switch-input") 29 | self.appendChild(self.input) 30 | self.input["type"] = "checkbox" 31 | 32 | switchLabel = html5.Label(forElem=self.input) 33 | switchLabel.addClass("switch-label") 34 | self.appendChild(switchLabel) 35 | 36 | def _setChecked(self, value): 37 | self.input["checked"] = bool(value) 38 | 39 | def _getChecked(self): 40 | return self.input["checked"] 41 | 42 | 43 | @html5.tag 44 | class Check(html5.Input): 45 | _parserTagName = "ignite-check" 46 | 47 | def __init__(self, *args, **kwargs): 48 | super(Check, self).__init__(style="check ignt-check", *args, **kwargs) 49 | 50 | checkInput = html5.Input() 51 | checkInput.addClass("check-input") 52 | checkInput["type"] = "checkbox" 53 | self.appendChild(checkInput) 54 | 55 | checkLabel = html5.Label(forElem=checkInput) 56 | checkLabel.addClass("check-label") 57 | self.appendChild(checkLabel) 58 | 59 | 60 | @html5.tag 61 | class Radio(html5.Div): 62 | _parserTagName = "ignite-radio" 63 | 64 | def __init__(self, *args, **kwargs): 65 | super(Radio, self).__init__(style="radio ignt-radio", *args, **kwargs) 66 | 67 | radioInput = html5.Input() 68 | radioInput.addClass("radio-input") 69 | radioInput["type"] = "radio" 70 | self.appendChild(radioInput) 71 | 72 | radioLabel = html5.Label(forElem=radioInput) 73 | radioLabel.addClass("radio-label") 74 | self.appendChild(radioLabel) 75 | 76 | 77 | @html5.tag 78 | class Select(html5.Select): 79 | _parserTagName = "ignite-select" 80 | 81 | def __init__(self, *args, **kwargs): 82 | super(Select, self).__init__(style="select ignt-select", *args, **kwargs) 83 | 84 | defaultOpt = html5.Option() 85 | defaultOpt["selected"] = True 86 | defaultOpt["disabled"] = True 87 | defaultOpt.element.innerHTML = "" 88 | self.appendChild(defaultOpt) 89 | 90 | 91 | @html5.tag 92 | class Textarea(html5.Textarea): 93 | _parserTagName = "ignite-textarea" 94 | 95 | def __init__(self, *args, **kwargs): 96 | super(Textarea, self).__init__(style="textarea ignt-textarea", *args, **kwargs) 97 | 98 | 99 | @html5.tag 100 | class Progress(html5.Progress): 101 | _parserTagName = "ignite-progress" 102 | 103 | def __init__(self, *args, **kwargs): 104 | super(Progress, self).__init__(style="progress ignt-progress", *args, **kwargs) 105 | 106 | 107 | @html5.tag 108 | class Item(html5.Div): 109 | _parserTagName = "ignite-item" 110 | 111 | def __init__(self, title=None, descr=None, className=None, *args, **kwargs): 112 | super(Item, self).__init__(style="item ignt-item", *args, **kwargs) 113 | if className: 114 | self.addClass(className) 115 | 116 | self.fromHTML(""" 117 |
118 | <!-- item markup lost in extraction: wrapper divs, including the headline element referenced below as self.itemHeadline --> 122 |
123 | """) 124 | 125 | if title: 126 | self.itemHeadline.appendChild(html5.TextNode(title)) 127 | 128 | if descr: 129 | self.itemSubline = html5.Div() 130 | self.addClass("item-subline ignt-item-subline") 131 | self.itemSubline.appendChild(html5.TextNode(descr)) 132 | self.appendChild(self.itemSubline) 133 | 134 | 135 | @html5.tag 136 | class Table(html5.Table): 137 | _parserTagName = "ignite-table" 138 | 139 | def __init__(self, *args, **kwargs): 140 | super(Table, self).__init__(*args, **kwargs) 141 | self.head.addClass("ignt-table-head") 142 | self.body.addClass("ignt-table-body") 143 | 144 | def prepareRow(self, row): 145 | assert row >= 0, "Cannot create rows with negative index" 146 | 147 | for child in self.body._children: 148 | row -= child["rowspan"] 149 | if row < 0: 150 | return 151 | 152 | while row >= 0: 153 | tableRow = html5.Tr() 154 | tableRow.addClass("ignt-table-body-row") 155 | self.body.appendChild(tableRow) 156 | row -= 1 157 | 158 | def prepareCol(self, row, col): 159 | assert col >= 0, "Cannot create cols with negative index" 160 | self.prepareRow(row) 161 | 162 | for rowChild in self.body._children: 163 | row -= rowChild["rowspan"] 164 | 165 | if row < 0: 166 | for colChild in rowChild._children: 167 | col -= colChild["colspan"] 168 | if col < 0: 169 | return 170 | 171 | while col >= 0: 172 | tableCell = html5.Td() 173 | tableCell.addClass("ignt-table-body-cell") 174 | rowChild.appendChild(tableCell) 175 | col -= 1 176 | 177 | return 178 | def fastGrid( self, rows, cols, createHidden=False ): 179 | colsstr = "".join(['' for i in range(0, cols)]) 180 | tblstr = '' 181 | 182 | for r in range(0, rows): 183 | tblstr += '%s' %("is-hidden" if createHidden else "",colsstr) 184 | tblstr +="" 185 | 186 | self.fromHTML(tblstr) 187 | -------------------------------------------------------------------------------- /docs/ide/app/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from . import core as html5 3 | 4 | def unescape(val, maxLength = 0): 5 | """ 6 | Unquotes several HTML-quoted characters in a string. 7 | 8 | :param val: The value to be unescaped. 9 | :type val: str 10 | 11 | :param maxLength: Cut-off after maxLength characters. 12 | A value of 0 means "unlimited". (default) 13 | :type maxLength: int 14 | 15 | :returns: The unquoted string. 16 | :rtype: str 17 | """ 18 | val = val \ 19 | .replace("<", "<") \ 20 | .replace(">", ">") \ 21 | .replace(""", "\"") \ 22 | .replace("'", "'") 23 | 24 | if maxLength > 0: 25 | return val[0:maxLength] 26 | 27 | return val 28 | 29 | def doesEventHitWidgetOrParents(event, widget): 30 | """ 31 | Test if event 'event' hits widget 'widget' (or *any* of its parents) 32 | """ 33 | while widget: 34 | if event.target == widget.element: 35 | return widget 36 | 37 | widget = widget.parent() 38 | 39 | return None 40 | 41 | def doesEventHitWidgetOrChildren(event, widget): 42 | """ 43 | Test if event 'event' hits widget 'widget' (or *any* of its children) 44 | """ 45 | if event.target == widget.element: 46 | return widget 47 | 48 | for child in widget.children(): 49 | if doesEventHitWidgetOrChildren(event, child): 50 | return child 51 | 52 | return None 53 | 54 | def textToHtml(node, text): 55 | """ 56 | Generates html nodes from text by splitting text into content and into 57 | line breaks html5.Br. 58 | 59 | :param node: The node where the nodes are appended to. 60 | :param text: The text to be inserted. 
61 | """ 62 | 63 | for (i, part) in enumerate(text.split("\n")): 64 | if i > 0: 65 | node.appendChild(html5.Br()) 66 | 67 | node.appendChild(html5.TextNode(part)) 68 | 69 | def parseInt(s, ret = 0): 70 | """ 71 | Parses a value as int 72 | """ 73 | if not isinstance(s, str): 74 | return int(s) 75 | elif s: 76 | if s[0] in "+-": 77 | ts = s[1:] 78 | else: 79 | ts = s 80 | 81 | if ts and all([_ in "0123456789" for _ in ts]): 82 | return int(s) 83 | 84 | return ret 85 | 86 | def parseFloat(s, ret = 0.0): 87 | """ 88 | Parses a value as float. 89 | """ 90 | if not isinstance(s, str): 91 | return float(s) 92 | elif s: 93 | if s[0] in "+-": 94 | ts = s[1:] 95 | else: 96 | ts = s 97 | 98 | if ts and ts.count(".") <= 1 and all([_ in ".0123456789" for _ in ts]): 99 | return float(s) 100 | 101 | return ret 102 | -------------------------------------------------------------------------------- /docs/ide/is-loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/docs/ide/is-loading.gif -------------------------------------------------------------------------------- /docs/ide/lark-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/docs/ide/lark-logo.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Lark documentation master file, created by 2 | sphinx-quickstart on Sun Aug 16 13:09:41 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Lark's documentation! 7 | ================================ 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Overview 12 | :hidden: 13 | 14 | philosophy 15 | features 16 | parsers 17 | 18 | .. toctree:: 19 | :maxdepth: 2 20 | :caption: Tutorials & Guides 21 | :hidden: 22 | 23 | json_tutorial 24 | how_to_use 25 | how_to_develop 26 | recipes 27 | examples/index 28 | 29 | 30 | .. toctree:: 31 | :maxdepth: 2 32 | :caption: Reference 33 | :hidden: 34 | 35 | grammar 36 | tree_construction 37 | classes 38 | visitors 39 | forest 40 | tools 41 | 42 | 43 | 44 | Lark is a modern parsing library for Python. Lark can parse any context-free grammar. 45 | 46 | Lark provides: 47 | 48 | - Advanced grammar language, based on EBNF 49 | - Three parsing algorithms to choose from: Earley, LALR(1) and CYK 50 | - Automatic tree construction, inferred from your grammar 51 | - Fast unicode lexer with regexp support, and automatic line-counting 52 | 53 | 54 | Install Lark 55 | -------------- 56 | 57 | .. code:: bash 58 | 59 | $ pip install lark 60 | 61 | Syntax Highlighting 62 | ------------------- 63 | 64 | - `Sublime Text & TextMate`_ 65 | - `Visual Studio Code`_ (Or install through the vscode plugin system) 66 | - `Intellij & PyCharm`_ 67 | - `Vim`_ 68 | - `Atom`_ 69 | 70 | .. _Sublime Text & TextMate: https://github.com/lark-parser/lark_syntax 71 | .. _Visual Studio Code: https://github.com/lark-parser/vscode-lark 72 | .. _Intellij & PyCharm: https://github.com/lark-parser/intellij-syntax-highlighting 73 | .. _Vim: https://github.com/lark-parser/vim-lark-syntax 74 | .. 
_Atom: https://github.com/Alhadis/language-grammars 75 | 76 | Resources 77 | --------- 78 | 79 | - :doc:`philosophy` 80 | - :doc:`features` 81 | - `Examples`_ 82 | - `Third-party examples`_ 83 | - `Online IDE`_ 84 | - Tutorials 85 | 86 | - `How to write a DSL`_ - Implements a toy LOGO-like language with 87 | an interpreter 88 | - :doc:`json_tutorial` - Teaches you how to use Lark 89 | - Unofficial 90 | 91 | - `Program Synthesis is Possible`_ - Creates a DSL for Z3 92 | - `Using Lark to Parse Text - Robin Reynolds-Haertle (PyCascades 2023) `_ (video presentation) 93 | 94 | - Guides 95 | 96 | - :doc:`how_to_use` 97 | - :doc:`how_to_develop` 98 | 99 | - Reference 100 | 101 | - :doc:`grammar` 102 | - :doc:`tree_construction` 103 | - :doc:`visitors` 104 | - :doc:`forest` 105 | - :doc:`classes` 106 | - :doc:`tools` 107 | - `Cheatsheet (PDF)`_ 108 | 109 | - Discussion 110 | 111 | - `Gitter`_ 112 | - `Forum (Google Groups)`_ 113 | 114 | 115 | .. _Examples: https://github.com/lark-parser/lark/tree/master/examples 116 | .. _Third-party examples: https://github.com/ligurio/lark-grammars 117 | .. _Online IDE: https://lark-parser.org/ide 118 | .. _How to write a DSL: https://eshsoft.com/blog/write-dsl-in-python-with-lark 119 | .. _Program Synthesis is Possible: https://www.cs.cornell.edu/~asampson/blog/minisynth.html 120 | .. _Cheatsheet (PDF): _static/lark_cheatsheet.pdf 121 | .. _Gitter: https://gitter.im/lark-parser/Lobby 122 | .. _Forum (Google Groups): https://groups.google.com/forum/#!forum/lark-parser 123 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=Lark 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/parsers.md: -------------------------------------------------------------------------------- 1 | # Parsers 2 | Lark implements the following parsing algorithms: Earley, LALR(1), and CYK. 3 | 4 | ## Earley 5 | 6 | An [Earley Parser](https://www.wikiwand.com/en/Earley_parser) is a chart parser capable of parsing any context-free grammar in O(n^3) time, and O(n^2) when the grammar is unambiguous. It can parse most LR grammars in O(n). Most programming languages are LR, and can be parsed in linear time. 7 | 8 | Lark's Earley implementation runs on top of a skipping chart parser, which allows it to use regular expressions, instead of matching characters one-by-one. This is a huge improvement to Earley that is unique to Lark. This feature is used by default, but can also be requested explicitly using `lexer='dynamic'`. 
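To make the defaults concrete, here is a minimal sketch (an added illustration, not from Lark's own docs; the grammar is invented for the example). Earley is Lark's default algorithm, and `'dynamic'` is Earley's default lexer, so naming them explicitly changes nothing:

```python
from lark import Lark

grammar = r"""
    start: WORD+
    %import common.WORD
    %ignore " "
"""

# Earley is the default parser, and 'dynamic' is its default lexer,
# so these two parsers behave identically.
p1 = Lark(grammar)
p2 = Lark(grammar, parser='earley', lexer='dynamic')

assert p1.parse("hello world") == p2.parse("hello world")
```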
9 | 10 | It's possible to bypass the dynamic lexing, and use the regular Earley parser with a basic lexer that tokenizes as an independent first step. Doing so will provide a speed benefit, but will tokenize without using Earley's ambiguity-resolution ability. So choose this only if you know why! Activate it with `lexer='basic'`. 11 | 12 | **SPPF & Ambiguity resolution** 13 | 14 | Lark implements the Shared Packed Parse Forest data-structure for the Earley parser, in order to reduce the space and computation required to handle ambiguous grammars. 15 | 16 | You can read more about SPPF [here](https://web.archive.org/web/20191229100607/www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest) 17 | 18 | As a result, Lark can efficiently parse and store every ambiguity in the grammar, when using Earley. 19 | 20 | Lark provides the following options to combat ambiguity: 21 | 22 | 1) Lark will choose the best derivation for you (default). Users can choose between different disambiguation strategies, and can prioritize (or demote) individual rules over others, using the rule-priority syntax. 23 | 24 | 2) Users may choose to receive the set of all possible parse-trees (using `ambiguity='explicit'`), and choose the best derivation themselves. While simple and flexible, this comes at the cost of space and performance, so it isn't recommended for highly ambiguous grammars, or very long inputs. 25 | 26 | 3) As an advanced feature, users may use specialized visitors to iterate over the SPPF themselves. There is also [a 3rd-party utility for iterating over the SPPF](https://github.com/chanicpanic/lark-ambig-tools). 27 | 28 | **lexer="dynamic_complete"** 29 | 30 | Earley's "dynamic" lexer uses regular expressions in order to tokenize the text. It tries every possible combination of terminals, but it matches each terminal exactly once, returning the longest possible match. 31 | 32 | That means, for example, that when `lexer="dynamic"` (which is the default), the terminal `/a+/`, when given the text `"aa"`, will return one result, `aa`, even though `a` would also be correct. 33 | 34 | This behavior was chosen because it is much faster, and it is usually what you would expect. 35 | 36 | Setting `lexer="dynamic_complete"` instructs the lexer to consider every possible regexp match. This ensures that the parser will consider and resolve every ambiguity, even inside the terminals themselves. This lexer provides the same capabilities as scannerless Earley, but with different performance tradeoffs. 37 | 38 | Warning: This lexer can be much slower, especially for open-ended terminals such as `/.*/`. 39 | 40 | 41 | ## LALR(1) 42 | 43 | [LALR(1)](https://www.wikiwand.com/en/LALR_parser) is a very efficient, tried-and-tested parsing algorithm. It's incredibly fast and requires very little memory. It can parse most programming languages (for example, Python and Java). 44 | 45 | LALR(1) stands for: 46 | 47 | - Left-to-right parsing order 48 | 49 | - Rightmost derivation, bottom-up 50 | 51 | - Lookahead of 1 token 52 | 53 | Lark comes with an efficient implementation that outperforms every other parsing library for Python (including PLY). 54 | 55 | Lark extends the traditional YACC-based architecture with a *contextual lexer*, which processes feedback from the parser, making the LALR(1) algorithm stronger than ever. 56 | 57 | The contextual lexer communicates with the parser, and uses the parser's lookahead prediction to narrow its choice of terminals. So at each point, the lexer only matches the subgroup of terminals that are legal at that parser state, instead of all of the terminals. It's surprisingly effective at resolving common terminal collisions, and allows one to parse languages that LALR(1) was previously incapable of parsing.
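For instance, here is a minimal sketch of the effect, adapted from `examples/advanced/conf_lalr.py` (which ships with Lark). The terminals `NAME` and `VALUE` overlap, and a traditional lexer would let the longer `VALUE` match swallow the whole line; the contextual lexer resolves the collision because it knows which terminal the parser expects next:

```python
from lark import Lark

# NAME and VALUE can match the same input. With the contextual lexer
# (the default for parser='lalr'), the lexer only tries the terminals
# that are legal in the current parser state, so the collision is resolved.
parser = Lark(r"""
    start: (NAME "=" VALUE _NL)+
    NAME: /\w+/
    VALUE: /[^\n]+/
    _NL: /\n/
""", parser='lalr')

print(parser.parse("a=Hello\nthis=that,4\n").pretty())
```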
58 | 59 | (If you're familiar with YACC, you can think of it as automatic lexer-states) 60 | 61 | This is an improvement to LALR(1) that is unique to Lark. 62 | 63 | ### Grammar constraints in LALR(1) 64 | 65 | Due to having only a lookahead of one token, LALR is limited in its ability to choose between rules when they both match the input. 66 | 67 | Tips for writing a conforming grammar: 68 | 69 | - Try to avoid writing different rules that can match the same sequence of characters. 70 | 71 | - For the best performance, prefer left-recursion over right-recursion. 72 | 73 | - Consider setting terminal priority only as a last resort. 74 | 75 | For a better understanding of these constraints, it's recommended to learn how an SLR parser works. SLR is very similar to LALR, but much simpler. 76 | 77 | ## CYK Parser 78 | 79 | A [CYK parser](https://www.wikiwand.com/en/CYK_algorithm) can parse any context-free grammar at O(n^3*|G|). 80 | 81 | It's too slow to be practical for simple grammars, but it offers good performance for highly ambiguous grammars. 82 | -------------------------------------------------------------------------------- /docs/philosophy.md: -------------------------------------------------------------------------------- 1 | # Philosophy 2 | 3 | Parsers are innately complicated and confusing. They're difficult to understand, difficult to write, and difficult to use. Even experts on the subject can become baffled by the nuances of these complicated state-machines. 4 | 5 | Lark's mission is to make the process of writing them as simple and abstract as possible, by following these design principles: 6 | 7 | ## Design Principles 8 | 9 | 1. Readability matters 10 | 11 | 2. Keep the grammar clean and simple 12 | 13 | 3. Don't force the user to decide on things that the parser can figure out on its own 14 | 15 | 4. Usability is more important than performance 16 | 17 | 5. Performance is still very important 18 | 19 | 6. Follow the Zen of Python, whenever possible and applicable 20 | 21 | 22 | In accordance with these principles, I arrived at the following design choices: 23 | 24 | ----------- 25 | 26 | ## Design Choices 27 | 28 | ### 1. Separation of code and grammar 29 | 30 | Grammars are the de-facto reference for your language, and for the structure of your parse-tree. For any non-trivial language, the conflation of code and grammar always turns out convoluted and difficult to read. 31 | 32 | The grammars in Lark are EBNF-inspired, so they are especially easy to read & work with. 33 | 34 | ### 2. Always build a parse-tree (unless told not to) 35 | 36 | Trees are always simpler to work with than state-machines. 37 | 38 | 1. Trees allow you to see the "state-machine" visually 39 | 40 | 2. Trees allow your computation to be aware of previous and future states 41 | 42 | 3. Trees allow you to process the parse in steps, instead of forcing you to do it all at once. 43 | 44 | And anyway, every parse-tree can be replayed as a state-machine, so there is no loss of information. 45 | 46 | See this answer in more detail [here](https://github.com/erezsh/lark/issues/4). 47 | 48 | To improve performance, you can skip building the tree for LALR(1), by providing Lark with a transformer (see the [JSON example](https://github.com/erezsh/lark/blob/master/examples/json_parser.py)).
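A minimal sketch of that technique, using a toy grammar of my own rather than the JSON example itself: when the transformer is passed to the `Lark` constructor together with `parser='lalr'`, each rule callback runs as soon as that rule is reduced, and no intermediate tree is built:

```python
from lark import Lark, Transformer

class Eval(Transformer):
    # Each callback runs during parsing, as its rule is reduced,
    # so no parse-tree is ever constructed.
    def number(self, children):
        return int(children[0])

    def add(self, children):
        return children[0] + children[1]

parser = Lark(r"""
    ?start: add | number
    add: number "+" number
    number: /\d+/
""", parser='lalr', transformer=Eval())

print(parser.parse("1+2"))  # prints 3
```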
49 | 50 | ### 3. Earley is the default 51 | 52 | The Earley algorithm can accept *any* context-free grammar you throw at it (i.e., it can parse any grammar you can write in EBNF). That makes it extremely friendly to beginners, who are not aware of the strange and arbitrary restrictions that LALR(1) places on its grammars. 53 | 54 | As users grow to understand the structure of their grammar, the scope of their target language, and their performance requirements, they may choose to switch over to LALR(1) to gain a huge performance boost, possibly at the cost of some language features. 55 | 56 | Both Earley and LALR(1) can use the same grammar, as long as all constraints are satisfied. 57 | 58 | In short, "Premature optimization is the root of all evil." 59 | 60 | ### Other design features 61 | 62 | - Automatically resolve terminal collisions whenever possible 63 | 64 | - Automatically keep track of line & column numbers 65 | -------------------------------------------------------------------------------- /docs/recipes.md: -------------------------------------------------------------------------------- 1 | # Recipes 2 | 3 | A collection of recipes for using Lark and its various features. 4 | 5 | 6 | ## Use a transformer to parse integer tokens 7 | 8 | Transformers are the common interface for processing matched rules and tokens. 9 | 10 | They can be used during parsing for better performance. 11 | 12 | ```python 13 | from lark import Lark, Transformer 14 | 15 | class T(Transformer): 16 | def INT(self, tok): 17 | "Convert the value of `tok` from string to int, while maintaining line number & column." 18 | return tok.update(value=int(tok)) 19 | 20 | parser = Lark(""" 21 | start: INT* 22 | %import common.INT 23 | %ignore " " 24 | """, parser="lalr", transformer=T()) 25 | 26 | print(parser.parse('3 14 159')) 27 | ``` 28 | 29 | Prints out: 30 | 31 | ```python 32 | Tree(start, [Token(INT, 3), Token(INT, 14), Token(INT, 159)]) 33 | ``` 34 | 35 | 36 | ## Collect all comments with lexer_callbacks 37 | 38 | `lexer_callbacks` can be used to interface with the lexer as it generates tokens. 39 | 40 | It accepts a dictionary of the form 41 | 42 | {TOKEN_TYPE: callback} 43 | 44 | where `callback` is a function of type `f(Token) -> Token` 45 | 46 | It only works with the basic and contextual lexers. 47 | 48 | This has the same effect as using a transformer, but can also process ignored tokens. 49 | 50 | ```python 51 | from lark import Lark 52 | 53 | comments = [] 54 | 55 | parser = Lark(""" 56 | start: INT* 57 | 58 | COMMENT: /#.*/ 59 | 60 | %import common (INT, WS) 61 | %ignore COMMENT 62 | %ignore WS 63 | """, parser="lalr", lexer_callbacks={'COMMENT': comments.append}) 64 | 65 | parser.parse(""" 66 | 1 2 3 # hello 67 | # world 68 | 4 5 6 69 | """) 70 | 71 | print(comments) 72 | ``` 73 | 74 | Prints out: 75 | 76 | ```python 77 | [Token(COMMENT, '# hello'), Token(COMMENT, '# world')] 78 | ``` 79 | 80 | *Note: We don't have to return a token, because comments are ignored.* 81 | 82 | 83 | ## CollapseAmbiguities 84 | 85 | Parsing ambiguous texts with Earley and `ambiguity='explicit'` produces a single tree with `_ambig` nodes to mark where the ambiguity occurred. 86 | 87 | However, it's sometimes more convenient to work with a list of all possible unambiguous trees.
88 | 89 | Lark provides a utility transformer for that purpose: 90 | 91 | ```python 92 | from lark import Lark, Tree, Transformer 93 | from lark.visitors import CollapseAmbiguities 94 | 95 | grammar = """ 96 | !start: x y 97 | 98 | !x: "a" "b" 99 | | "ab" 100 | | "abc" 101 | 102 | !y: "c" "d" 103 | | "cd" 104 | | "d" 105 | 106 | """ 107 | parser = Lark(grammar, ambiguity='explicit') 108 | 109 | t = parser.parse('abcd') 110 | for x in CollapseAmbiguities().transform(t): 111 | print(x.pretty()) 112 | ``` 113 | 114 | This prints out: 115 | 116 | start 117 | x 118 | a 119 | b 120 | y 121 | c 122 | d 123 | 124 | start 125 | x ab 126 | y cd 127 | 128 | start 129 | x abc 130 | y d 131 | 132 | While convenient, this should be used carefully, as highly ambiguous trees will soon create an exponential explosion of such unambiguous derivations. 133 | 134 | 135 | ## Keeping track of parents when visiting 136 | 137 | The following visitor assigns a `parent` attribute to every node in the tree (it assumes `from lark import Tree, Visitor` and `from weakref import proxy`). 138 | 139 | If your tree nodes aren't unique (i.e. if there is a shared Tree instance), the assert will fail. 140 | 141 | ```python 142 | class Parent(Visitor): 143 | def __default__(self, tree): 144 | for subtree in tree.children: 145 | if isinstance(subtree, Tree): 146 | assert not hasattr(subtree, 'parent') 147 | subtree.parent = proxy(tree) 148 | ``` 149 | 150 | 151 | ## Unwinding VisitError after a transformer/visitor exception 152 | 153 | Errors that happen inside visitors and transformers get wrapped inside a `VisitError` exception. 154 | 155 | This can often be inconvenient if you wish the actual error to propagate upwards, or if you want to catch it. 156 | 157 | But it's easy to unwrap at the point of calling the transformer, by catching the `VisitError` and re-raising its `orig_exc` attribute. 158 | 159 | For example: 160 | ```python 161 | from lark import Lark, Transformer 162 | from lark.visitors import VisitError 163 | 164 | tree = Lark('start: "a"').parse('a') 165 | 166 | class T(Transformer): 167 | def start(self, x): 168 | raise KeyError("Original Exception") 169 | 170 | t = T() 171 | try: 172 | print(t.transform(tree)) 173 | except VisitError as e: 174 | raise e.orig_exc 175 | ``` 176 | 177 | 178 | ## Adding a Progress Bar to Parsing with tqdm 179 | 180 | Parsing large files can take a long time, even with the `parser='lalr'` option. To make this process more user-friendly, it's useful to add a progress bar. One way to achieve this is to use the `InteractiveParser` to display each token as it is processed. In this example, we use [tqdm](https://github.com/tqdm/tqdm), but it should be easy to adapt to other kinds of progress bars. 181 | 182 | ```python 183 | from tqdm import tqdm 184 | from lark import Lark  # for the type annotation below 185 | def parse_with_progress(parser: Lark, text: str, start=None): 186 | last = 0 187 | progress = tqdm(total=len(text)) 188 | pi = parser.parse_interactive(text, start=start) 189 | for token in pi.iter_parse(): 190 | if token.end_pos is not None: 191 | progress.update(token.end_pos - last) 192 | last = token.end_pos 193 | return pi.resume_parse() # Finish up and get the result 194 | ``` 195 | 196 | Keep in mind that this implementation relies on the `InteractiveParser` and, therefore, only works with the `LALR(1)` parser, and not `Earley`. 197 | 198 | 199 | ## Parsing a Language with Significant Indentation 200 | 201 | If your grammar needs to support significant indentation (e.g. Python, YAML), you will need to use 202 | the `Indenter` class.
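Here is a minimal sketch of such an indenter, adapted from the indented tree example that ships with Lark (linked just below). The key requirements are that the `_NL` terminal matches the newline *together with* the indentation that follows it, and that `_INDENT`/`_DEDENT` are `%declare`d so the post-lexer can inject them:

```python
from lark import Lark
from lark.indenter import Indenter

class TreeIndenter(Indenter):
    NL_type = '_NL'          # the terminal that carries newline + indentation
    OPEN_PAREN_types = []    # indentation is ignored inside these
    CLOSE_PAREN_types = []
    INDENT_type = '_INDENT'  # names of the declared terminals to inject
    DEDENT_type = '_DEDENT'
    tab_len = 8

grammar = r"""
    ?start: _NL* tree
    tree: NAME _NL [_INDENT tree+ _DEDENT]
    NAME: /\w+/
    _NL: /(\r?\n[\t ]*)+/

    %declare _INDENT _DEDENT
"""

parser = Lark(grammar, parser='lalr', postlex=TreeIndenter())

print(parser.parse("a\n    b\n    c\n        d\n").pretty())
```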
Take a look at the [indented tree example][indent] as well as the 203 | [Python grammar][python] for inspiration. 204 | 205 | [indent]: examples/indented_tree.html 206 | [python]: https://github.com/lark-parser/lark/blob/master/lark/grammars/python.lark 207 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # https://docs.readthedocs.io/en/stable/guides/specifying-dependencies.html#specifying-a-requirements-file 2 | pillow 3 | recommonmark 4 | requests==2.28.1 5 | sphinx-gallery 6 | sphinx_markdown_tables 7 | sphinx_rtd_theme>=1.2 8 | -------------------------------------------------------------------------------- /docs/tools.md: -------------------------------------------------------------------------------- 1 | # Tools (Stand-alone, Nearley) 2 | 3 | ## Stand-alone parser 4 | 5 | Lark can generate a stand-alone LALR(1) parser from a grammar. 6 | 7 | The resulting module provides the same interface as Lark, but with a fixed grammar, and reduced functionality. 8 | 9 | Run using: 10 | 11 | ```bash 12 | python -m lark.tools.standalone 13 | ``` 14 | 15 | For a play-by-play, read the [tutorial](http://blog.erezsh.com/create-a-stand-alone-lalr1-parser-in-python/). 16 | 17 | 18 | ## Importing grammars from Nearley.js 19 | 20 | Lark comes with a tool to convert grammars from [Nearley](https://github.com/Hardmath123/nearley), a popular Earley library for JavaScript. It uses [Js2Py](https://github.com/PiotrDabkowski/Js2Py) to convert and run the JavaScript postprocessing code segments. 21 | 22 | #### Requirements 23 | 24 | 1. Install Lark with the `nearley` component: 25 | ```bash 26 | pip install lark[nearley] 27 | ``` 28 | 29 | 2. Acquire a copy of the Nearley codebase. This can be done using: 30 | ```bash 31 | git clone https://github.com/Hardmath123/nearley 32 | ``` 33 | 34 | #### Usage 35 | 36 | The tool can be run using: 37 | 38 | ```bash 39 | python -m lark.tools.nearley 40 | ``` 41 | 42 | Here's an example of how to import Nearley's calculator example into Lark: 43 | 44 | ```bash 45 | git clone https://github.com/Hardmath123/nearley 46 | python -m lark.tools.nearley nearley/examples/calculator/arithmetic.ne main ./nearley > ncalc.py 47 | ``` 48 | 49 | You can use the output as a regular Python module: 50 | 51 | ```python 52 | >>> import ncalc 53 | >>> ncalc.parse('sin(pi/4) ^ e') 54 | 0.38981434460254655 55 | ``` 56 | 57 | The Nearley tool also supports an experimental converter for newer JavaScript (ES6+), using the `--es6` flag: 58 | 59 | ```bash 60 | git clone https://github.com/Hardmath123/nearley 61 | python -m lark.tools.nearley nearley/examples/calculator/arithmetic.ne main nearley --es6 > ncalc.py 62 | ``` 63 | 64 | #### Notes 65 | 66 | - Lark currently cannot import templates from Nearley 67 | 68 | - Lark currently cannot export grammars to Nearley 69 | 70 | These might get added in the future, if enough users ask for them. 71 | -------------------------------------------------------------------------------- /docs/tree_construction.md: -------------------------------------------------------------------------------- 1 | # Tree Construction Reference 2 | 3 | 4 | Lark builds a tree automatically based on the structure of the grammar, where each rule that is matched becomes a branch (node) in the tree, and its children are its matches, in the order of matching.
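For a quick feel of this, here is a tiny self-contained sketch (a toy grammar of my own, not from the reference itself):

```python
from lark import Lark

# Each matched rule becomes a branch named after the rule;
# its matched children become that branch's children, in order.
parser = Lark(r"""
    start: pair+
    pair: WORD ":" WORD
    WORD: /\w+/
    %ignore " "
""")

print(parser.parse("a : b  c : d").pretty())
# start
#   pair
#     a
#     b
#   pair
#     c
#     d
```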
5 | 6 | For example, the rule `node: child1 child2` will create a tree node with two children. If it is matched as part of another rule (i.e. if it isn't the root), the new rule's tree node will become its parent. 7 | 8 | Using `item+` or `item*` will result in a list of items, equivalent to writing `item item item ...`. 9 | 10 | Using `item?` will return the item if it matched, or nothing. 11 | 12 | If `maybe_placeholders=True` (the default), then using `[item]` will return the item if it matched, or the value `None` if it didn't. 13 | 14 | If `maybe_placeholders=False`, then `[]` behaves like `()?`. 15 | 16 | ## Terminals 17 | 18 | Terminals are always values in the tree, never branches. 19 | 20 | Lark filters out certain types of terminals by default, considering them punctuation: 21 | 22 | - Terminals that won't appear in the tree are: 23 | 24 | - Unnamed literals (like `"keyword"` or `"+"`) 25 | - Terminals whose name starts with an underscore (like `_DIGIT`) 26 | 27 | - Terminals that *will* appear in the tree are: 28 | 29 | - Unnamed regular expressions (like `/[0-9]/`) 30 | - Named terminals whose name starts with a letter (like `DIGIT`) 31 | 32 | Note: Terminals composed of literals and other terminals always include the entire match, without filtering any part. 33 | 34 | **Example:** 35 | ``` 36 | start: PNAME pname 37 | 38 | PNAME: "(" NAME ")" 39 | pname: "(" NAME ")" 40 | 41 | NAME: /\w+/ 42 | %ignore /\s+/ 43 | ``` 44 | Lark will parse "(Hello) (World)" as: 45 | 46 | start 47 | (Hello) 48 | pname World 49 | 50 | Rules prefixed with `!` will retain all their literals regardless. 51 | 52 | 53 | 54 | 55 | **Example:** 56 | 57 | ```perl 58 | expr: "(" expr ")" 59 | | NAME+ 60 | 61 | NAME: /\w+/ 62 | 63 | %ignore " " 64 | ``` 65 | 66 | Lark will parse "((hello world))" as: 67 | 68 | expr 69 | expr 70 | expr 71 | "hello" 72 | "world" 73 | 74 | The brackets do not appear in the tree by design. The words appear because they are matched by a named terminal. 75 | 76 | 77 | ## Shaping the tree 78 | 79 | Users can alter the automatic construction of the tree using a collection of grammar features. 80 | 81 | ### Inlining rules with `_` 82 | 83 | Rules whose name begins with an underscore will be inlined into their containing rule. 84 | 85 | **Example:** 86 | 87 | ```perl 88 | start: "(" _greet ")" 89 | _greet: /\w+/ /\w+/ 90 | ``` 91 | 92 | Lark will parse "(hello world)" as: 93 | 94 | start 95 | "hello" 96 | "world" 97 | 98 | ### Conditionally inlining rules with `?` 99 | 100 | Rules that receive a question mark (?) at the beginning of their definition will be inlined if they have a single child, after filtering. 101 | 102 | **Example:** 103 | 104 | ```ruby 105 | start: greet greet 106 | ?greet: "(" /\w+/ ")" 107 | | /\w+/ /\w+/ 108 | ``` 109 | 110 | Lark will parse "hello world (planet)" as: 111 | 112 | start 113 | greet 114 | "hello" 115 | "world" 116 | "planet" 117 | 118 | ### Pinning rule terminals with `!` 119 | 120 | Rules that begin with an exclamation mark will keep all their terminals (they won't get filtered). 121 | 122 | ```perl 123 | !expr: "(" expr ")" 124 | | NAME+ 125 | NAME: /\w+/ 126 | %ignore " " 127 | ``` 128 | 129 | Will parse "((hello world))" as: 130 | 131 | expr 132 | ( 133 | expr 134 | ( 135 | expr 136 | hello 137 | world 138 | ) 139 | ) 140 | 141 | Using the `!` prefix is usually a "code smell", and may point to a flaw in your grammar design. 142 | 143 | ### Aliasing rules 144 | 145 | Options in a rule can receive an alias.
It will then be used as the branch name for the option, instead of the rule name. 146 | 147 | **Example:** 148 | 149 | ```ruby 150 | start: greet greet 151 | greet: "hello" 152 | | "world" -> planet 153 | ``` 154 | 155 | Lark will parse "hello world" as: 156 | 157 | start 158 | greet 159 | planet 160 | -------------------------------------------------------------------------------- /docs/visitors.rst: -------------------------------------------------------------------------------- 1 | Transformers & Visitors 2 | ======================= 3 | 4 | Transformers & Visitors provide a convenient interface to process the 5 | parse-trees that Lark returns. 6 | 7 | They are used by inheriting from the correct class (visitor or transformer), 8 | and implementing methods corresponding to the rules you wish to process. Each 9 | method accepts the children as an argument. That can be modified using the 10 | ``v_args`` decorator, which allows one to inline the arguments (akin to ``*args``), 11 | or add the tree ``meta`` property as an argument. 12 | 13 | See: `visitors.py`_ 14 | 15 | .. _visitors.py: https://github.com/lark-parser/lark/blob/master/lark/visitors.py 16 | 17 | Visitor 18 | ------- 19 | 20 | Visitors visit each node of the tree, and run the appropriate method on it according to the node's data. 21 | 22 | They work bottom-up, starting with the leaves and ending at the root of the tree. 23 | 24 | There are two classes that implement the visitor interface: 25 | 26 | - ``Visitor``: Visit every node (without recursion) 27 | - ``Visitor_Recursive``: Visit every node using recursion. Slightly faster. 28 | 29 | Example: 30 | :: 31 | 32 | class IncreaseAllNumbers(Visitor): 33 | def number(self, tree): 34 | assert tree.data == "number" 35 | tree.children[0] += 1 36 | 37 | IncreaseAllNumbers().visit(parse_tree) 38 | 39 | .. autoclass:: lark.visitors.Visitor 40 | :members: visit, visit_topdown, __default__ 41 | 42 | .. autoclass:: lark.visitors.Visitor_Recursive 43 | :members: visit, visit_topdown, __default__ 44 | 45 | Interpreter 46 | ----------- 47 | 48 | .. autoclass:: lark.visitors.Interpreter 49 | 50 | 51 | Example: 52 | :: 53 | 54 | class IncreaseSomeOfTheNumbers(Interpreter): 55 | def number(self, tree): 56 | tree.children[0] += 1 57 | 58 | def skip(self, tree): 59 | # skip this subtree. don't change any number node inside it. 60 | pass 61 | 62 | IncreaseSomeOfTheNumbers().visit(parse_tree) 63 | 64 | Transformer 65 | ----------- 66 | 67 | .. autoclass:: lark.visitors.Transformer 68 | :members: transform, __default__, __default_token__, __mul__ 69 | 70 | Example: 71 | :: 72 | 73 | from lark import Tree, Transformer 74 | 75 | class EvalExpressions(Transformer): 76 | def expr(self, args): 77 | return eval(args[0]) 78 | 79 | t = Tree('a', [Tree('expr', ['1+2'])]) 80 | print(EvalExpressions().transform(t)) 81 | 82 | # Prints: Tree(a, [3]) 83 | 84 | Example: 85 | :: 86 | 87 | class T(Transformer): 88 | INT = int 89 | NUMBER = float 90 | def NAME(self, name): 91 | return lookup_dict.get(name, name) 92 | 93 | T(visit_tokens=True).transform(tree) 94 | 95 | .. autoclass:: lark.visitors.Transformer_NonRecursive 96 | 97 | .. autoclass:: lark.visitors.Transformer_InPlace 98 | 99 | .. autoclass:: lark.visitors.Transformer_InPlaceRecursive 100 | 101 | v_args 102 | ------ 103 | 104 | .. autofunction:: lark.visitors.v_args 105 | 106 | merge_transformers 107 | ------------------ 108 | 109 | ..
autofunction:: lark.visitors.merge_transformers 110 | 111 | Discard 112 | ------- 113 | 114 | ``Discard`` is the singleton instance of ``_DiscardType``. 115 | 116 | .. autoclass:: lark.visitors._DiscardType 117 | 118 | 119 | VisitError 120 | ---------- 121 | 122 | .. autoclass:: lark.exceptions.VisitError 123 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | Examples for Lark 2 | ================= 3 | 4 | **How to run the examples**: 5 | 6 | After cloning the repo, open a terminal in the root directory of the 7 | project, and run the following: 8 | 9 | .. code:: bash 10 | 11 | [lark]$ python -m examples.<name_of_example> 12 | 13 | For example, the following will parse all the Python files in the 14 | standard library of your local installation: 15 | 16 | .. code:: bash 17 | 18 | [lark]$ python -m examples.advanced.python_parser 19 | 20 | Beginner Examples 21 | ~~~~~~~~~~~~~~~~~ 22 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/examples/__init__.py -------------------------------------------------------------------------------- /examples/advanced/README.rst: -------------------------------------------------------------------------------- 1 | Advanced Examples 2 | ~~~~~~~~~~~~~~~~~ 3 | -------------------------------------------------------------------------------- /examples/advanced/_json_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple JSON Parser 3 | ================== 4 | 5 | The code is short and clear, and outperforms every other parser (that's written in Python). 6 | For an explanation, check out the JSON parser tutorial at /docs/json_tutorial.md 7 | 8 | (this is here for use by the other examples) 9 | """ 10 | from lark import Lark, Transformer, v_args 11 | 12 | json_grammar = r""" 13 | ?start: value 14 | 15 | ?value: object 16 | | array 17 | | string 18 | | SIGNED_NUMBER -> number 19 | | "true" -> true 20 | | "false" -> false 21 | | "null" -> null 22 | 23 | array : "[" [value ("," value)*] "]" 24 | object : "{" [pair ("," pair)*] "}" 25 | pair : string ":" value 26 | 27 | string : ESCAPED_STRING 28 | 29 | %import common.ESCAPED_STRING 30 | %import common.SIGNED_NUMBER 31 | %import common.WS 32 | 33 | %ignore WS 34 | """ 35 | 36 | 37 | class TreeToJson(Transformer): 38 | @v_args(inline=True) 39 | def string(self, s): 40 | return s[1:-1].replace('\\"', '"') 41 | 42 | array = list 43 | pair = tuple 44 | object = dict 45 | number = v_args(inline=True)(float) 46 | 47 | null = lambda self, _: None 48 | true = lambda self, _: True 49 | false = lambda self, _: False 50 | 51 | 52 | ### Create the JSON parser with Lark, using the LALR algorithm 53 | json_parser = Lark(json_grammar, parser='lalr', 54 | # Using the basic lexer isn't required, and isn't usually recommended. 55 | # But, it's good enough for JSON, and it's slightly faster.
56 | lexer='basic', 57 | # Disabling propagate_positions and placeholders slightly improves speed 58 | propagate_positions=False, 59 | maybe_placeholders=False, 60 | # Using an internal transformer is faster and more memory efficient 61 | transformer=TreeToJson()) 62 | -------------------------------------------------------------------------------- /examples/advanced/conf_earley.py: -------------------------------------------------------------------------------- 1 | """ 2 | Earley’s dynamic lexer 3 | ====================== 4 | 5 | Demonstrates the power of Earley’s dynamic lexer on a toy configuration language 6 | 7 | Using a lexer for configuration files is tricky, because values don't 8 | have to be surrounded by delimiters. Using a basic lexer for this just won't work. 9 | 10 | In this example we use a dynamic lexer and let the Earley parser resolve the ambiguity. 11 | 12 | Another approach is to use the contextual lexer with LALR. It is less powerful than Earley, 13 | but it can handle some ambiguity when lexing and it's much faster. 14 | See examples/conf_lalr.py for an example of that approach. 15 | 16 | """ 17 | from lark import Lark 18 | 19 | parser = Lark(r""" 20 | start: _NL? section+ 21 | section: "[" NAME "]" _NL item+ 22 | item: NAME "=" VALUE? _NL 23 | 24 | NAME: /\w/+ 25 | VALUE: /./+ 26 | 27 | %import common.NEWLINE -> _NL 28 | %import common.WS_INLINE 29 | %ignore WS_INLINE 30 | """, parser="earley") 31 | 32 | def test(): 33 | sample_conf = """ 34 | [bla] 35 | 36 | a=Hello 37 | this="that",4 38 | empty= 39 | """ 40 | 41 | r = parser.parse(sample_conf) 42 | print (r.pretty()) 43 | 44 | if __name__ == '__main__': 45 | test() 46 | -------------------------------------------------------------------------------- /examples/advanced/conf_lalr.py: -------------------------------------------------------------------------------- 1 | """ 2 | LALR’s contextual lexer 3 | ======================= 4 | 5 | This example demonstrates the power of LALR's contextual lexer, 6 | by parsing a toy configuration language. 7 | 8 | The terminals `NAME` and `VALUE` overlap. They can match the same input. 9 | A basic lexer would arbitrarily choose one over the other, based on priority, 10 | which would lead to a (confusing) parse error. 11 | However, due to the unambiguous structure of the grammar, Lark's LALR(1) algorithm knows 12 | which one of them to expect at each point during the parse. 13 | The lexer then only matches the tokens that the parser expects. 14 | The result is a correct parse, something that is impossible with a regular lexer. 15 | 16 | Another approach is to use the Earley algorithm. 17 | It will handle more cases than the contextual lexer, but at the cost of performance. 18 | See examples/conf_earley.py for an example of that approach. 19 | """ 20 | from lark import Lark 21 | 22 | parser = Lark(r""" 23 | start: _NL? section+ 24 | section: "[" NAME "]" _NL item+ 25 | item: NAME "=" VALUE? 
_NL 26 | 27 | NAME: /\w/+ 28 | VALUE: /./+ 29 | 30 | %import common.NEWLINE -> _NL 31 | %import common.WS_INLINE 32 | %ignore WS_INLINE 33 | """, parser="lalr") 34 | 35 | 36 | sample_conf = """ 37 | [bla] 38 | a=Hello 39 | this="that",4 40 | empty= 41 | """ 42 | 43 | print(parser.parse(sample_conf).pretty()) 44 | -------------------------------------------------------------------------------- /examples/advanced/create_ast.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creating an AST from the parse tree 3 | =================================== 4 | 5 | This example demonstrates how to transform a parse-tree into an AST using `lark.ast_utils`. 6 | 7 | create_transformer() collects every subclass of `Ast` subclass from the module, 8 | and creates a Lark transformer that builds the AST with no extra code. 9 | 10 | This example only works with Python 3. 11 | """ 12 | 13 | import sys 14 | from typing import List 15 | from dataclasses import dataclass 16 | 17 | from lark import Lark, ast_utils, Transformer, v_args 18 | from lark.tree import Meta 19 | 20 | this_module = sys.modules[__name__] 21 | 22 | 23 | # 24 | # Define AST 25 | # 26 | class _Ast(ast_utils.Ast): 27 | # This will be skipped by create_transformer(), because it starts with an underscore 28 | pass 29 | 30 | class _Statement(_Ast): 31 | # This will be skipped by create_transformer(), because it starts with an underscore 32 | pass 33 | 34 | @dataclass 35 | class Value(_Ast, ast_utils.WithMeta): 36 | "Uses WithMeta to include line-number metadata in the meta attribute" 37 | meta: Meta 38 | value: object 39 | 40 | @dataclass 41 | class Name(_Ast): 42 | name: str 43 | 44 | @dataclass 45 | class CodeBlock(_Ast, ast_utils.AsList): 46 | # Corresponds to code_block in the grammar 47 | statements: List[_Statement] 48 | 49 | @dataclass 50 | class If(_Statement): 51 | cond: Value 52 | then: CodeBlock 53 | 54 | @dataclass 55 | class SetVar(_Statement): 56 | # Corresponds to set_var in the grammar 57 | name: str 58 | value: Value 59 | 60 | @dataclass 61 | class Print(_Statement): 62 | value: Value 63 | 64 | 65 | class ToAst(Transformer): 66 | # Define extra transformation functions, for rules that don't correspond to an AST class. 
67 | 68 | def STRING(self, s): 69 | # Remove quotation marks 70 | return s[1:-1] 71 | 72 | def DEC_NUMBER(self, n): 73 | return int(n) 74 | 75 | @v_args(inline=True) 76 | def start(self, x): 77 | return x 78 | 79 | # 80 | # Define Parser 81 | # 82 | 83 | parser = Lark(""" 84 | start: code_block 85 | 86 | code_block: statement+ 87 | 88 | ?statement: if | set_var | print 89 | 90 | if: "if" value "{" code_block "}" 91 | set_var: NAME "=" value ";" 92 | print: "print" value ";" 93 | 94 | value: name | STRING | DEC_NUMBER 95 | name: NAME 96 | 97 | %import python (NAME, STRING, DEC_NUMBER) 98 | %import common.WS 99 | %ignore WS 100 | """, 101 | parser="lalr", 102 | ) 103 | 104 | transformer = ast_utils.create_transformer(this_module, ToAst()) 105 | 106 | def parse(text): 107 | tree = parser.parse(text) 108 | return transformer.transform(tree) 109 | 110 | # 111 | # Test 112 | # 113 | 114 | if __name__ == '__main__': 115 | print(parse(""" 116 | a = 1; 117 | if a { 118 | print "a is 1"; 119 | a = 2; 120 | } 121 | """)) 122 | -------------------------------------------------------------------------------- /examples/advanced/custom_lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Custom lexer 3 | ============ 4 | 5 | Demonstrates using a custom lexer to parse a non-textual stream of data 6 | 7 | You can use a custom lexer to tokenize text when the lexers offered by Lark 8 | are too slow, or not flexible enough. 9 | 10 | You can also use it (as shown in this example) to tokenize streams of objects. 11 | """ 12 | from lark import Lark, Transformer, v_args 13 | from lark.lexer import Lexer, Token 14 | 15 | class TypeLexer(Lexer): 16 | def __init__(self, lexer_conf): 17 | pass 18 | 19 | def lex(self, data): 20 | for obj in data: 21 | if isinstance(obj, int): 22 | yield Token('INT', obj) 23 | elif isinstance(obj, (type(''), type(u''))): 24 | yield Token('STR', obj) 25 | else: 26 | raise TypeError(obj) 27 | 28 | parser = Lark(""" 29 | start: data_item+ 30 | data_item: STR INT* 31 | 32 | %declare STR INT 33 | """, parser='lalr', lexer=TypeLexer) 34 | 35 | 36 | class ParseToDict(Transformer): 37 | @v_args(inline=True) 38 | def data_item(self, name, *numbers): 39 | return name.value, [n.value for n in numbers] 40 | 41 | start = dict 42 | 43 | 44 | def test(): 45 | data = ['alice', 1, 27, 3, 'bob', 4, 'carrie', 'dan', 8, 6] 46 | 47 | print(data) 48 | 49 | tree = parser.parse(data) 50 | res = ParseToDict().transform(tree) 51 | 52 | print('-->') 53 | print(res) # prints {'alice': [1, 27, 3], 'bob': [4], 'carrie': [], 'dan': [8, 6]} 54 | 55 | 56 | if __name__ == '__main__': 57 | test() 58 | -------------------------------------------------------------------------------- /examples/advanced/dynamic_complete.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using lexer dynamic_complete 3 | ============================ 4 | 5 | Demonstrates how to use ``lexer='dynamic_complete'`` and ``ambiguity='explicit'`` 6 | 7 | Sometimes you have data that is highly ambiguous or 'broken' in some sense. 8 | When using ``parser='earley'`` and ``lexer='dynamic_complete'``, Lark will be able 9 | parse just about anything as long as there is a valid way to generate it from 10 | the Grammar, including looking 'into' the Regexes. 
11 | 12 | This examples shows how to parse a json input where the quotes have been 13 | replaced by underscores: ``{_foo_:{}, _bar_: [], _baz_: __}`` 14 | Notice that underscores might still appear inside strings, so a potentially 15 | valid reading of the above is: 16 | ``{"foo_:{}, _bar": [], "baz": ""}`` 17 | """ 18 | from pprint import pprint 19 | 20 | from lark import Lark, Tree, Transformer, v_args 21 | from lark.visitors import Transformer_InPlace 22 | 23 | GRAMMAR = r""" 24 | %import common.SIGNED_NUMBER 25 | %import common.WS_INLINE 26 | %import common.NEWLINE 27 | %ignore WS_INLINE 28 | 29 | ?start: value 30 | 31 | ?value: object 32 | | array 33 | | string 34 | | SIGNED_NUMBER -> number 35 | | "true" -> true 36 | | "false" -> false 37 | | "null" -> null 38 | 39 | array : "[" (value ("," value)*)? "]" 40 | object : "{" (pair ("," pair)*)? "}" 41 | pair : string ":" value 42 | 43 | string: STRING 44 | STRING : ESCAPED_STRING 45 | 46 | ESCAPED_STRING: QUOTE_CHAR _STRING_ESC_INNER QUOTE_CHAR 47 | QUOTE_CHAR: "_" 48 | 49 | _STRING_INNER: /.*/ 50 | _STRING_ESC_INNER: _STRING_INNER /(? var 26 | 27 | TEMPLATE_NAME: "$" NAME 28 | 29 | ?template_start: (stmt | testlist_star_expr _NEWLINE) 30 | 31 | %ignore /[\t \f]+/ // WS 32 | %ignore /\\[\t \f]*\r?\n/ // LINE_CONT 33 | %ignore COMMENT 34 | """ 35 | 36 | parser = Lark(TEMPLATED_PYTHON, parser='lalr', start=['single_input', 'file_input', 'eval_input', 'template_start'], postlex=PythonIndenter(), maybe_placeholders=False) 37 | 38 | 39 | def parse_template(s): 40 | return parser.parse(s + '\n', start='template_start') 41 | 42 | def parse_code(s): 43 | return parser.parse(s + '\n', start='file_input') 44 | 45 | 46 | # 47 | # 2. Define translations using templates (each template code is parsed to a template tree) 48 | # 49 | 50 | pytemplate = TemplateConf(parse=parse_template) 51 | 52 | translations_3to2 = { 53 | 'yield from $a': 54 | 'for _tmp in $a: yield _tmp', 55 | 56 | 'raise $e from $x': 57 | 'raise $e', 58 | 59 | '$a / $b': 60 | 'float($a) / $b', 61 | } 62 | translations_3to2 = {pytemplate(k): pytemplate(v) for k, v in translations_3to2.items()} 63 | 64 | # 65 | # 3. Translate and reconstruct Python 3 code into valid Python 2 code 66 | # 67 | 68 | python_reconstruct = PythonReconstructor(parser) 69 | 70 | def translate_py3to2(code): 71 | tree = parse_code(code) 72 | tree = TemplateTranslator(translations_3to2).translate(tree) 73 | return python_reconstruct.reconstruct(tree) 74 | 75 | 76 | # 77 | # Test Code 78 | # 79 | 80 | _TEST_CODE = ''' 81 | if a / 2 > 1: 82 | yield from [1,2,3] 83 | else: 84 | raise ValueError(a) from e 85 | 86 | ''' 87 | 88 | def test(): 89 | print(_TEST_CODE) 90 | print(' -----> ') 91 | print(translate_py3to2(_TEST_CODE)) 92 | 93 | if __name__ == '__main__': 94 | test() 95 | -------------------------------------------------------------------------------- /examples/advanced/python_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Grammar-complete Python Parser 3 | ============================== 4 | 5 | A fully-working Python 2 & 3 parser (but not production ready yet!) 
6 | 7 | This example demonstrates usage of the included Python grammars 8 | """ 9 | import sys 10 | import os, os.path 11 | from io import open 12 | import glob, time 13 | 14 | from lark import Lark 15 | from lark.indenter import PythonIndenter 16 | 17 | 18 | kwargs = dict(postlex=PythonIndenter(), start='file_input') 19 | 20 | # Official Python grammar by Lark 21 | python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'], parser='lalr', **kwargs) 22 | 23 | # Local Python2 grammar 24 | python_parser2 = Lark.open('python2.lark', rel_to=__file__, parser='lalr', **kwargs) 25 | python_parser2_earley = Lark.open('python2.lark', rel_to=__file__, parser='earley', lexer='basic', **kwargs) 26 | 27 | try: 28 | xrange 29 | except NameError: 30 | chosen_parser = python_parser3 31 | else: 32 | chosen_parser = python_parser2 33 | 34 | 35 | def _read(fn, *args): 36 | kwargs = {'encoding': 'iso-8859-1'} 37 | with open(fn, *args, **kwargs) as f: 38 | return f.read() 39 | 40 | def _get_lib_path(): 41 | if os.name == 'nt': 42 | if 'PyPy' in sys.version: 43 | return os.path.join(sys.base_prefix, 'lib-python', sys.winver) 44 | else: 45 | return os.path.join(sys.base_prefix, 'Lib') 46 | else: 47 | return [x for x in sys.path if x.endswith('%s.%s' % sys.version_info[:2])][0] 48 | 49 | def test_python_lib(): 50 | path = _get_lib_path() 51 | 52 | start = time.time() 53 | files = glob.glob(path+'/*.py') 54 | total_kb = 0 55 | for f in files: 56 | r = _read(os.path.join(path, f)) 57 | kb = len(r) / 1024 58 | print( '%s -\t%.1f kb' % (f, kb)) 59 | chosen_parser.parse(r + '\n') 60 | total_kb += kb 61 | 62 | end = time.time() 63 | print( "test_python_lib (%d files, %.1f kb), time: %.2f secs"%(len(files), total_kb, end-start) ) 64 | 65 | def test_earley_equals_lalr(): 66 | path = _get_lib_path() 67 | 68 | files = glob.glob(path+'/*.py') 69 | for f in files: 70 | print( f ) 71 | tree1 = python_parser2.parse(_read(os.path.join(path, f)) + '\n') 72 | tree2 = python_parser2_earley.parse(_read(os.path.join(path, f)) + '\n') 73 | assert tree1 == tree2 74 | 75 | 76 | if __name__ == '__main__': 77 | test_python_lib() 78 | # test_earley_equals_lalr() 79 | # python_parser3.parse(_read(sys.argv[1]) + '\n') 80 | -------------------------------------------------------------------------------- /examples/advanced/reconstruct_json.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reconstruct a JSON 3 | ================== 4 | 5 | Demonstrates the experimental text-reconstruction feature 6 | 7 | The Reconstructor takes a parse tree (already filtered from punctuation, of course), 8 | and reconstructs it into correct text, that can be parsed correctly. 9 | It can be useful for creating "hooks" to alter data before handing it to other parsers. You can also use it to generate samples from scratch. 
10 | """ 11 | 12 | import json 13 | 14 | from lark import Lark 15 | from lark.reconstruct import Reconstructor 16 | 17 | from _json_parser import json_grammar 18 | 19 | test_json = ''' 20 | { 21 | "empty_object" : {}, 22 | "empty_array" : [], 23 | "booleans" : { "YES" : true, "NO" : false }, 24 | "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], 25 | "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ], 26 | "nothing" : null 27 | } 28 | ''' 29 | 30 | def test_earley(): 31 | 32 | json_parser = Lark(json_grammar, maybe_placeholders=False) 33 | tree = json_parser.parse(test_json) 34 | 35 | new_json = Reconstructor(json_parser).reconstruct(tree) 36 | print (new_json) 37 | print (json.loads(new_json) == json.loads(test_json)) 38 | 39 | 40 | def test_lalr(): 41 | 42 | json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) 43 | tree = json_parser.parse(test_json) 44 | 45 | new_json = Reconstructor(json_parser).reconstruct(tree) 46 | print (new_json) 47 | print (json.loads(new_json) == json.loads(test_json)) 48 | 49 | test_earley() 50 | test_lalr() 51 | -------------------------------------------------------------------------------- /examples/advanced/reconstruct_python.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reconstruct Python 3 | ================== 4 | 5 | Demonstrates how Lark's experimental text-reconstruction feature can recreate 6 | functional Python code from its parse-tree, using just the correct grammar and 7 | a small formatter. 8 | 9 | """ 10 | 11 | from lark import Token, Lark 12 | from lark.reconstruct import Reconstructor 13 | from lark.indenter import PythonIndenter 14 | 15 | # Official Python grammar by Lark 16 | python_parser3 = Lark.open_from_package('lark', 'python.lark', ['grammars'], 17 | parser='lalr', postlex=PythonIndenter(), start='file_input', 18 | maybe_placeholders=False # Necessary for reconstructor 19 | ) 20 | 21 | SPACE_AFTER = set(',+-*/~@<>="|:') 22 | SPACE_BEFORE = (SPACE_AFTER - set(',:')) | set('\'') 23 | 24 | 25 | def special(sym): 26 | return Token('SPECIAL', sym.name) 27 | 28 | def postproc(items): 29 | stack = ['\n'] 30 | actions = [] 31 | last_was_whitespace = True 32 | for item in items: 33 | if isinstance(item, Token) and item.type == 'SPECIAL': 34 | actions.append(item.value) 35 | else: 36 | if actions: 37 | assert actions[0] == '_NEWLINE' and '_NEWLINE' not in actions[1:], actions 38 | 39 | for a in actions[1:]: 40 | if a == '_INDENT': 41 | stack.append(stack[-1] + ' ' * 4) 42 | else: 43 | assert a == '_DEDENT' 44 | stack.pop() 45 | actions.clear() 46 | yield stack[-1] 47 | last_was_whitespace = True 48 | if not last_was_whitespace: 49 | if item[0] in SPACE_BEFORE: 50 | yield ' ' 51 | yield item 52 | last_was_whitespace = item[-1].isspace() 53 | if not last_was_whitespace: 54 | if item[-1] in SPACE_AFTER: 55 | yield ' ' 56 | last_was_whitespace = True 57 | yield "\n" 58 | 59 | 60 | class PythonReconstructor: 61 | def __init__(self, parser): 62 | self._recons = Reconstructor(parser, {'_NEWLINE': special, '_DEDENT': special, '_INDENT': special}) 63 | 64 | def reconstruct(self, tree): 65 | return self._recons.reconstruct(tree, postproc) 66 | 67 | 68 | def test(): 69 | python_reconstructor = PythonReconstructor(python_parser3) 70 | 71 | self_contents = open(__file__).read() 72 | 73 | tree = python_parser3.parse(self_contents+'\n') 74 | output = python_reconstructor.reconstruct(tree) 75 | 76 | tree_new = python_parser3.parse(output) 77 | print(tree.pretty()) 78 | 
print(tree_new.pretty()) 79 | # assert tree.pretty() == tree_new.pretty() 80 | assert tree == tree_new 81 | 82 | print(output) 83 | 84 | 85 | if __name__ == '__main__': 86 | test() 87 | -------------------------------------------------------------------------------- /examples/advanced/template_lark.lark: -------------------------------------------------------------------------------- 1 | start: (_item | _NL)* 2 | 3 | _item: rule 4 | | token 5 | | statement 6 | 7 | _rule_or_token: RULE 8 | | TOKEN 9 | rule: RULE rule_params priority? ":" expansions{_rule_or_token} _NL 10 | token: TOKEN priority? ":" expansions{TOKEN} _NL 11 | 12 | rule_params: ["{" RULE ("," RULE)* "}"] 13 | 14 | priority: "." NUMBER 15 | 16 | statement: "%ignore" expansions{TOKEN} _NL -> ignore 17 | | "%import" import_path{_rule_or_token} ["->" _rule_or_token] _NL -> import 18 | | "%import" import_path{_rule_or_token} name_list{_rule_or_token} _NL -> multi_import 19 | | "%declare" TOKEN+ -> declare 20 | 21 | !import_path{name}: "."? name ("." name)* 22 | name_list{name}: "(" name ("," name)* ")" 23 | 24 | ?expansions{name}: alias{name} (_VBAR alias{name})* 25 | 26 | ?alias{name}: expansion{name} ["->" RULE] 27 | 28 | ?expansion{name}: expr{name}* 29 | 30 | ?expr{name}: atom{name} [OP | "~" NUMBER [".." NUMBER]] 31 | 32 | ?atom{name}: "(" expansions{name} ")" 33 | | "[" expansions{name} "]" -> maybe 34 | | value{name} 35 | 36 | ?value{name}: STRING ".." STRING -> literal_range 37 | | name 38 | | (REGEXP | STRING) -> literal 39 | | name "{" value{name} ("," value{name})* "}" -> template_usage 40 | 41 | _VBAR: _NL? "|" 42 | OP: /[+*]|[?](?![a-z])/ 43 | RULE: /!?[_?]?[a-z][_a-z0-9]*/ 44 | TOKEN: /_?[A-Z][_A-Z0-9]*/ 45 | STRING: _STRING "i"? 46 | REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/ 47 | _NL: /(\r?\n)+\s*/ 48 | 49 | %import common.ESCAPED_STRING -> _STRING 50 | %import common.INT -> NUMBER 51 | %import common.WS_INLINE 52 | 53 | COMMENT: /\s*/ "//" /[^\n]/* 54 | 55 | %ignore WS_INLINE 56 | %ignore COMMENT 57 | -------------------------------------------------------------------------------- /examples/advanced/templates.py: -------------------------------------------------------------------------------- 1 | """ 2 | Templates 3 | ========= 4 | 5 | This example shows how to use Lark's templates to achieve cleaner grammars 6 | 7 | """ 8 | from lark import Lark 9 | 10 | grammar = r""" 11 | start: list | dict 12 | 13 | list: "[" _seperated{atom, ","} "]" 14 | dict: "{" _seperated{key_value, ","} "}" 15 | key_value: atom ":" atom 16 | 17 | _seperated{x, sep}: x (sep x)* // Define a sequence of 'x sep x sep x ...' 18 | 19 | atom: NUMBER | ESCAPED_STRING 20 | 21 | %import common (NUMBER, ESCAPED_STRING, WS) 22 | %ignore WS 23 | """ 24 | 25 | 26 | parser = Lark(grammar) 27 | 28 | print(parser.parse('[1, "a", 2]')) 29 | print(parser.parse('{"a": 2, "b": 6}')) 30 | -------------------------------------------------------------------------------- /examples/advanced/tree_forest_transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Transform a Forest 3 | ================== 4 | 5 | This example demonstrates how to subclass ``TreeForestTransformer`` to 6 | directly transform a SPPF. 
7 | """ 8 | 9 | from lark import Lark 10 | from lark.parsers.earley_forest import TreeForestTransformer, handles_ambiguity, Discard 11 | 12 | class CustomTransformer(TreeForestTransformer): 13 | 14 | @handles_ambiguity 15 | def sentence(self, trees): 16 | return next(tree for tree in trees if tree.data == 'simple') 17 | 18 | def simple(self, children): 19 | children.append('.') 20 | return self.tree_class('simple', children) 21 | 22 | def adj(self, children): 23 | return Discard 24 | 25 | def __default_token__(self, token): 26 | return token.capitalize() 27 | 28 | grammar = """ 29 | sentence: noun verb noun -> simple 30 | | noun verb "like" noun -> comparative 31 | 32 | noun: adj? NOUN 33 | verb: VERB 34 | adj: ADJ 35 | 36 | NOUN: "flies" | "bananas" | "fruit" 37 | VERB: "like" | "flies" 38 | ADJ: "fruit" 39 | 40 | %import common.WS 41 | %ignore WS 42 | """ 43 | 44 | parser = Lark(grammar, start='sentence', ambiguity='forest') 45 | sentence = 'fruit flies like bananas' 46 | forest = parser.parse(sentence) 47 | 48 | tree = CustomTransformer(resolve_ambiguity=False).transform(forest) 49 | print(tree.pretty()) 50 | 51 | # Output: 52 | # 53 | # simple 54 | # noun Flies 55 | # verb Like 56 | # noun Bananas 57 | # . 58 | # 59 | -------------------------------------------------------------------------------- /examples/calc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Basic calculator 3 | ================ 4 | 5 | A simple example of a REPL calculator 6 | 7 | This example shows how to write a basic calculator with variables. 8 | """ 9 | from lark import Lark, Transformer, v_args 10 | 11 | 12 | try: 13 | input = raw_input # For Python2 compatibility 14 | except NameError: 15 | pass 16 | 17 | 18 | calc_grammar = """ 19 | ?start: sum 20 | | NAME "=" sum -> assign_var 21 | 22 | ?sum: product 23 | | sum "+" product -> add 24 | | sum "-" product -> sub 25 | 26 | ?product: atom 27 | | product "*" atom -> mul 28 | | product "/" atom -> div 29 | 30 | ?atom: NUMBER -> number 31 | | "-" atom -> neg 32 | | NAME -> var 33 | | "(" sum ")" 34 | 35 | %import common.CNAME -> NAME 36 | %import common.NUMBER 37 | %import common.WS_INLINE 38 | 39 | %ignore WS_INLINE 40 | """ 41 | 42 | 43 | @v_args(inline=True) # Affects the signatures of the methods 44 | class CalculateTree(Transformer): 45 | from operator import add, sub, mul, truediv as div, neg 46 | number = float 47 | 48 | def __init__(self): 49 | self.vars = {} 50 | 51 | def assign_var(self, name, value): 52 | self.vars[name] = value 53 | return value 54 | 55 | def var(self, name): 56 | try: 57 | return self.vars[name] 58 | except KeyError: 59 | raise Exception("Variable not found: %s" % name) 60 | 61 | 62 | calc_parser = Lark(calc_grammar, parser='lalr', transformer=CalculateTree()) 63 | calc = calc_parser.parse 64 | 65 | 66 | def main(): 67 | while True: 68 | try: 69 | s = input('> ') 70 | except EOFError: 71 | break 72 | print(calc(s)) 73 | 74 | 75 | def test(): 76 | print(calc("a = 1+2")) 77 | print(calc("1+a*-3")) 78 | 79 | 80 | if __name__ == '__main__': 81 | # test() 82 | main() 83 | -------------------------------------------------------------------------------- /examples/composition/README.rst: -------------------------------------------------------------------------------- 1 | Grammar Composition 2 | =================== 3 | 4 | This example shows how to do grammar composition in Lark, by creating a new 5 | file format that allows both CSV and JSON to co-exist. 
6 | 7 | We show how, by using namespaces, Lark grammars and their transformers can be fully reused - 8 | they don't need to care if their grammar is used directly, or being imported, or who is doing the importing. 9 | 10 | See `main.py`_ for more details. 11 | 12 | .. _main.py: https://github.com/lark-parser/lark/blob/master/examples/composition/main.py 13 | -------------------------------------------------------------------------------- /examples/composition/combined_csv_and_json.txt: -------------------------------------------------------------------------------- 1 | {"header": ["this", "is", "json", 1111]} 2 | # file lines author 3 | data.json 12 Robin 4 | data.csv 30 erezsh 5 | compiler.py 123123 Megalng 6 | {"footer": "done"} 7 | -------------------------------------------------------------------------------- /examples/composition/csv.lark: -------------------------------------------------------------------------------- 1 | start: header _NL row+ 2 | header: "#" " "? (WORD _SEPARATOR?)+ 3 | row: (_anything _SEPARATOR?)+ _NL 4 | _anything: INT | WORD | NON_SEPARATOR_STRING | FLOAT | SIGNED_FLOAT 5 | NON_SEPARATOR_STRING: /[a-zA-z.;\\\/]+/ 6 | _SEPARATOR: /[ ]+/ 7 | | "\t" 8 | | "," 9 | 10 | %import common.NEWLINE -> _NL 11 | %import common.WORD 12 | %import common.INT 13 | %import common.FLOAT 14 | %import common.SIGNED_FLOAT 15 | -------------------------------------------------------------------------------- /examples/composition/eval_csv.py: -------------------------------------------------------------------------------- 1 | "Transformer for evaluating csv.lark" 2 | 3 | from lark import Transformer 4 | 5 | class CsvTreeToPandasDict(Transformer): 6 | INT = int 7 | FLOAT = float 8 | SIGNED_FLOAT = float 9 | WORD = str 10 | NON_SEPARATOR_STRING = str 11 | 12 | def row(self, children): 13 | return children 14 | 15 | def start(self, children): 16 | data = {} 17 | 18 | header = children[0].children 19 | for heading in header: 20 | data[heading] = [] 21 | 22 | for row in children[1:]: 23 | for i, element in enumerate(row): 24 | data[header[i]].append(element) 25 | 26 | return data 27 | -------------------------------------------------------------------------------- /examples/composition/eval_json.py: -------------------------------------------------------------------------------- 1 | "Transformer for evaluating json.lark" 2 | 3 | from lark import Transformer, v_args 4 | 5 | class JsonTreeToJson(Transformer): 6 | @v_args(inline=True) 7 | def string(self, s): 8 | return s[1:-1].replace('\\"', '"') 9 | 10 | array = list 11 | pair = tuple 12 | object = dict 13 | number = v_args(inline=True)(float) 14 | 15 | null = lambda self, _: None 16 | true = lambda self, _: True 17 | false = lambda self, _: False 18 | -------------------------------------------------------------------------------- /examples/composition/json.lark: -------------------------------------------------------------------------------- 1 | ?start: value 2 | 3 | ?value: object 4 | | array 5 | | string 6 | | SIGNED_NUMBER -> number 7 | | "true" -> true 8 | | "false" -> false 9 | | "null" -> null 10 | 11 | array : "[" _WS? [value ("," _WS? value)*] "]" 12 | object : "{" _WS? [pair ("," _WS? 
pair)*] "}" 13 | pair : string ":" _WS value 14 | 15 | string : ESCAPED_STRING 16 | 17 | %import common.ESCAPED_STRING 18 | %import common.SIGNED_NUMBER 19 | %import common.WS -> _WS 20 | -------------------------------------------------------------------------------- /examples/composition/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Grammar Composition 3 | =================== 4 | 5 | This example shows how to do grammar composition in Lark, by creating a new 6 | file format that allows both CSV and JSON to co-exist. 7 | 8 | 1) We define ``storage.lark``, which imports both ``csv.lark`` and ``json.lark``, 9 | and allows them to be used one after the other. 10 | 11 | In the generated tree, each imported rule/terminal is automatically prefixed (with ``json__`` or ``csv__), 12 | which creates an implicit namespace and allows them to coexist without collisions. 13 | 14 | 2) We merge their respective transformers (unaware of each other) into a new base transformer. 15 | The resulting transformer can evaluate both JSON and CSV in the parse tree. 16 | 17 | The methods of each transformer are renamed into their appropriate namespace, using the given prefix. 18 | This approach allows full re-use: the transformers don't need to care if their grammar is used directly, 19 | or being imported, or who is doing the importing. 20 | 21 | """ 22 | from pathlib import Path 23 | from lark import Lark 24 | from json import dumps 25 | from lark.visitors import Transformer, merge_transformers 26 | 27 | from eval_csv import CsvTreeToPandasDict 28 | from eval_json import JsonTreeToJson 29 | 30 | __dir__ = Path(__file__).parent 31 | 32 | class Storage(Transformer): 33 | def start(self, children): 34 | return children 35 | 36 | storage_transformer = merge_transformers(Storage(), csv=CsvTreeToPandasDict(), json=JsonTreeToJson()) 37 | 38 | parser = Lark.open("storage.lark", rel_to=__file__) 39 | 40 | def main(): 41 | json_tree = parser.parse(dumps({"test": "a", "dict": { "list": [1, 1.2] }})) 42 | res = storage_transformer.transform(json_tree) 43 | print("Just JSON: ", res) 44 | 45 | csv_json_tree = parser.parse(open(__dir__ / 'combined_csv_and_json.txt').read()) 46 | res = storage_transformer.transform(csv_json_tree) 47 | print("JSON + CSV: ", dumps(res, indent=2)) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /examples/composition/storage.lark: -------------------------------------------------------------------------------- 1 | start: (csv__start | json__start _NL?)+ 2 | 3 | // Renaming of the import variables is required, as they receive the namespace of this file. 
4 | // See: https://github.com/lark-parser/lark/pull/973#issuecomment-907287565 5 | %import .csv.start -> csv__start 6 | %import .json.start -> json__start 7 | 8 | %import .csv._NL -> _NL 9 | -------------------------------------------------------------------------------- /examples/fruitflies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/examples/fruitflies.png -------------------------------------------------------------------------------- /examples/fruitflies.py: -------------------------------------------------------------------------------- 1 | """ 2 | Handling Ambiguity 3 | ================== 4 | 5 | A demonstration of ambiguity 6 | 7 | This example shows how to get explicit ambiguity from Lark's Earley parser. 8 | 9 | """ 10 | import sys 11 | from lark import Lark, tree 12 | 13 | grammar = """ 14 | sentence: noun verb noun -> simple 15 | | noun verb "like" noun -> comparative 16 | 17 | noun: adj? NOUN 18 | verb: VERB 19 | adj: ADJ 20 | 21 | NOUN: "flies" | "bananas" | "fruit" 22 | VERB: "like" | "flies" 23 | ADJ: "fruit" 24 | 25 | %import common.WS 26 | %ignore WS 27 | """ 28 | 29 | parser = Lark(grammar, start='sentence', ambiguity='explicit') 30 | 31 | sentence = 'fruit flies like bananas' 32 | 33 | def make_png(filename): 34 | tree.pydot__tree_to_png( parser.parse(sentence), filename) 35 | 36 | def make_dot(filename): 37 | tree.pydot__tree_to_dot( parser.parse(sentence), filename) 38 | 39 | if __name__ == '__main__': 40 | print(parser.parse(sentence).pretty()) 41 | # make_png(sys.argv[1]) 42 | # make_dot(sys.argv[1]) 43 | 44 | # Output: 45 | # 46 | # _ambig 47 | # comparative 48 | # noun fruit 49 | # verb flies 50 | # noun bananas 51 | # simple 52 | # noun 53 | # fruit 54 | # flies 55 | # verb like 56 | # noun bananas 57 | # 58 | # (or view a nicer version at "./fruitflies.png") 59 | -------------------------------------------------------------------------------- /examples/grammars/README.rst: -------------------------------------------------------------------------------- 1 | Example Grammars 2 | ================ 3 | 4 | This directory is a collection of lark grammars, taken from real-world projects. 5 | 6 | - `Verilog`_ - Taken from https://github.com/circuitgraph/circuitgraph/blob/main/circuitgraph/parsing/verilog.lark 7 | 8 | .. _Verilog: https://github.com/lark-parser/lark/blob/master/examples/grammars/verilog.lark 9 | -------------------------------------------------------------------------------- /examples/grammars/verilog.lark: -------------------------------------------------------------------------------- 1 | // Taken from https://github.com/circuitgraph/circuitgraph/blob/master/circuitgraph/parsing/verilog.lark 2 | // Following https://www.verilog.com/VerilogBNF.html 3 | 4 | // 1. Source Text 5 | start: description* 6 | 7 | ?description: module 8 | 9 | module: "module" name_of_module list_of_ports? ";" module_item* "endmodule" 10 | 11 | ?name_of_module: IDENTIFIER 12 | 13 | list_of_ports: "(" port ("," port)* ")" 14 | 15 | ?port: IDENTIFIER 16 | 17 | ?module_item: input_declaration 18 | | output_declaration 19 | | net_declaration 20 | | module_instantiation 21 | | continuous_assign 22 | 23 | 24 | // 2.
Declarations 25 | input_declaration: "input" list_of_variables ";" 26 | 27 | output_declaration: "output" list_of_variables ";" 28 | 29 | net_declaration: "wire" list_of_variables ";" 30 | 31 | continuous_assign: "assign" list_of_assignments ";" 32 | 33 | list_of_variables: IDENTIFIER ("," IDENTIFIER)* 34 | 35 | list_of_assignments: assignment ("," assignment)* 36 | 37 | 38 | // 3. Primitive Instances 39 | // These are merged with module instantiations 40 | 41 | // 4. Module Instantiations 42 | module_instantiation: name_of_module module_instance ("," module_instance)* ";" 43 | 44 | module_instance: name_of_instance "(" list_of_module_connections ")" 45 | 46 | ?name_of_instance: IDENTIFIER 47 | 48 | list_of_module_connections: module_port_connection ("," module_port_connection)* 49 | | named_port_connection ("," named_port_connection)* 50 | 51 | module_port_connection: expression 52 | 53 | named_port_connection: "." IDENTIFIER "(" expression ")" 54 | 55 | 56 | // 5. Behavioral Statements 57 | assignment: lvalue "=" expression 58 | 59 | 60 | // 6. Specify Section 61 | 62 | 63 | // 7. Expressions 64 | ?lvalue: identifier 65 | 66 | expression: condition 67 | 68 | ?constant_value: constant_zero 69 | | constant_one 70 | | constant_x 71 | 72 | constant_zero: "1'b0" 73 | | "1'h0" 74 | 75 | constant_one: "1'b1" 76 | | "1'h1" 77 | 78 | constant_x: "1'bx" 79 | | "1'hx" 80 | 81 | ?condition : or 82 | | ternary 83 | 84 | ?ternary: or "?" or ":" or 85 | 86 | ?or : xor 87 | | or_gate 88 | 89 | ?or_gate: or "|" xor 90 | 91 | ?xor : and 92 | | xor_gate 93 | | xnor_gate 94 | 95 | ?xor_gate: xor "^" and 96 | 97 | ?xnor_gate: xor "~^" and 98 | | xor "^~" and 99 | 100 | ?and : unary 101 | | and_gate 102 | 103 | ?and_gate: and "&" unary 104 | 105 | ?unary : primary 106 | | not_gate 107 | 108 | not_gate: ( "!" | "~" ) primary 109 | 110 | ?primary : IDENTIFIER 111 | | constant_value 112 | | "(" or ")" 113 | 114 | 115 | // 8. General 116 | ?identifier: IDENTIFIER 117 | 118 | IDENTIFIER: CNAME 119 | | ESCAPED_IDENTIFIER 120 | 121 | 122 | // Lark 123 | ESCAPED_IDENTIFIER: /\\([^\s]+)/ 124 | COMMENT: "//" /[^\n]*/ NEWLINE 125 | NEWLINE: "\n" 126 | MULTILINE_COMMENT: /\/\*(\*(?!\/)|[^*])*\*\// 127 | 128 | %import common.CNAME 129 | %import common.ESCAPED_STRING 130 | %import common.WS 131 | 132 | %ignore WS 133 | %ignore COMMENT 134 | %ignore MULTILINE_COMMENT 135 | %ignore NEWLINE 136 | -------------------------------------------------------------------------------- /examples/indented_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parsing Indentation 3 | =================== 4 | 5 | A demonstration of parsing indentation (“whitespace significant” language) 6 | and the usage of the ``Indenter`` class. 7 | 8 | Since indentation is context-sensitive, a postlex stage is introduced to 9 | manufacture ``INDENT``/``DEDENT`` tokens. 10 | 11 | It is crucial for the indenter that the ``NL_type`` matches the spaces (and 12 | tabs) after the newline. 13 | 14 | If your whitespace-significant grammar supports comments, then ``NL_type`` 15 | must match those comments too. Otherwise, comments that appear in the middle 16 | of a line will `confuse Lark`_. 17 | 18 | .. 
_`confuse Lark`: https://github.com/lark-parser/lark/issues/863 19 | """ 20 | from lark import Lark 21 | from lark.indenter import Indenter 22 | 23 | tree_grammar = r""" 24 | %import common.CNAME -> NAME 25 | %import common.WS_INLINE 26 | %import common.SH_COMMENT 27 | %ignore WS_INLINE 28 | %ignore SH_COMMENT 29 | %declare _INDENT _DEDENT 30 | 31 | ?start: _NL* tree 32 | tree: NAME _NL [_INDENT tree+ _DEDENT] 33 | _NL: (/\r?\n[\t ]*/ | SH_COMMENT)+ 34 | """ 35 | 36 | class TreeIndenter(Indenter): 37 | NL_type = '_NL' 38 | OPEN_PAREN_types = [] 39 | CLOSE_PAREN_types = [] 40 | INDENT_type = '_INDENT' 41 | DEDENT_type = '_DEDENT' 42 | tab_len = 8 43 | 44 | parser = Lark(tree_grammar, parser='lalr', postlex=TreeIndenter()) 45 | 46 | test_tree = """ 47 | a 48 | # check this comment out 49 | b 50 | c 51 | d 52 | e 53 | f 54 | g 55 | """ 56 | 57 | def test(): 58 | print(parser.parse(test_tree).pretty()) 59 | 60 | if __name__ == '__main__': 61 | test() 62 | -------------------------------------------------------------------------------- /examples/json_parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple JSON Parser 3 | ================== 4 | 5 | The code is short and clear, and outperforms every other parser (that's written in Python). 6 | For an explanation, check out the JSON parser tutorial at /docs/json_tutorial.md 7 | """ 8 | import sys 9 | 10 | from lark import Lark, Transformer, v_args 11 | 12 | json_grammar = r""" 13 | ?start: value 14 | 15 | ?value: object 16 | | array 17 | | string 18 | | SIGNED_NUMBER -> number 19 | | "true" -> true 20 | | "false" -> false 21 | | "null" -> null 22 | 23 | array : "[" [value ("," value)*] "]" 24 | object : "{" [pair ("," pair)*] "}" 25 | pair : string ":" value 26 | 27 | string : ESCAPED_STRING 28 | 29 | %import common.ESCAPED_STRING 30 | %import common.SIGNED_NUMBER 31 | %import common.WS 32 | 33 | %ignore WS 34 | """ 35 | 36 | 37 | class TreeToJson(Transformer): 38 | @v_args(inline=True) 39 | def string(self, s): 40 | return s[1:-1].replace('\\"', '"') 41 | 42 | array = list 43 | pair = tuple 44 | object = dict 45 | number = v_args(inline=True)(float) 46 | 47 | null = lambda self, _: None 48 | true = lambda self, _: True 49 | false = lambda self, _: False 50 | 51 | 52 | ### Create the JSON parser with Lark, using the Earley algorithm 53 | # json_parser = Lark(json_grammar, parser='earley', lexer='basic') 54 | # def parse(x): 55 | # return TreeToJson().transform(json_parser.parse(x)) 56 | 57 | ### Create the JSON parser with Lark, using the LALR algorithm 58 | json_parser = Lark(json_grammar, parser='lalr', 59 | # Using the basic lexer isn't required, and isn't usually recommended. 60 | # But, it's good enough for JSON, and it's slightly faster. 
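                   # (A hedged aside on the trade-off: with parser='lalr', Lark's default is the
                   # contextual lexer, lexer='contextual', which consults the parser state to
                   # disambiguate overlapping terminals. JSON's terminals never overlap, so the
                   # simpler 'basic' lexer is safe here.)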
61 | lexer='basic', 62 | # Disabling propagate_positions and placeholders slightly improves speed 63 | propagate_positions=False, 64 | maybe_placeholders=False, 65 | # Using an internal transformer is faster and more memory efficient 66 | transformer=TreeToJson()) 67 | parse = json_parser.parse 68 | 69 | 70 | def test(): 71 | test_json = ''' 72 | { 73 | "empty_object" : {}, 74 | "empty_array" : [], 75 | "booleans" : { "YES" : true, "NO" : false }, 76 | "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], 77 | "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ], 78 | "nothing" : null 79 | } 80 | ''' 81 | 82 | j = parse(test_json) 83 | print(j) 84 | import json 85 | assert j == json.loads(test_json) 86 | 87 | 88 | if __name__ == '__main__': 89 | # test() 90 | with open(sys.argv[1]) as f: 91 | print(parse(f.read())) 92 | -------------------------------------------------------------------------------- /examples/lark_grammar.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lark Grammar 3 | ============ 4 | 5 | A reference implementation of the Lark grammar (using LALR(1)) 6 | """ 7 | import lark 8 | from pathlib import Path 9 | 10 | examples_path = Path(__file__).parent 11 | lark_path = Path(lark.__file__).parent 12 | 13 | parser = lark.Lark.open(lark_path / 'grammars/lark.lark', rel_to=__file__, parser="lalr") 14 | 15 | 16 | grammar_files = [ 17 | examples_path / 'advanced/python2.lark', 18 | examples_path / 'relative-imports/multiples.lark', 19 | examples_path / 'relative-imports/multiple2.lark', 20 | examples_path / 'relative-imports/multiple3.lark', 21 | examples_path / 'tests/no_newline_at_end.lark', 22 | examples_path / 'tests/negative_priority.lark', 23 | examples_path / 'standalone/json.lark', 24 | lark_path / 'grammars/common.lark', 25 | lark_path / 'grammars/lark.lark', 26 | lark_path / 'grammars/unicode.lark', 27 | lark_path / 'grammars/python.lark', 28 | ] 29 | 30 | def test(): 31 | for grammar_file in grammar_files: 32 | tree = parser.parse(open(grammar_file).read()) 33 | print("All grammars parsed successfully") 34 | 35 | if __name__ == '__main__': 36 | test() 37 | -------------------------------------------------------------------------------- /examples/relative-imports/multiple2.lark: -------------------------------------------------------------------------------- 1 | start: ("0" | "1")* "0" 2 | -------------------------------------------------------------------------------- /examples/relative-imports/multiple3.lark: -------------------------------------------------------------------------------- 1 | start: mod0mod0+ 2 | 3 | mod0mod0: "0" | "1" mod1mod0 4 | mod1mod0: "1" | "0" mod2mod1 mod1mod0 5 | mod2mod1: "0" | "1" mod2mod1 6 | -------------------------------------------------------------------------------- /examples/relative-imports/multiples.lark: -------------------------------------------------------------------------------- 1 | start: "2:" multiple2 2 | | "3:" multiple3 3 | 4 | %import .multiple2.start -> multiple2 5 | %import .multiple3.start -> multiple3 6 | -------------------------------------------------------------------------------- /examples/relative-imports/multiples.py: -------------------------------------------------------------------------------- 1 | # 2 | # This example demonstrates relative imports with rule rewrite 3 | # see multiples.lark 4 | # 5 | 6 | # 7 | # if b is a number written in binary, and m is either 2 or 3, 8 | # the grammar aims to recognise m:b iff b is a multiple of m 9 | # 10 | # for example,
3:1001 is recognised 11 | # because 9 (0b1001) is a multiple of 3 12 | # 13 | 14 | from lark import Lark, UnexpectedInput 15 | 16 | parser = Lark.open('multiples.lark', rel_to=__file__, parser='lalr') 17 | 18 | def is_in_grammar(data): 19 | try: 20 | parser.parse(data) 21 | except UnexpectedInput: 22 | return False 23 | return True 24 | 25 | for n_dec in range(100): 26 | n_bin = bin(n_dec)[2:] 27 | assert is_in_grammar('2:{}'.format(n_bin)) == (n_dec % 2 == 0) 28 | assert is_in_grammar('3:{}'.format(n_bin)) == (n_dec % 3 == 0) 29 | -------------------------------------------------------------------------------- /examples/standalone/README.rst: -------------------------------------------------------------------------------- 1 | Standalone example 2 | ================== 3 | 4 | To initialize, cd to this folder, and run: 5 | 6 | .. code-block:: bash 7 | 8 | ./create_standalone.sh 9 | 10 | Or: 11 | 12 | .. code-block:: bash 13 | 14 | python -m lark.tools.standalone json.lark > json_parser.py 15 | 16 | Then run using: 17 | 18 | .. code-block:: bash 19 | 20 | python json_parser_main.py 21 | -------------------------------------------------------------------------------- /examples/standalone/create_standalone.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | PYTHONPATH=../.. python -m lark.tools.standalone json.lark > json_parser.py 3 | -------------------------------------------------------------------------------- /examples/standalone/json.lark: -------------------------------------------------------------------------------- 1 | ?start: value 2 | 3 | ?value: object 4 | | array 5 | | string 6 | | SIGNED_NUMBER -> number 7 | | "true" -> true 8 | | "false" -> false 9 | | "null" -> null 10 | 11 | array : "[" [value ("," value)*] "]" 12 | object : "{" [pair ("," pair)*] "}" 13 | pair : string ":" value 14 | 15 | string : ESCAPED_STRING 16 | 17 | %import common.ESCAPED_STRING 18 | %import common.SIGNED_NUMBER 19 | %import common.WS 20 | 21 | %ignore WS 22 | -------------------------------------------------------------------------------- /examples/standalone/json_parser_main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Standalone Parser 3 | =================================== 4 | 5 | This example demonstrates how to generate and use the standalone parser, 6 | using the JSON example. 7 | 8 | See README.rst for more details.
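To regenerate ``json_parser.py``, run the command from this directory's README
(or ``./create_standalone.sh``):

    python -m lark.tools.standalone json.lark > json_parser.py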
9 | """ 10 | 11 | import sys 12 | 13 | from json_parser import Lark_StandAlone, Transformer, v_args 14 | 15 | inline_args = v_args(inline=True) 16 | 17 | class TreeToJson(Transformer): 18 | @inline_args 19 | def string(self, s): 20 | return s[1:-1].replace('\\"', '"') 21 | 22 | array = list 23 | pair = tuple 24 | object = dict 25 | number = inline_args(float) 26 | 27 | null = lambda self, _: None 28 | true = lambda self, _: True 29 | false = lambda self, _: False 30 | 31 | 32 | parser = Lark_StandAlone(transformer=TreeToJson()) 33 | 34 | if __name__ == '__main__': 35 | with open(sys.argv[1]) as f: 36 | print(parser.parse(f.read())) 37 | -------------------------------------------------------------------------------- /examples/tests/negative_priority.lark: -------------------------------------------------------------------------------- 1 | start: r 2 | r.-1: "a" 3 | -------------------------------------------------------------------------------- /examples/tests/no_newline_at_end.lark: -------------------------------------------------------------------------------- 1 | start: "a" 2 | -------------------------------------------------------------------------------- /examples/turtle_dsl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Turtle DSL 3 | ========== 4 | 5 | Implements a LOGO-like toy language for Python’s turtle, with interpreter. 6 | """ 7 | 8 | try: 9 | input = raw_input # For Python2 compatibility 10 | except NameError: 11 | pass 12 | 13 | import turtle 14 | 15 | from lark import Lark 16 | 17 | turtle_grammar = """ 18 | start: instruction+ 19 | 20 | instruction: MOVEMENT NUMBER -> movement 21 | | "c" COLOR [COLOR] -> change_color 22 | | "fill" code_block -> fill 23 | | "repeat" NUMBER code_block -> repeat 24 | 25 | code_block: "{" instruction+ "}" 26 | 27 | MOVEMENT: "f"|"b"|"l"|"r" 28 | COLOR: LETTER+ 29 | 30 | %import common.LETTER 31 | %import common.INT -> NUMBER 32 | %import common.WS 33 | %ignore WS 34 | """ 35 | 36 | parser = Lark(turtle_grammar) 37 | 38 | def run_instruction(t): 39 | if t.data == 'change_color': 40 | turtle.color(*t.children) # We just pass the color names as-is 41 | 42 | elif t.data == 'movement': 43 | name, number = t.children 44 | { 'f': turtle.fd, 45 | 'b': turtle.bk, 46 | 'l': turtle.lt, 47 | 'r': turtle.rt, }[name](int(number)) 48 | 49 | elif t.data == 'repeat': 50 | count, block = t.children 51 | for i in range(int(count)): 52 | run_instruction(block) 53 | 54 | elif t.data == 'fill': 55 | turtle.begin_fill() 56 | run_instruction(t.children[0]) 57 | turtle.end_fill() 58 | 59 | elif t.data == 'code_block': 60 | for cmd in t.children: 61 | run_instruction(cmd) 62 | else: 63 | raise SyntaxError('Unknown instruction: %s' % t.data) 64 | 65 | 66 | def run_turtle(program): 67 | parse_tree = parser.parse(program) 68 | for inst in parse_tree.children: 69 | run_instruction(inst) 70 | 71 | def main(): 72 | while True: 73 | code = input('> ') 74 | try: 75 | run_turtle(code) 76 | except Exception as e: 77 | print(e) 78 | 79 | def test(): 80 | text = """ 81 | c red yellow 82 | fill { repeat 36 { 83 | f200 l170 84 | }} 85 | """ 86 | run_turtle(text) 87 | 88 | if __name__ == '__main__': 89 | # test() 90 | main() 91 | -------------------------------------------------------------------------------- /lark/__init__.py: -------------------------------------------------------------------------------- 1 | from .exceptions import ( 2 | GrammarError, 3 | LarkError, 4 | LexError, 5 | ParseError, 6 | UnexpectedCharacters, 7 | 
UnexpectedEOF, 8 | UnexpectedInput, 9 | UnexpectedToken, 10 | ) 11 | from .lark import Lark 12 | from .lexer import Token 13 | from .tree import ParseTree, Tree 14 | from .utils import logger, TextSlice 15 | from .visitors import Discard, Transformer, Transformer_NonRecursive, Visitor, v_args 16 | 17 | __version__: str = "1.2.2" 18 | 19 | __all__ = ( 20 | "GrammarError", 21 | "LarkError", 22 | "LexError", 23 | "ParseError", 24 | "UnexpectedCharacters", 25 | "UnexpectedEOF", 26 | "UnexpectedInput", 27 | "UnexpectedToken", 28 | "Lark", 29 | "Token", 30 | "ParseTree", 31 | "Tree", 32 | "logger", 33 | "Discard", 34 | "Transformer", 35 | "Transformer_NonRecursive", 36 | "TextSlice", 37 | "Visitor", 38 | "v_args", 39 | ) 40 | -------------------------------------------------------------------------------- /lark/__pyinstaller/__init__.py: -------------------------------------------------------------------------------- 1 | # For usage of lark with PyInstaller. See https://pyinstaller-sample-hook.readthedocs.io/en/latest/index.html 2 | 3 | import os 4 | 5 | def get_hook_dirs(): 6 | return [os.path.dirname(__file__)] 7 | -------------------------------------------------------------------------------- /lark/__pyinstaller/hook-lark.py: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------------------- 2 | # Copyright (c) 2017-2020, PyInstaller Development Team. 3 | # 4 | # Distributed under the terms of the GNU General Public License (version 2 5 | # or later) with exception for distributing the bootloader. 6 | # 7 | # The full license is in the file COPYING.txt, distributed with this software. 8 | # 9 | # SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception) 10 | #----------------------------------------------------------------------------- 11 | 12 | from PyInstaller.utils.hooks import collect_data_files 13 | 14 | datas = collect_data_files('lark') 15 | -------------------------------------------------------------------------------- /lark/ast_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module of utilities for transforming a lark.Tree into a custom Abstract Syntax Tree (AST defined in classes) 3 | """ 4 | 5 | import inspect, re 6 | import types 7 | from typing import Optional, Callable 8 | 9 | from lark import Transformer, v_args 10 | 11 | class Ast: 12 | """Abstract class 13 | 14 | Subclasses will be collected by `create_transformer()` 15 | """ 16 | pass 17 | 18 | class AsList: 19 | """Abstract class 20 | 21 | Subclasses will be instantiated with the parse results as a single list, instead of as arguments. 22 | """ 23 | 24 | class WithMeta: 25 | """Abstract class 26 | 27 | Subclasses will be instantiated with the Meta instance of the tree. (see ``v_args`` for more detail) 28 | """ 29 | pass 30 | 31 | def camel_to_snake(name): 32 | return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower() 33 | 34 | def create_transformer(ast_module: types.ModuleType, 35 | transformer: Optional[Transformer]=None, 36 | decorator_factory: Callable=v_args) -> Transformer: 37 | """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST. 38 | 39 | For each class, we create a corresponding rule in the transformer, with a matching name. 40 | CamelCase names will be converted into snake_case. Example: "CodeBlock" -> "code_block". 41 | 42 | Classes starting with an underscore (`_`) will be skipped. 43 | 44 | Parameters: 45 | ast_module: A Python module containing all the subclasses of ``ast_utils.Ast`` 46 | transformer (Optional[Transformer]): An initial transformer. Its attributes may be overwritten.
47 | decorator_factory (Callable): An optional callable accepting two booleans, inline, and meta, 48 | and returning a decorator for the methods of ``transformer``. (default: ``v_args``). 49 | """ 50 | t = transformer or Transformer() 51 | 52 | for name, obj in inspect.getmembers(ast_module): 53 | if not name.startswith('_') and inspect.isclass(obj): 54 | if issubclass(obj, Ast): 55 | wrapper = decorator_factory(inline=not issubclass(obj, AsList), meta=issubclass(obj, WithMeta)) 56 | obj = wrapper(obj).__get__(t) 57 | setattr(t, camel_to_snake(name), obj) 58 | 59 | return t 60 | -------------------------------------------------------------------------------- /lark/common.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import sys 3 | from types import ModuleType 4 | from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING, List 5 | 6 | if TYPE_CHECKING: 7 | from .lark import PostLex 8 | from .lexer import Lexer 9 | from .grammar import Rule 10 | from typing import Union, Type 11 | from typing import Literal 12 | if sys.version_info >= (3, 10): 13 | from typing import TypeAlias 14 | else: 15 | from typing_extensions import TypeAlias 16 | 17 | from .utils import Serialize 18 | from .lexer import TerminalDef, Token 19 | 20 | ###{standalone 21 | 22 | _ParserArgType: 'TypeAlias' = 'Literal["earley", "lalr", "cyk", "auto"]' 23 | _LexerArgType: 'TypeAlias' = 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]' 24 | _LexerCallback = Callable[[Token], Token] 25 | ParserCallbacks = Dict[str, Callable] 26 | 27 | class LexerConf(Serialize): 28 | __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' 29 | __serialize_namespace__ = TerminalDef, 30 | 31 | terminals: Collection[TerminalDef] 32 | re_module: ModuleType 33 | ignore: Collection[str] 34 | postlex: 'Optional[PostLex]' 35 | callbacks: Dict[str, _LexerCallback] 36 | g_regex_flags: int 37 | skip_validation: bool 38 | use_bytes: bool 39 | lexer_type: Optional[_LexerArgType] 40 | strict: bool 41 | 42 | def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, 43 | callbacks: Optional[Dict[str, _LexerCallback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False, strict: bool=False): 44 | self.terminals = terminals 45 | self.terminals_by_name = {t.name: t for t in self.terminals} 46 | assert len(self.terminals) == len(self.terminals_by_name) 47 | self.ignore = ignore 48 | self.postlex = postlex 49 | self.callbacks = callbacks or {} 50 | self.g_regex_flags = g_regex_flags 51 | self.re_module = re_module 52 | self.skip_validation = skip_validation 53 | self.use_bytes = use_bytes 54 | self.strict = strict 55 | self.lexer_type = None 56 | 57 | def _deserialize(self): 58 | self.terminals_by_name = {t.name: t for t in self.terminals} 59 | 60 | def __deepcopy__(self, memo=None): 61 | return type(self)( 62 | deepcopy(self.terminals, memo), 63 | self.re_module, 64 | deepcopy(self.ignore, memo), 65 | deepcopy(self.postlex, memo), 66 | deepcopy(self.callbacks, memo), 67 | deepcopy(self.g_regex_flags, memo), 68 | deepcopy(self.skip_validation, memo), 69 | deepcopy(self.use_bytes, memo), 70 | ) 71 | 72 | class ParserConf(Serialize): 73 | __serialize_fields__ = 'rules', 'start', 'parser_type' 74 | 75 | rules: List['Rule'] 76 | callbacks: ParserCallbacks 77 | start: List[str] 78 | parser_type: 
_ParserArgType 79 | 80 | def __init__(self, rules: List['Rule'], callbacks: ParserCallbacks, start: List[str]): 81 | assert isinstance(start, list) 82 | self.rules = rules 83 | self.callbacks = callbacks 84 | self.start = start 85 | 86 | ###} 87 | -------------------------------------------------------------------------------- /lark/grammar.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple, ClassVar, Sequence 2 | 3 | from .utils import Serialize 4 | 5 | ###{standalone 6 | TOKEN_DEFAULT_PRIORITY = 0 7 | 8 | 9 | class Symbol(Serialize): 10 | __slots__ = ('name',) 11 | 12 | name: str 13 | is_term: ClassVar[bool] = NotImplemented 14 | 15 | def __init__(self, name: str) -> None: 16 | self.name = name 17 | 18 | def __eq__(self, other): 19 | if not isinstance(other, Symbol): 20 | return NotImplemented 21 | return self.is_term == other.is_term and self.name == other.name 22 | 23 | def __ne__(self, other): 24 | return not (self == other) 25 | 26 | def __hash__(self): 27 | return hash(self.name) 28 | 29 | def __repr__(self): 30 | return '%s(%r)' % (type(self).__name__, self.name) 31 | 32 | fullrepr = property(__repr__) 33 | 34 | def renamed(self, f): 35 | return type(self)(f(self.name)) 36 | 37 | 38 | class Terminal(Symbol): 39 | __serialize_fields__ = 'name', 'filter_out' 40 | 41 | is_term: ClassVar[bool] = True 42 | 43 | def __init__(self, name: str, filter_out: bool = False) -> None: 44 | self.name = name 45 | self.filter_out = filter_out 46 | 47 | @property 48 | def fullrepr(self): 49 | return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out) 50 | 51 | def renamed(self, f): 52 | return type(self)(f(self.name), self.filter_out) 53 | 54 | 55 | class NonTerminal(Symbol): 56 | __serialize_fields__ = 'name', 57 | 58 | is_term: ClassVar[bool] = False 59 | 60 | 61 | class RuleOptions(Serialize): 62 | __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' 63 | 64 | keep_all_tokens: bool 65 | expand1: bool 66 | priority: Optional[int] 67 | template_source: Optional[str] 68 | empty_indices: Tuple[bool, ...] 
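    # keep_all_tokens: set by the "!" rule prefix; keep tokens that would otherwise be filtered out.
    # expand1: set by the "?" rule prefix; a resulting node with a single child is replaced by that child.
    # priority: set with "rule.N: ..."; used to resolve ambiguity between competing rules.
    # empty_indices: records which optional children were absent (used with maybe_placeholders).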
69 | 70 | def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None: 71 | self.keep_all_tokens = keep_all_tokens 72 | self.expand1 = expand1 73 | self.priority = priority 74 | self.template_source = template_source 75 | self.empty_indices = empty_indices 76 | 77 | def __repr__(self): 78 | return 'RuleOptions(%r, %r, %r, %r)' % ( 79 | self.keep_all_tokens, 80 | self.expand1, 81 | self.priority, 82 | self.template_source 83 | ) 84 | 85 | 86 | class Rule(Serialize): 87 | """ 88 | origin : a symbol 89 | expansion : a list of symbols 90 | order : index of this expansion amongst all rules of the same name 91 | """ 92 | __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') 93 | 94 | __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options' 95 | __serialize_namespace__ = Terminal, NonTerminal, RuleOptions 96 | 97 | origin: NonTerminal 98 | expansion: Sequence[Symbol] 99 | order: int 100 | alias: Optional[str] 101 | options: RuleOptions 102 | _hash: int 103 | 104 | def __init__(self, origin: NonTerminal, expansion: Sequence[Symbol], 105 | order: int=0, alias: Optional[str]=None, options: Optional[RuleOptions]=None): 106 | self.origin = origin 107 | self.expansion = expansion 108 | self.alias = alias 109 | self.order = order 110 | self.options = options or RuleOptions() 111 | self._hash = hash((self.origin, tuple(self.expansion))) 112 | 113 | def _deserialize(self): 114 | self._hash = hash((self.origin, tuple(self.expansion))) 115 | 116 | def __str__(self): 117 | return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion)) 118 | 119 | def __repr__(self): 120 | return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options) 121 | 122 | def __hash__(self): 123 | return self._hash 124 | 125 | def __eq__(self, other): 126 | if not isinstance(other, Rule): 127 | return False 128 | return self.origin == other.origin and self.expansion == other.expansion 129 | 130 | 131 | ###} 132 | -------------------------------------------------------------------------------- /lark/grammars/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/lark/grammars/__init__.py -------------------------------------------------------------------------------- /lark/grammars/common.lark: -------------------------------------------------------------------------------- 1 | // Basic terminals for common use 2 | 3 | 4 | // 5 | // Numbers 6 | // 7 | 8 | DIGIT: "0".."9" 9 | HEXDIGIT: "a".."f"|"A".."F"|DIGIT 10 | 11 | INT: DIGIT+ 12 | SIGNED_INT: ["+"|"-"] INT 13 | DECIMAL: INT "." INT? | "." INT 14 | 15 | // float = /-?\d+(\.\d+)?([eE][+-]?\d+)?/ 16 | _EXP: ("e"|"E") SIGNED_INT 17 | FLOAT: INT _EXP | DECIMAL _EXP? 18 | SIGNED_FLOAT: ["+"|"-"] FLOAT 19 | 20 | NUMBER: FLOAT | INT 21 | SIGNED_NUMBER: ["+"|"-"] NUMBER 22 | 23 | // 24 | // Strings 25 | // 26 | _STRING_INNER: /.*?/ 27 | _STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/ 28 | 29 | ESCAPED_STRING : "\"" _STRING_ESC_INNER "\"" 30 | 31 | 32 | // 33 | // Names (Variables) 34 | // 35 | LCASE_LETTER: "a".."z" 36 | UCASE_LETTER: "A".."Z" 37 | 38 | LETTER: UCASE_LETTER | LCASE_LETTER 39 | WORD: LETTER+ 40 | 41 | CNAME: ("_"|LETTER) ("_"|LETTER|DIGIT)* 42 | 43 | 44 | // 45 | // Whitespace 46 | // 47 | WS_INLINE: (" "|/\t/)+ 48 | WS: /\s/+ 49 | 50 | CR : /\r/ 51 | LF : /\n/ 52 | NEWLINE: (CR? LF)+ 53 | 54 | 55 | // Comments 56 | SH_COMMENT: /#[^\n]*/ 57 | CPP_COMMENT: /\/\/[^\n]*/ 58 | C_COMMENT: "/*" /(.|\n)*?/ "*/" 59 | SQL_COMMENT: /--[^\n]*/ 60 | -------------------------------------------------------------------------------- /lark/grammars/lark.lark: -------------------------------------------------------------------------------- 1 | # Lark grammar of Lark's syntax 2 | # Note: Lark is not bootstrapped, its parser is implemented in load_grammar.py 3 | 4 | start: (_item? _NL)* _item? 5 | 6 | _item: rule 7 | | token 8 | | statement 9 | 10 | rule: RULE rule_params priority? ":" expansions 11 | token: TOKEN token_params priority? ":" expansions 12 | 13 | rule_params: ["{" RULE ("," RULE)* "}"] 14 | token_params: ["{" TOKEN ("," TOKEN)* "}"] 15 | 16 | priority: "." NUMBER 17 | 18 | statement: "%ignore" expansions -> ignore 19 | | "%import" import_path ["->" name] -> import 20 | | "%import" import_path name_list -> multi_import 21 | | "%override" rule -> override_rule 22 | | "%declare" name+ -> declare 23 | 24 | !import_path: "."? name ("."
name)* 25 | name_list: "(" name ("," name)* ")" 26 | 27 | ?expansions: alias (_VBAR alias)* 28 | 29 | ?alias: expansion ["->" RULE] 30 | 31 | ?expansion: expr* 32 | 33 | ?expr: atom [OP | "~" NUMBER [".." NUMBER]] 34 | 35 | ?atom: "(" expansions ")" 36 | | "[" expansions "]" -> maybe 37 | | value 38 | 39 | ?value: STRING ".." STRING -> literal_range 40 | | name 41 | | (REGEXP | STRING) -> literal 42 | | name "{" value ("," value)* "}" -> template_usage 43 | 44 | name: RULE 45 | | TOKEN 46 | 47 | _VBAR: _NL? "|" 48 | OP: /[+*]|[?](?![a-z])/ 49 | RULE: /!?[_?]?[a-z][_a-z0-9]*/ 50 | TOKEN: /_?[A-Z][_A-Z0-9]*/ 51 | STRING: _STRING "i"? 52 | REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/])*?\/[imslux]*/ 53 | _NL: /(\r?\n)+\s*/ 54 | 55 | %import common.ESCAPED_STRING -> _STRING 56 | %import common.SIGNED_INT -> NUMBER 57 | %import common.WS_INLINE 58 | 59 | COMMENT: /\s*/ "//" /[^\n]/* | /\s*/ "#" /[^\n]/* 60 | 61 | %ignore WS_INLINE 62 | %ignore COMMENT 63 | -------------------------------------------------------------------------------- /lark/grammars/unicode.lark: -------------------------------------------------------------------------------- 1 | // TODO: LETTER, WORD, etc. 2 | 3 | // 4 | // Whitespace 5 | // 6 | WS_INLINE: /[ \t\xa0]/+ 7 | WS: /[ \t\xa0\f\r\n]/+ 8 | -------------------------------------------------------------------------------- /lark/indenter.py: -------------------------------------------------------------------------------- 1 | "Provides a post-lexer for implementing Python-style indentation." 2 | 3 | from abc import ABC, abstractmethod 4 | from typing import List, Iterator 5 | 6 | from .exceptions import LarkError 7 | from .lark import PostLex 8 | from .lexer import Token 9 | 10 | ###{standalone 11 | 12 | class DedentError(LarkError): 13 | pass 14 | 15 | class Indenter(PostLex, ABC): 16 | """This is a postlexer that "injects" indent/dedent tokens based on indentation. 17 | 18 | It keeps track of the current indentation, as well as the current level of parentheses. 19 | Inside parentheses, the indentation is ignored, and no indent/dedent tokens get generated. 20 | 21 | Note: This is an abstract class. To use it, inherit and implement all its abstract methods: 22 | - tab_len 23 | - NL_type 24 | - OPEN_PAREN_types, CLOSE_PAREN_types 25 | - INDENT_type, DEDENT_type 26 | 27 | See also: the ``postlex`` option in `Lark`. 28 | """ 29 | paren_level: int 30 | indent_level: List[int] 31 | 32 | def __init__(self) -> None: 33 | self.paren_level = 0 34 | self.indent_level = [0] 35 | assert self.tab_len > 0 36 | 37 | def handle_NL(self, token: Token) -> Iterator[Token]: 38 | if self.paren_level > 0: 39 | return 40 | 41 | yield token 42 | 43 | indent_str = token.rsplit('\n', 1)[1] # Tabs and spaces 44 | indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len 45 | 46 | if indent > self.indent_level[-1]: 47 | self.indent_level.append(indent) 48 | yield Token.new_borrow_pos(self.INDENT_type, indent_str, token) 49 | else: 50 | while indent < self.indent_level[-1]: 51 | self.indent_level.pop() 52 | yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token) 53 | 54 | if indent != self.indent_level[-1]: 55 | raise DedentError('Unexpected dedent to column %s. 
Expected dedent to %s' % (indent, self.indent_level[-1])) 56 | 57 | def _process(self, stream): 58 | for token in stream: 59 | if token.type == self.NL_type: 60 | yield from self.handle_NL(token) 61 | else: 62 | yield token 63 | 64 | if token.type in self.OPEN_PAREN_types: 65 | self.paren_level += 1 66 | elif token.type in self.CLOSE_PAREN_types: 67 | self.paren_level -= 1 68 | assert self.paren_level >= 0 69 | 70 | while len(self.indent_level) > 1: 71 | self.indent_level.pop() 72 | yield Token(self.DEDENT_type, '') 73 | 74 | assert self.indent_level == [0], self.indent_level 75 | 76 | def process(self, stream): 77 | self.paren_level = 0 78 | self.indent_level = [0] 79 | return self._process(stream) 80 | 81 | # XXX Hack for ContextualLexer. Maybe there's a more elegant solution? 82 | @property 83 | def always_accept(self): 84 | return (self.NL_type,) 85 | 86 | @property 87 | @abstractmethod 88 | def NL_type(self) -> str: 89 | "The name of the newline token" 90 | raise NotImplementedError() 91 | 92 | @property 93 | @abstractmethod 94 | def OPEN_PAREN_types(self) -> List[str]: 95 | "The names of the tokens that open a parenthesis" 96 | raise NotImplementedError() 97 | 98 | @property 99 | @abstractmethod 100 | def CLOSE_PAREN_types(self) -> List[str]: 101 | """The names of the tokens that close a parenthesis 102 | """ 103 | raise NotImplementedError() 104 | 105 | @property 106 | @abstractmethod 107 | def INDENT_type(self) -> str: 108 | """The name of the token that starts an indentation in the grammar. 109 | 110 | See also: %declare 111 | """ 112 | raise NotImplementedError() 113 | 114 | @property 115 | @abstractmethod 116 | def DEDENT_type(self) -> str: 117 | """The name of the token that end an indentation in the grammar. 118 | 119 | See also: %declare 120 | """ 121 | raise NotImplementedError() 122 | 123 | @property 124 | @abstractmethod 125 | def tab_len(self) -> int: 126 | """How many spaces does a tab equal""" 127 | raise NotImplementedError() 128 | 129 | 130 | class PythonIndenter(Indenter): 131 | """A postlexer that "injects" _INDENT/_DEDENT tokens based on indentation, according to the Python syntax. 132 | 133 | See also: the ``postlex`` option in `Lark`. 134 | """ 135 | 136 | NL_type = '_NEWLINE' 137 | OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] 138 | CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] 139 | INDENT_type = '_INDENT' 140 | DEDENT_type = '_DEDENT' 141 | tab_len = 8 142 | 143 | ###} 144 | -------------------------------------------------------------------------------- /lark/parsers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/lark/parsers/__init__.py -------------------------------------------------------------------------------- /lark/parsers/earley_common.py: -------------------------------------------------------------------------------- 1 | """This module implements useful building blocks for the Earley parser 2 | """ 3 | 4 | 5 | class Item: 6 | "An Earley Item, the atom of the algorithm." 
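    # In dotted-rule terms: `rule` is the production, `ptr` is the dot (how much of
    # rule.expansion has been matched so far), and `start` is the input column where
    # the match began - cf. the "origin ::= matched * expected" rendering in __repr__.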
7 | 8 | __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash') 9 | def __init__(self, rule, ptr, start): 10 | self.is_complete = len(rule.expansion) == ptr 11 | self.rule = rule # rule 12 | self.ptr = ptr # ptr 13 | self.start = start # j 14 | self.node = None # w 15 | if self.is_complete: 16 | self.s = rule.origin 17 | self.expect = None 18 | self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None 19 | else: 20 | self.s = (rule, ptr) 21 | self.expect = rule.expansion[ptr] 22 | self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None 23 | self._hash = hash((self.s, self.start, self.rule)) 24 | 25 | def advance(self): 26 | return Item(self.rule, self.ptr + 1, self.start) 27 | 28 | def __eq__(self, other): 29 | return self is other or (self.s == other.s and self.start == other.start and self.rule == other.rule) 30 | 31 | def __hash__(self): 32 | return self._hash 33 | 34 | def __repr__(self): 35 | before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] ) 36 | after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] ) 37 | symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after)) 38 | return '%s (%d)' % (symbol, self.start) 39 | 40 | 41 | # class TransitiveItem(Item): 42 | # ... # removed at commit 4c1cfb2faf24e8f8bff7112627a00b94d261b420 43 | -------------------------------------------------------------------------------- /lark/parsers/lalr_parser.py: -------------------------------------------------------------------------------- 1 | """This module implements a LALR(1) Parser 2 | """ 3 | # Author: Erez Shinan (2017) 4 | # Email : erezshin@gmail.com 5 | from typing import Dict, Any, Optional 6 | from ..lexer import Token, LexerThread 7 | from ..utils import Serialize 8 | from ..common import ParserConf, ParserCallbacks 9 | 10 | from .lalr_analysis import LALR_Analyzer, IntParseTable, ParseTableBase 11 | from .lalr_interactive_parser import InteractiveParser 12 | from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken 13 | from .lalr_parser_state import ParserState, ParseConf 14 | 15 | ###{standalone 16 | 17 | class LALR_Parser(Serialize): 18 | def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False): 19 | analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict) 20 | analysis.compute_lalr() 21 | callbacks = parser_conf.callbacks 22 | 23 | self._parse_table = analysis.parse_table 24 | self.parser_conf = parser_conf 25 | self.parser = _Parser(analysis.parse_table, callbacks, debug) 26 | 27 | @classmethod 28 | def deserialize(cls, data, memo, callbacks, debug=False): 29 | inst = cls.__new__(cls) 30 | inst._parse_table = IntParseTable.deserialize(data, memo) 31 | inst.parser = _Parser(inst._parse_table, callbacks, debug) 32 | return inst 33 | 34 | def serialize(self, memo: Any = None) -> Dict[str, Any]: 35 | return self._parse_table.serialize(memo) 36 | 37 | def parse_interactive(self, lexer: LexerThread, start: str): 38 | return self.parser.parse(lexer, start, start_interactive=True) 39 | 40 | def parse(self, lexer, start, on_error=None): 41 | try: 42 | return self.parser.parse(lexer, start) 43 | except UnexpectedInput as e: 44 | if on_error is None: 45 | raise 46 | 47 | while True: 48 | if isinstance(e, UnexpectedCharacters): 49 | s = e.interactive_parser.lexer_thread.state 50 | p = s.line_ctr.char_pos 51 | 52 | if not on_error(e): 53 | raise e 54 | 55 | if isinstance(e, 
UnexpectedCharacters): 56 | # If the user's on_error callback didn't advance the character position, skip one character ourselves, so the lexer can keep making progress 57 | if p == s.line_ctr.char_pos: 58 | s.line_ctr.feed(s.text.text[p:p+1]) 59 | 60 | try: 61 | return e.interactive_parser.resume_parse() 62 | except UnexpectedToken as e2: 63 | if (isinstance(e, UnexpectedToken) 64 | and e.token.type == e2.token.type == '$END' 65 | and e.interactive_parser == e2.interactive_parser): 66 | # Prevent infinite loop 67 | raise e2 68 | e = e2 69 | except UnexpectedCharacters as e2: 70 | e = e2 71 | 72 | 73 | class _Parser: 74 | parse_table: ParseTableBase 75 | callbacks: ParserCallbacks 76 | debug: bool 77 | 78 | def __init__(self, parse_table: ParseTableBase, callbacks: ParserCallbacks, debug: bool=False): 79 | self.parse_table = parse_table 80 | self.callbacks = callbacks 81 | self.debug = debug 82 | 83 | def parse(self, lexer: LexerThread, start: str, value_stack=None, state_stack=None, start_interactive=False): 84 | parse_conf = ParseConf(self.parse_table, self.callbacks, start) 85 | parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) 86 | if start_interactive: 87 | return InteractiveParser(self, parser_state, parser_state.lexer) 88 | return self.parse_from_state(parser_state) 89 | 90 | 91 | def parse_from_state(self, state: ParserState, last_token: Optional[Token]=None): 92 | """Run the main LALR parser loop 93 | 94 | Parameters: 95 | state - the initial state. Changed in-place. 96 | last_token - Used only for line information in case of an empty lexer. 97 | """ 98 | try: 99 | token = last_token 100 | for token in state.lexer.lex(state): 101 | assert token is not None 102 | state.feed_token(token) 103 | 104 | end_token = Token.new_borrow_pos('$END', '', token) if token else Token('$END', '', 0, 1, 1) 105 | return state.feed_token(end_token, True) 106 | except UnexpectedInput as e: 107 | try: 108 | e.interactive_parser = InteractiveParser(self, state, state.lexer) 109 | except NameError: 110 | pass 111 | raise e 112 | except Exception as e: 113 | if self.debug: 114 | print("") 115 | print("STATE STACK DUMP") 116 | print("----------------") 117 | for i, s in enumerate(state.state_stack): 118 | print('%d)' % i , s) 119 | print("") 120 | 121 | raise 122 | ###} 123 | -------------------------------------------------------------------------------- /lark/parsers/lalr_parser_state.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy, copy 2 | from typing import Dict, Any, Generic, List 3 | from ..lexer import Token, LexerThread 4 | from ..common import ParserCallbacks 5 | 6 | from .lalr_analysis import Shift, ParseTableBase, StateT 7 | from lark.exceptions import UnexpectedToken 8 | 9 | ###{standalone 10 | 11 | class ParseConf(Generic[StateT]): 12 | __slots__ = 'parse_table', 'callbacks', 'start', 'start_state', 'end_state', 'states' 13 | 14 | parse_table: ParseTableBase[StateT] 15 | callbacks: ParserCallbacks 16 | start: str 17 | 18 | start_state: StateT 19 | end_state: StateT 20 | states: Dict[StateT, Dict[str, tuple]] 21 | 22 | def __init__(self, parse_table: ParseTableBase[StateT], callbacks: ParserCallbacks, start: str): 23 | self.parse_table = parse_table 24 | 25 | self.start_state = self.parse_table.start_states[start] 26 | self.end_state = self.parse_table.end_states[start] 27 | self.states = self.parse_table.states 28 | 29 | self.callbacks = callbacks 30 | self.start = start 31 | 32 | class ParserState(Generic[StateT]): 33 | __slots__ = 'parse_conf', 'lexer', 'state_stack',
'value_stack' 34 | 35 | parse_conf: ParseConf[StateT] 36 | lexer: LexerThread 37 | state_stack: List[StateT] 38 | value_stack: list 39 | 40 | def __init__(self, parse_conf: ParseConf[StateT], lexer: LexerThread, state_stack=None, value_stack=None): 41 | self.parse_conf = parse_conf 42 | self.lexer = lexer 43 | self.state_stack = state_stack or [self.parse_conf.start_state] 44 | self.value_stack = value_stack or [] 45 | 46 | @property 47 | def position(self) -> StateT: 48 | return self.state_stack[-1] 49 | 50 | # Necessary for match_examples() to work 51 | def __eq__(self, other) -> bool: 52 | if not isinstance(other, ParserState): 53 | return NotImplemented 54 | return len(self.state_stack) == len(other.state_stack) and self.position == other.position 55 | 56 | def __copy__(self): 57 | return self.copy() 58 | 59 | def copy(self, deepcopy_values=True) -> 'ParserState[StateT]': 60 | return type(self)( 61 | self.parse_conf, 62 | self.lexer, # XXX copy 63 | copy(self.state_stack), 64 | deepcopy(self.value_stack) if deepcopy_values else copy(self.value_stack), 65 | ) 66 | 67 | def feed_token(self, token: Token, is_end=False) -> Any: 68 | state_stack = self.state_stack 69 | value_stack = self.value_stack 70 | states = self.parse_conf.states 71 | end_state = self.parse_conf.end_state 72 | callbacks = self.parse_conf.callbacks 73 | 74 | while True: 75 | state = state_stack[-1] 76 | try: 77 | action, arg = states[state][token.type] 78 | except KeyError: 79 | expected = {s for s in states[state].keys() if s.isupper()} 80 | raise UnexpectedToken(token, expected, state=self, interactive_parser=None) 81 | 82 | assert arg != end_state 83 | 84 | if action is Shift: 85 | # shift once and return 86 | assert not is_end 87 | state_stack.append(arg) 88 | value_stack.append(token if token.type not in callbacks else callbacks[token.type](token)) 89 | return 90 | else: 91 | # reduce+shift as many times as necessary 92 | rule = arg 93 | size = len(rule.expansion) 94 | if size: 95 | s = value_stack[-size:] 96 | del state_stack[-size:] 97 | del value_stack[-size:] 98 | else: 99 | s = [] 100 | 101 | value = callbacks[rule](s) if callbacks else s 102 | 103 | _action, new_state = states[state_stack[-1]][rule.origin.name] 104 | assert _action is Shift 105 | state_stack.append(new_state) 106 | value_stack.append(value) 107 | 108 | if is_end and state_stack[-1] == end_state: 109 | return value_stack[-1] 110 | ###} 111 | -------------------------------------------------------------------------------- /lark/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/lark/py.typed -------------------------------------------------------------------------------- /lark/reconstruct.py: -------------------------------------------------------------------------------- 1 | """This is an experimental tool for reconstructing text from a shaped tree, based on a Lark grammar. 
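Given a parse tree whose shape matches the grammar, it re-inserts the anonymous tokens
(punctuation, keywords) that the parser filtered out, yielding text that should parse
back into an equivalent tree.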
2 | """ 3 | 4 | from typing import Dict, Callable, Iterable, Optional 5 | 6 | from .lark import Lark 7 | from .tree import Tree, ParseTree 8 | from .visitors import Transformer_InPlace 9 | from .lexer import Token, PatternStr, TerminalDef 10 | from .grammar import Terminal, NonTerminal, Symbol 11 | 12 | from .tree_matcher import TreeMatcher, is_discarded_terminal 13 | from .utils import is_id_continue 14 | 15 | def is_iter_empty(i): 16 | try: 17 | _ = next(i) 18 | return False 19 | except StopIteration: 20 | return True 21 | 22 | 23 | class WriteTokensTransformer(Transformer_InPlace): 24 | "Inserts discarded tokens into their correct place, according to the rules of grammar" 25 | 26 | tokens: Dict[str, TerminalDef] 27 | term_subs: Dict[str, Callable[[Symbol], str]] 28 | 29 | def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None: 30 | self.tokens = tokens 31 | self.term_subs = term_subs 32 | 33 | def __default__(self, data, children, meta): 34 | if not getattr(meta, 'match_tree', False): 35 | return Tree(data, children) 36 | 37 | iter_args = iter(children) 38 | to_write = [] 39 | for sym in meta.orig_expansion: 40 | if is_discarded_terminal(sym): 41 | try: 42 | v = self.term_subs[sym.name](sym) 43 | except KeyError: 44 | t = self.tokens[sym.name] 45 | if not isinstance(t.pattern, PatternStr): 46 | raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) 47 | 48 | v = t.pattern.value 49 | to_write.append(v) 50 | else: 51 | x = next(iter_args) 52 | if isinstance(x, list): 53 | to_write += x 54 | else: 55 | if isinstance(x, Token): 56 | assert Terminal(x.type) == sym, x 57 | else: 58 | assert NonTerminal(x.data) == sym, (sym, x) 59 | to_write.append(x) 60 | 61 | assert is_iter_empty(iter_args) 62 | return to_write 63 | 64 | 65 | class Reconstructor(TreeMatcher): 66 | """ 67 | A Reconstructor that will, given a full parse Tree, generate source code. 68 | 69 | Note: 70 | The reconstructor cannot generate values from regexps. If you need to produce discarded 71 | regexes, such as newlines, use `term_subs` and provide default values for them. 
72 | 73 | Parameters: 74 | parser: a Lark instance 75 | term_subs: a dictionary of [Terminal name as str] to [output text as str] 76 | """ 77 | 78 | write_tokens: WriteTokensTransformer 79 | 80 | def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None: 81 | TreeMatcher.__init__(self, parser) 82 | 83 | self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) 84 | 85 | def _reconstruct(self, tree): 86 | unreduced_tree = self.match_tree(tree, tree.data) 87 | 88 | res = self.write_tokens.transform(unreduced_tree) 89 | for item in res: 90 | if isinstance(item, Tree): 91 | # TODO use orig_expansion.rulename to support templates 92 | yield from self._reconstruct(item) 93 | else: 94 | yield item 95 | 96 | def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str: 97 | x = self._reconstruct(tree) 98 | if postproc: 99 | x = postproc(x) 100 | y = [] 101 | prev_item = '' 102 | for item in x: 103 | if insert_spaces and prev_item and item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]): 104 | y.append(' ') 105 | y.append(item) 106 | prev_item = item 107 | return ''.join(y) 108 | -------------------------------------------------------------------------------- /lark/tools/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from argparse import ArgumentParser, FileType 3 | from textwrap import indent 4 | from logging import DEBUG, INFO, WARN, ERROR 5 | from typing import Optional 6 | import warnings 7 | 8 | from lark import Lark, logger 9 | try: 10 | from interegular import logger as interegular_logger 11 | has_interegular = True 12 | except ImportError: 13 | has_interegular = False 14 | 15 | lalr_argparser = ArgumentParser(add_help=False, epilog='Look at the Lark documentation for more info on the options') 16 | 17 | flags = [ 18 | ('d', 'debug'), 19 | 'keep_all_tokens', 20 | 'regex', 21 | 'propagate_positions', 22 | 'maybe_placeholders', 23 | 'use_bytes' 24 | ] 25 | 26 | options = ['start', 'lexer'] 27 | 28 | lalr_argparser.add_argument('-v', '--verbose', action='count', default=0, help="Increase Logger output level, up to three times") 29 | lalr_argparser.add_argument('-s', '--start', action='append', default=[]) 30 | lalr_argparser.add_argument('-l', '--lexer', default='contextual', choices=('basic', 'contextual')) 31 | lalr_argparser.add_argument('-o', '--out', type=FileType('w', encoding='utf-8'), default=sys.stdout, help='the output file (default=stdout)') 32 | lalr_argparser.add_argument('grammar_file', type=FileType('r', encoding='utf-8'), help='A valid .lark file') 33 | 34 | for flag in flags: 35 | if isinstance(flag, tuple): 36 | options.append(flag[1]) 37 | lalr_argparser.add_argument('-' + flag[0], '--' + flag[1], action='store_true') 38 | elif isinstance(flag, str): 39 | options.append(flag) 40 | lalr_argparser.add_argument('--' + flag, action='store_true') 41 | else: 42 | raise NotImplementedError("flags must only contain strings or tuples of strings") 43 | 44 | 45 | def build_lalr(namespace): 46 | logger.setLevel((ERROR, WARN, INFO, DEBUG)[min(namespace.verbose, 3)]) 47 | if has_interegular: 48 | interegular_logger.setLevel(logger.getEffectiveLevel()) 49 | if len(namespace.start) == 0: 50 | namespace.start.append('start') 51 | kwargs = {n: getattr(namespace, n) for n in options} 52 | return Lark(namespace.grammar_file, parser='lalr', **kwargs), 
namespace.out 53 | 54 | 55 | def showwarning_as_comment(message, category, filename, lineno, file=None, line=None): 56 | # Based on warnings._showwarnmsg_impl 57 | text = warnings.formatwarning(message, category, filename, lineno, line) 58 | text = indent(text, '# ') 59 | if file is None: 60 | file = sys.stderr 61 | if file is None: 62 | return 63 | try: 64 | file.write(text) 65 | except OSError: 66 | pass 67 | 68 | 69 | def make_warnings_comments(): 70 | warnings.showwarning = showwarning_as_comment 71 | -------------------------------------------------------------------------------- /lark/tools/serialize.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | 4 | from lark.grammar import Rule 5 | from lark.lexer import TerminalDef 6 | from lark.tools import lalr_argparser, build_lalr 7 | 8 | import argparse 9 | 10 | argparser = argparse.ArgumentParser(prog='python -m lark.tools.serialize', parents=[lalr_argparser], 11 | description="Lark Serialization Tool - Stores Lark's internal state & LALR analysis as a JSON file", 12 | epilog='Look at the Lark documentation for more info on the options') 13 | 14 | 15 | def serialize(lark_inst, outfile): 16 | data, memo = lark_inst.memo_serialize([TerminalDef, Rule]) 17 | outfile.write('{\n') 18 | outfile.write(' "data": %s,\n' % json.dumps(data)) 19 | outfile.write(' "memo": %s\n' % json.dumps(memo)) 20 | outfile.write('}\n') 21 | 22 | 23 | def main(): 24 | if len(sys.argv)==1: 25 | argparser.print_help(sys.stderr) 26 | sys.exit(1) 27 | ns = argparser.parse_args() 28 | serialize(*build_lalr(ns)) 29 | 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.2.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "lark" 7 | authors = [{name = "Erez Shinan", email = "erezshin@gmail.com"}] 8 | license = {text = "MIT"} 9 | description = "a modern parsing library" 10 | keywords = ["Earley", "LALR", "parser", "parsing", "ast"] 11 | classifiers = [ 12 | "Development Status :: 5 - Production/Stable", 13 | "Intended Audience :: Developers", 14 | "Programming Language :: Python :: 3", 15 | "Topic :: Software Development :: Libraries :: Python Modules", 16 | "Topic :: Text Processing :: General", 17 | "Topic :: Text Processing :: Linguistic", 18 | "License :: OSI Approved :: MIT License", 19 | ] 20 | requires-python = ">=3.8" 21 | dependencies = [] 22 | dynamic = ["version"] 23 | 24 | [project.readme] 25 | text = """ 26 | Lark is a modern general-purpose parsing library for Python. 27 | With Lark, you can parse any context-free grammar, efficiently, with very little code. 28 | Main Features: 29 | - Builds a parse-tree (AST) automagically, based on the structure of the grammar 30 | - Earley parser 31 | - Can parse all context-free grammars 32 | - Full support for ambiguous grammars 33 | - LALR(1) parser 34 | - Fast and light, competitive with PLY 35 | - Can generate a stand-alone parser 36 | - CYK parser, for highly ambiguous grammars 37 | - EBNF grammar 38 | - Unicode fully supported 39 | - Automatic line & column tracking 40 | - Standard library of terminals (strings, numbers, names, etc.) 41 | - Import grammars from Nearley.js 42 | - Extensive test suite 43 | - And much more! 
44 | Since version 1.2, only Python versions 3.8 and up are supported.""" 45 | content-type = "text/markdown" 46 | 47 | [project.urls] 48 | Homepage = "https://github.com/lark-parser/lark" 49 | Download = "https://github.com/lark-parser/lark/tarball/master" 50 | 51 | [project.entry-points.pyinstaller40] 52 | hook-dirs = "lark.__pyinstaller:get_hook_dirs" 53 | 54 | [project.optional-dependencies] 55 | regex = ["regex"] 56 | nearley = ["js2py"] 57 | atomic_cache = ["atomicwrites"] 58 | interegular = ["interegular>=0.3.1,<0.4.0"] 59 | 60 | [tool.setuptools] 61 | packages = [ 62 | "lark", 63 | "lark.parsers", 64 | "lark.tools", 65 | "lark.grammars", 66 | "lark.__pyinstaller", 67 | ] 68 | include-package-data = true 69 | 70 | [tool.setuptools.package-data] 71 | "*" = ["*.lark"] 72 | lark = ["py.typed"] 73 | 74 | [tool.setuptools.dynamic] 75 | version = {attr = "lark.__version__"} 76 | 77 | [tool.mypy] 78 | files = "lark" 79 | python_version = "3.8" 80 | show_error_codes = true 81 | enable_error_code = ["ignore-without-code", "unused-ignore"] 82 | exclude = [ 83 | "^lark/__pyinstaller", 84 | ] 85 | 86 | # You can disable imports or control per-module/file settings here 87 | [[tool.mypy.overrides]] 88 | module = [ "js2py" ] 89 | ignore_missing_imports = true 90 | 91 | [tool.coverage.report] 92 | exclude_lines = [ 93 | "pragma: no cover", 94 | "if TYPE_CHECKING:" 95 | ] 96 | [tool.pyright] 97 | include = ["lark"] 98 | 99 | [tool.pytest.ini_options] 100 | minversion = 6.0 101 | addopts = "-ra -q" 102 | testpaths = [ 103 | "tests" 104 | ] 105 | python_files = "__main__.py" 106 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | formats: all 4 | 5 | build: 6 | os: ubuntu-22.04 7 | tools: 8 | python: "3.8" 9 | 10 | python: 11 | 12 | install: 13 | - requirements: docs/requirements.txt 14 | 15 | # Build documentation in the docs/ directory with Sphinx 16 | sphinx: 17 | configuration: docs/conf.py 18 | -------------------------------------------------------------------------------- /test-requirements.txt: -------------------------------------------------------------------------------- 1 | interegular>=0.3.1,<0.4.0 2 | Js2Py==0.68 3 | regex 4 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/tests/__init__.py -------------------------------------------------------------------------------- /tests/__main__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | import unittest 4 | import logging 5 | import sys 6 | from lark import logger 7 | 8 | from .test_trees import TestTrees 9 | from .test_tools import TestStandalone 10 | from .test_cache import TestCache 11 | from .test_grammar import TestGrammar 12 | from .test_reconstructor import TestReconstructor 13 | from .test_tree_forest_transformer import TestTreeForestTransformer 14 | from .test_lexer import TestLexer 15 | from .test_python_grammar import TestPythonParser 16 | from .test_tree_templates import * # We define __all__ to list which TestSuites to run 17 | 18 | try: 19 | from .test_nearley.test_nearley import TestNearley 20 | except ImportError: 21 |
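    # ImportError is raised when js2py is missing or the nearley submodule isn't checked out (see test_nearley.py)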
logger.warning("Warning: Skipping tests for Nearley grammar imports (js2py required)") 22 | 23 | # from .test_selectors import TestSelectors 24 | # from .test_grammars import TestPythonG, TestConfigG 25 | 26 | from .test_logger import Testlogger 27 | 28 | from .test_parser import * # We define __all__ to list which TestSuites to run 29 | 30 | if sys.version_info >= (3, 10): 31 | from .test_pattern_matching import TestPatternMatching 32 | 33 | logger.setLevel(logging.INFO) 34 | 35 | if __name__ == '__main__': 36 | unittest.main() 37 | -------------------------------------------------------------------------------- /tests/grammars/ab.lark: -------------------------------------------------------------------------------- 1 | startab: expr 2 | 3 | expr: A B 4 | | A expr B 5 | 6 | A: "a" 7 | B: "b" 8 | 9 | %import common.WS 10 | %ignore WS 11 | -------------------------------------------------------------------------------- /tests/grammars/leading_underscore_grammar.lark: -------------------------------------------------------------------------------- 1 | A: "A" 2 | 3 | _SEP: "x" 4 | _a: A 5 | 6 | c: _a _SEP -------------------------------------------------------------------------------- /tests/grammars/templates.lark: -------------------------------------------------------------------------------- 1 | sep{item, delim}: item (delim item)* -------------------------------------------------------------------------------- /tests/grammars/test.lark: -------------------------------------------------------------------------------- 1 | %import common.NUMBER 2 | %import common.WORD 3 | %import common.WS 4 | -------------------------------------------------------------------------------- /tests/grammars/test_relative_import_of_nested_grammar.lark: -------------------------------------------------------------------------------- 1 | 2 | start: rule_to_import 3 | 4 | %import .test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import -------------------------------------------------------------------------------- /tests/grammars/test_relative_import_of_nested_grammar__grammar_to_import.lark: -------------------------------------------------------------------------------- 1 | 2 | rule_to_import: NESTED_TERMINAL 3 | 4 | %import .test_relative_import_of_nested_grammar__nested_grammar.NESTED_TERMINAL 5 | -------------------------------------------------------------------------------- /tests/grammars/test_relative_import_of_nested_grammar__nested_grammar.lark: -------------------------------------------------------------------------------- 1 | NESTED_TERMINAL: "N" 2 | -------------------------------------------------------------------------------- /tests/grammars/test_unicode.lark: -------------------------------------------------------------------------------- 1 | UNICODE : /[a-zØ-öø-ÿ]/ -------------------------------------------------------------------------------- /tests/grammars/three_rules_using_same_token.lark: -------------------------------------------------------------------------------- 1 | %import common.INT 2 | 3 | a: A 4 | b: A 5 | c: A 6 | 7 | A: "A" -------------------------------------------------------------------------------- /tests/test_lexer.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase, main 2 | 3 | from lark import Lark, Tree, TextSlice 4 | 5 | 6 | class TestLexer(TestCase): 7 | def setUp(self): 8 | pass 9 | 10 | def test_basic(self): 11 | p = Lark(""" 12 | start: "a" "b" "c" "d" 13 | %ignore " " 14 | 
""") 15 | 16 | res = list(p.lex("abc cba dd")) 17 | assert res == list('abccbadd') 18 | 19 | res = list(p.lex("abc cba dd", dont_ignore=True)) 20 | assert res == list('abc cba dd') 21 | 22 | def test_subset_lex(self): 23 | p = Lark(""" 24 | start: "a" "b" "c" "d" 25 | %ignore " " 26 | """) 27 | 28 | res = list(p.lex(TextSlice("xxxabc cba ddxx", 3, -2))) 29 | assert res == list('abccbadd') 30 | 31 | res = list(p.lex(TextSlice("aaaabc cba dddd", 3, -2))) 32 | assert res == list('abccbadd') 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /tests/test_logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from contextlib import contextmanager 3 | from lark import Lark, logger 4 | from unittest import TestCase, main, skipIf 5 | 6 | from io import StringIO 7 | 8 | try: 9 | import interegular 10 | except ImportError: 11 | interegular = None 12 | 13 | @contextmanager 14 | def capture_log(): 15 | stream = StringIO() 16 | orig_handler = logger.handlers[0] 17 | del logger.handlers[:] 18 | logger.addHandler(logging.StreamHandler(stream)) 19 | yield stream 20 | del logger.handlers[:] 21 | logger.addHandler(orig_handler) 22 | 23 | class Testlogger(TestCase): 24 | 25 | def test_debug(self): 26 | logger.setLevel(logging.DEBUG) 27 | collision_grammar = ''' 28 | start: as as 29 | as: a* 30 | a: "a" 31 | ''' 32 | with capture_log() as log: 33 | Lark(collision_grammar, parser='lalr', debug=True) 34 | 35 | log = log.getvalue() 36 | # since there are conflicts about A 37 | # symbol A should appear in the log message for hint 38 | self.assertIn("A", log) 39 | 40 | def test_non_debug(self): 41 | logger.setLevel(logging.WARNING) 42 | collision_grammar = ''' 43 | start: as as 44 | as: a* 45 | a: "a" 46 | ''' 47 | with capture_log() as log: 48 | Lark(collision_grammar, parser='lalr', debug=False) 49 | log = log.getvalue() 50 | # no log message 51 | self.assertEqual(log, "") 52 | 53 | def test_loglevel_higher(self): 54 | logger.setLevel(logging.ERROR) 55 | collision_grammar = ''' 56 | start: as as 57 | as: a* 58 | a: "a" 59 | ''' 60 | with capture_log() as log: 61 | Lark(collision_grammar, parser='lalr', debug=True) 62 | log = log.getvalue() 63 | # no log message 64 | self.assertEqual(len(log), 0) 65 | 66 | @skipIf(interegular is None, "interegular is not installed, can't test regex collisions") 67 | def test_regex_collision(self): 68 | logger.setLevel(logging.WARNING) 69 | collision_grammar = ''' 70 | start: A | B 71 | A: /a+/ 72 | B: /(a|b)+/ 73 | ''' 74 | with capture_log() as log: 75 | Lark(collision_grammar, parser='lalr') 76 | 77 | log = log.getvalue() 78 | # since there are conflicts between A and B 79 | # symbols A and B should appear in the log message 80 | self.assertIn("A", log) 81 | self.assertIn("B", log) 82 | 83 | @skipIf(interegular is None, "interegular is not installed, can't test regex collisions") 84 | def test_no_regex_collision(self): 85 | logger.setLevel(logging.WARNING) 86 | collision_grammar = ''' 87 | start: A " " B 88 | A: /a+/ 89 | B: /(a|b)+/ 90 | ''' 91 | with capture_log() as log: 92 | Lark(collision_grammar, parser='lalr') 93 | 94 | log = log.getvalue() 95 | self.assertEqual(log, "") 96 | 97 | 98 | if __name__ == '__main__': 99 | main() 100 | -------------------------------------------------------------------------------- /tests/test_nearley/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lark-parser/lark/87bb8efe0d425187159b39fd788640da33d1878e/tests/test_nearley/__init__.py -------------------------------------------------------------------------------- /tests/test_nearley/grammars/include_unicode.ne: -------------------------------------------------------------------------------- 1 | @include "unicode.ne" 2 | 3 | main -> x 4 | -------------------------------------------------------------------------------- /tests/test_nearley/grammars/unicode.ne: -------------------------------------------------------------------------------- 1 | x -> "±a" 2 | -------------------------------------------------------------------------------- /tests/test_nearley/test_nearley.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | import unittest 5 | import logging 6 | import os 7 | import codecs 8 | 9 | from lark import logger 10 | from lark.tools.nearley import create_code_for_nearley_grammar, main as nearley_tool_main 11 | 12 | logger.setLevel(logging.INFO) 13 | 14 | TEST_PATH = os.path.abspath(os.path.dirname(__file__)) 15 | NEARLEY_PATH = os.path.join(TEST_PATH, 'nearley') 16 | BUILTIN_PATH = os.path.join(NEARLEY_PATH, 'builtin') 17 | 18 | if not os.path.exists(BUILTIN_PATH): 19 | logger.warning("Nearley not included. Skipping Nearley tests! (use git submodule to add)") 20 | raise ImportError("Skipping Nearley tests!") 21 | 22 | try: 23 | import js2py # Ensures that js2py exists, to avoid failing tests 24 | except RuntimeError as e: 25 | if "python version" in str(e): 26 | raise ImportError("js2py does not support this python version") 27 | raise 28 | 29 | 30 | class TestNearley(unittest.TestCase): 31 | def test_css(self): 32 | fn = os.path.join(NEARLEY_PATH, 'examples/csscolor.ne') 33 | with open(fn) as f: 34 | grammar = f.read() 35 | 36 | code = create_code_for_nearley_grammar(grammar, 'csscolor', BUILTIN_PATH, os.path.dirname(fn)) 37 | d = {} 38 | exec (code, d) 39 | parse = d['parse'] 40 | 41 | c = parse('#a199ff') 42 | assert c['r'] == 161 43 | assert c['g'] == 153 44 | assert c['b'] == 255 45 | 46 | c = parse('rgb(255, 70%, 3)') 47 | assert c['r'] == 255 48 | assert c['g'] == 178 49 | assert c['b'] == 3 50 | 51 | def test_include(self): 52 | fn = os.path.join(NEARLEY_PATH, 'test/grammars/folder-test.ne') 53 | with open(fn) as f: 54 | grammar = f.read() 55 | 56 | code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, os.path.dirname(fn)) 57 | d = {} 58 | exec (code, d) 59 | parse = d['parse'] 60 | 61 | parse('a') 62 | parse('b') 63 | 64 | def test_multi_include(self): 65 | fn = os.path.join(NEARLEY_PATH, 'test/grammars/multi-include-test.ne') 66 | with open(fn) as f: 67 | grammar = f.read() 68 | 69 | code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, os.path.dirname(fn)) 70 | d = {} 71 | exec (code, d) 72 | parse = d['parse'] 73 | 74 | parse('a') 75 | parse('b') 76 | parse('c') 77 | 78 | def test_utf8(self): 79 | grammar = u'main -> "±a"' 80 | code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') 81 | d = {} 82 | exec (code, d) 83 | parse = d['parse'] 84 | 85 | parse(u'±a') 86 | 87 | def test_backslash(self): 88 | grammar = r'main -> "\""' 89 | code = create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') 90 | d = {} 91 | exec (code, d) 92 | parse = d['parse'] 93 | parse(u'"') 94 | 95 | def test_null(self): 96 | grammar = r'main -> "a" | null' 97 | code = 
create_code_for_nearley_grammar(grammar, 'main', BUILTIN_PATH, './') 98 | d = {} 99 | exec (code, d) 100 | parse = d['parse'] 101 | parse('a') 102 | parse('') 103 | 104 | def test_utf8_2(self): 105 | fn = os.path.join(TEST_PATH, 'grammars/unicode.ne') 106 | nearley_tool_main(fn, 'x', NEARLEY_PATH) 107 | 108 | def test_include_utf8(self): 109 | fn = os.path.join(TEST_PATH, 'grammars/include_unicode.ne') 110 | nearley_tool_main(fn, 'main', NEARLEY_PATH) 111 | 112 | 113 | if __name__ == '__main__': 114 | unittest.main() 115 | -------------------------------------------------------------------------------- /tests/test_pattern_matching.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase, main 2 | 3 | from lark import Token, Tree 4 | 5 | 6 | class TestPatternMatching(TestCase): 7 | token = Token('A', 'a') 8 | 9 | def setUp(self): 10 | pass 11 | 12 | def test_matches_with_string(self): 13 | match self.token: 14 | case 'a': 15 | pass 16 | case _: 17 | assert False 18 | 19 | def test_matches_with_str_positional_arg(self): 20 | match self.token: 21 | case str('a'): 22 | pass 23 | case _: 24 | assert False 25 | 26 | def test_matches_with_token_positional_arg(self): 27 | match self.token: 28 | case Token('a'): 29 | assert False 30 | case Token('A'): 31 | pass 32 | case _: 33 | assert False 34 | 35 | def test_matches_with_token_kwarg_type(self): 36 | match self.token: 37 | case Token(type='A'): 38 | pass 39 | case _: 40 | assert False 41 | 42 | def test_matches_with_bad_token_type(self): 43 | match self.token: 44 | case Token(type='B'): 45 | assert False 46 | case _: 47 | pass 48 | 49 | def test_match_on_tree(self): 50 | tree1 = Tree('a', [Tree(x, y) for x, y in zip('bcd', 'xyz')]) 51 | tree2 = Tree('a', [ 52 | Tree('b', [Token('T', 'x')]), 53 | Tree('c', [Token('T', 'y')]), 54 | Tree('d', [Tree('z', [Token('T', 'zz'), Tree('zzz', 'zzz')])]), 55 | ]) 56 | 57 | match tree1: 58 | case Tree('X', []): 59 | assert False 60 | case Tree('a', []): 61 | assert False 62 | case Tree(_, 'b'): 63 | assert False 64 | case Tree('X', _): 65 | assert False 66 | tree = Tree('q', [Token('T', 'x')]) 67 | match tree: 68 | case Tree('q', [Token('T', 'x')]): 69 | pass 70 | case _: 71 | assert False 72 | tr = Tree('a', [Tree('b', [Token('T', 'a')])]) 73 | match tr: 74 | case Tree('a', [Tree('b', [Token('T', 'a')])]): 75 | pass 76 | case _: 77 | assert False 78 | # test nested trees 79 | match tree2: 80 | case Tree('a', [ 81 | Tree('b', [Token('T', 'x')]), 82 | Tree('c', [Token('T', 'y')]), 83 | Tree('d', [ 84 | Tree('z', [ 85 | Token('T', 'zz'), 86 | Tree('zzz', 'zzz') 87 | ]) 88 | ]) 89 | ]): 90 | pass 91 | case _: 92 | assert False 93 | 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /tests/test_reconstructor.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import json 4 | import sys 5 | import unittest 6 | from itertools import product 7 | from unittest import TestCase 8 | 9 | from lark import Lark 10 | from lark.reconstruct import Reconstructor 11 | 12 | common = """ 13 | %import common (WS_INLINE, NUMBER, WORD) 14 | %ignore WS_INLINE 15 | """ 16 | 17 | 18 | def _remove_ws(s): 19 | return s.replace(' ', '').replace('\n', '') 20 | 21 | 22 | class TestReconstructor(TestCase): 23 | 24 | def assert_reconstruct(self, grammar, code, **options): 25 | parser = Lark(grammar, parser='lalr', 
maybe_placeholders=False, **options) 26 | tree = parser.parse(code) 27 | new = Reconstructor(parser).reconstruct(tree) 28 | self.assertEqual(_remove_ws(code), _remove_ws(new)) 29 | 30 | def test_starred_rule(self): 31 | g = """ 32 | start: item* 33 | item: NL 34 | | rule 35 | rule: WORD ":" NUMBER 36 | NL: /(\\r?\\n)+\\s*/ 37 | """ + common 38 | 39 | code = """ 40 | Elephants: 12 41 | """ 42 | 43 | self.assert_reconstruct(g, code) 44 | 45 | def test_starred_group(self): 46 | g = """ 47 | start: (rule | NL)* 48 | rule: WORD ":" NUMBER 49 | NL: /(\\r?\\n)+\\s*/ 50 | """ + common 51 | 52 | code = """ 53 | Elephants: 12 54 | """ 55 | 56 | self.assert_reconstruct(g, code) 57 | 58 | def test_alias(self): 59 | g = """ 60 | start: line* 61 | line: NL 62 | | rule 63 | | "hello" -> hi 64 | rule: WORD ":" NUMBER 65 | NL: /(\\r?\\n)+\\s*/ 66 | """ + common 67 | 68 | code = """ 69 | Elephants: 12 70 | hello 71 | """ 72 | 73 | self.assert_reconstruct(g, code) 74 | 75 | def test_keep_tokens(self): 76 | g = """ 77 | start: (NL | stmt)* 78 | stmt: var op var 79 | !op: ("+" | "-" | "*" | "/") 80 | var: WORD 81 | NL: /(\\r?\\n)+\\s*/ 82 | """ + common 83 | 84 | code = """ 85 | a+b 86 | """ 87 | 88 | self.assert_reconstruct(g, code) 89 | 90 | def test_expand_rule(self): 91 | g = """ 92 | ?start: (NL | mult_stmt)* 93 | ?mult_stmt: sum_stmt ["*" sum_stmt] 94 | ?sum_stmt: var ["+" var] 95 | var: WORD 96 | NL: /(\\r?\\n)+\\s*/ 97 | """ + common 98 | 99 | code = ['a', 'a*b', 'a+b', 'a*b+c', 'a+b*c', 'a+b*c+d'] 100 | 101 | for c in code: 102 | self.assert_reconstruct(g, c) 103 | 104 | def test_json_example(self): 105 | test_json = ''' 106 | { 107 | "empty_object" : {}, 108 | "empty_array" : [], 109 | "booleans" : { "YES" : true, "NO" : false }, 110 | "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], 111 | "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ], 112 | "nothing" : null 113 | } 114 | ''' 115 | 116 | json_grammar = r""" 117 | ?start: value 118 | 119 | ?value: object 120 | | array 121 | | string 122 | | SIGNED_NUMBER -> number 123 | | "true" -> true 124 | | "false" -> false 125 | | "null" -> null 126 | 127 | array : "[" [value ("," value)*] "]" 128 | object : "{" [pair ("," pair)*] "}" 129 | pair : string ":" value 130 | 131 | string : ESCAPED_STRING 132 | 133 | %import common.ESCAPED_STRING 134 | %import common.SIGNED_NUMBER 135 | %import common.WS 136 | 137 | %ignore WS 138 | """ 139 | 140 | json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False) 141 | tree = json_parser.parse(test_json) 142 | 143 | new_json = Reconstructor(json_parser).reconstruct(tree) 144 | self.assertEqual(json.loads(new_json), json.loads(test_json)) 145 | 146 | def test_keep_all_tokens(self): 147 | g = """ 148 | start: "a"? _B? c? _d? 149 | _B: "b" 150 | c: "c" 151 | _d: "d" 152 | """ 153 | examples = list(map(''.join, product(('', 'a'), ('', 'b'), ('', 'c'), ('', 'd'), ))) 154 | for code in examples: 155 | self.assert_reconstruct(g, code, keep_all_tokens=True) 156 | 157 | def test_switch_grammar_unicode_terminal(self): 158 | """ 159 | This test checks that a parse tree built with a grammar containing only ascii characters can be reconstructed 160 | with a grammar that has unicode rules (or vice versa). The original bug assigned ANON terminals to unicode 161 | keywords, which offsets the ANON terminal count in the unicode grammar and causes subsequent identical ANON 162 | tokens (e.g., `+=`) to mismatch between the two grammars. 
163 | """ 164 | 165 | g1 = """ 166 | start: (NL | stmt)* 167 | stmt: "keyword" var op var 168 | !op: ("+=" | "-=" | "*=" | "/=") 169 | var: WORD 170 | NL: /(\\r?\\n)+\\s*/ 171 | """ + common 172 | 173 | g2 = """ 174 | start: (NL | stmt)* 175 | stmt: "குறிப்பு" var op var 176 | !op: ("+=" | "-=" | "*=" | "/=") 177 | var: WORD 178 | NL: /(\\r?\\n)+\\s*/ 179 | """ + common 180 | 181 | code = """ 182 | keyword x += y 183 | """ 184 | 185 | l1 = Lark(g1, parser='lalr', maybe_placeholders=False) 186 | l2 = Lark(g2, parser='lalr', maybe_placeholders=False) 187 | r = Reconstructor(l2) 188 | 189 | tree = l1.parse(code) 190 | code2 = r.reconstruct(tree) 191 | assert l2.parse(code2) == tree 192 | 193 | 194 | if __name__ == '__main__': 195 | unittest.main() 196 | -------------------------------------------------------------------------------- /tests/test_relative_import.lark: -------------------------------------------------------------------------------- 1 | start: NUMBER WORD 2 | 3 | %import .grammars.test.NUMBER 4 | %import common.WORD 5 | %import common.WS 6 | %ignore WS 7 | 8 | -------------------------------------------------------------------------------- /tests/test_relative_import_preserves_leading_underscore.lark: -------------------------------------------------------------------------------- 1 | start: c 2 | 3 | %import .grammars.leading_underscore_grammar.c -------------------------------------------------------------------------------- /tests/test_relative_import_rename.lark: -------------------------------------------------------------------------------- 1 | start: N WORD 2 | 3 | %import .grammars.test.NUMBER -> N 4 | %import common.WORD 5 | %import common.WS 6 | %ignore WS 7 | 8 | -------------------------------------------------------------------------------- /tests/test_relative_import_rules_dependencies_imported_only_once.lark: -------------------------------------------------------------------------------- 1 | %import .grammars.three_rules_using_same_token.a 2 | %import .grammars.three_rules_using_same_token.b 3 | %import .grammars.three_rules_using_same_token.c -> d 4 | 5 | start: a b d 6 | -------------------------------------------------------------------------------- /tests/test_relative_import_unicode.lark: -------------------------------------------------------------------------------- 1 | start: UNICODE 2 | 3 | %import .grammars.test_unicode.UNICODE -------------------------------------------------------------------------------- /tests/test_relative_multi_import.lark: -------------------------------------------------------------------------------- 1 | start: NUMBER WORD 2 | 3 | %import .grammars.test (NUMBER, WORD, WS) 4 | %ignore WS 5 | -------------------------------------------------------------------------------- /tests/test_relative_rule_import.lark: -------------------------------------------------------------------------------- 1 | start: X expr Y 2 | 3 | X: "x" 4 | Y: "y" 5 | 6 | %import .grammars.ab.expr 7 | 8 | -------------------------------------------------------------------------------- /tests/test_relative_rule_import_drop_ignore.lark: -------------------------------------------------------------------------------- 1 | start: X expr Y 2 | 3 | X: "x" 4 | Y: "y" 5 | 6 | %import .grammars.ab.expr 7 | 8 | -------------------------------------------------------------------------------- /tests/test_relative_rule_import_rename.lark: -------------------------------------------------------------------------------- 1 | start: X ab Y 2 | 3 | X: "x" 4 | Y: "y" 5 | 6 | 
%import .grammars.ab.expr -> ab 7 | 8 | -------------------------------------------------------------------------------- /tests/test_relative_rule_import_subrule.lark: -------------------------------------------------------------------------------- 1 | start: X startab Y 2 | 3 | X: "x" 4 | Y: "y" 5 | 6 | %import .grammars.ab.startab 7 | 8 | -------------------------------------------------------------------------------- /tests/test_relative_rule_import_subrule_no_conflict.lark: -------------------------------------------------------------------------------- 1 | start: expr 2 | 3 | expr: X startab Y 4 | 5 | X: "x" 6 | Y: "y" 7 | 8 | %import .grammars.ab.startab 9 | 10 | -------------------------------------------------------------------------------- /tests/test_templates_import.lark: -------------------------------------------------------------------------------- 1 | start: "[" sep{NUMBER, ","} "]" 2 | NUMBER: /\d+/ 3 | %ignore " " 4 | %import .grammars.templates.sep -------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | from unittest import TestCase, main 4 | 5 | from lark import Lark 6 | from lark.tree import Tree 7 | from lark.tools import standalone 8 | 9 | from io import StringIO 10 | 11 | 12 | class TestStandalone(TestCase): 13 | def setUp(self): 14 | pass 15 | 16 | def _create_standalone(self, grammar, compress=False): 17 | code_buf = StringIO() 18 | standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress) 19 | code = code_buf.getvalue() 20 | 21 | context = {'__doc__': None, '__name__': 'test_standalone'} 22 | exec(code, context) 23 | return context 24 | 25 | def test_simple(self): 26 | grammar = """ 27 | start: NUMBER WORD 28 | 29 | %import common.NUMBER 30 | %import common.WORD 31 | %import common.WS 32 | %ignore WS 33 | 34 | """ 35 | 36 | context = self._create_standalone(grammar) 37 | 38 | _Lark = context['Lark_StandAlone'] 39 | l = _Lark() 40 | x = l.parse('12 elephants') 41 | self.assertEqual(x.children, ['12', 'elephants']) 42 | x = l.parse('16 candles') 43 | self.assertEqual(x.children, ['16', 'candles']) 44 | 45 | self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve monkeys') 46 | self.assertRaises(context['UnexpectedToken'], l.parse, 'twelve') 47 | self.assertRaises(context['UnexpectedCharacters'], l.parse, '$ talks') 48 | 49 | context = self._create_standalone(grammar, compress=True) 50 | _Lark = context['Lark_StandAlone'] 51 | l = _Lark() 52 | x = l.parse('12 elephants') 53 | 54 | def test_interactive(self): 55 | grammar = """ 56 | start: A+ B* 57 | A: "a" 58 | B: "b" 59 | """ 60 | context = self._create_standalone(grammar) 61 | parser: Lark = context['Lark_StandAlone']() 62 | 63 | ip = parser.parse_interactive() 64 | 65 | UnexpectedToken = context['UnexpectedToken'] 66 | Token = context['Token'] 67 | 68 | self.assertRaises(UnexpectedToken, ip.feed_eof) 69 | self.assertRaises(TypeError, ip.exhaust_lexer) 70 | ip.feed_token(Token('A', 'a')) 71 | res = ip.feed_eof() 72 | self.assertEqual(res, Tree('start', ['a'])) 73 | 74 | ip = parser.parse_interactive("ab") 75 | 76 | ip.exhaust_lexer() 77 | 78 | ip_copy = ip.copy() 79 | self.assertEqual(ip_copy.parser_state, ip.parser_state) 80 | self.assertEqual(ip_copy.lexer_thread.state, ip.lexer_thread.state) 81 | self.assertIsNot(ip_copy.parser_state, ip.parser_state) 82 | 
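        # A copy must duplicate state, not share it: the states compare equal but must be distinct objects, so feeding one parser cannot affect the other.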
self.assertIsNot(ip_copy.lexer_thread.state, ip.lexer_thread.state) 83 | self.assertIsNot(ip_copy.lexer_thread.state.line_ctr, ip.lexer_thread.state.line_ctr) 84 | 85 | res = ip.feed_eof(ip.lexer_thread.state.last_token) 86 | self.assertEqual(res, Tree('start', ['a', 'b'])) 87 | self.assertRaises(UnexpectedToken, ip.feed_eof) 88 | 89 | self.assertRaises(UnexpectedToken, ip_copy.feed_token, Token('A', 'a')) 90 | ip_copy.feed_token(Token('B', 'b')) 91 | res = ip_copy.feed_eof() 92 | self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) 93 | 94 | def test_contextual(self): 95 | grammar = """ 96 | start: a b 97 | a: "A" "B" 98 | b: "AB" 99 | """ 100 | 101 | context = self._create_standalone(grammar) 102 | 103 | _Lark = context['Lark_StandAlone'] 104 | l = _Lark() 105 | x = l.parse('ABAB') 106 | 107 | _v_args = context['v_args'] 108 | @_v_args(inline=True) 109 | class T(context['Transformer']): 110 | def a(self): 111 | return 'a' 112 | def b(self): 113 | return 'b' 114 | 115 | start = _v_args(inline=False)(list) 116 | 117 | x = T().transform(x) 118 | self.assertEqual(x, ['a', 'b']) 119 | 120 | l2 = _Lark(transformer=T()) 121 | x = l2.parse('ABAB') 122 | self.assertEqual(x, ['a', 'b']) 123 | 124 | def test_postlex(self): 125 | from lark.indenter import Indenter 126 | class MyIndenter(Indenter): 127 | NL_type = '_NEWLINE' 128 | OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE'] 129 | CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE'] 130 | INDENT_type = '_INDENT' 131 | DEDENT_type = '_DEDENT' 132 | tab_len = 8 133 | 134 | grammar = r""" 135 | start: "(" ")" _NEWLINE 136 | _NEWLINE: /\n/ 137 | """ 138 | 139 | context = self._create_standalone(grammar) 140 | _Lark = context['Lark_StandAlone'] 141 | 142 | l = _Lark(postlex=MyIndenter()) 143 | x = l.parse('()\n') 144 | self.assertEqual(x, Tree('start', [])) 145 | l = _Lark(postlex=MyIndenter()) 146 | x = l.parse('(\n)\n') 147 | self.assertEqual(x, Tree('start', [])) 148 | 149 | def test_transformer(self): 150 | grammar = r""" 151 | start: some_rule "(" SOME_TERMINAL ")" 152 | some_rule: SOME_TERMINAL 153 | SOME_TERMINAL: /[A-Za-z_][A-Za-z0-9_]*/ 154 | """ 155 | context = self._create_standalone(grammar) 156 | _Lark = context["Lark_StandAlone"] 157 | 158 | _Token = context["Token"] 159 | _Tree = context["Tree"] 160 | 161 | class MyTransformer(context["Transformer"]): 162 | def SOME_TERMINAL(self, token): 163 | return _Token("SOME_TERMINAL", "token is transformed") 164 | 165 | def some_rule(self, children): 166 | return _Tree("rule_is_transformed", []) 167 | 168 | parser = _Lark(transformer=MyTransformer()) 169 | self.assertEqual( 170 | parser.parse("FOO(BAR)"), 171 | _Tree("start", [ 172 | _Tree("rule_is_transformed", []), 173 | _Token("SOME_TERMINAL", "token is transformed") 174 | ]) 175 | ) 176 | 177 | 178 | if __name__ == '__main__': 179 | main() 180 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = lint, type, py38, py39, py310, py311, py312, py313, pypy3 3 | skip_missing_interpreters = true 4 | 5 | [testenv] 6 | 7 | deps = 8 | -rtest-requirements.txt 9 | passenv = 10 | TERM 11 | 12 | # Always force recreation, to avoid unexpected side effects 13 | recreate = True 14 | 15 | # Required, since the commands run `git` 16 | allowlist_externals = git 17 | 18 | commands = 19 | git submodule sync -q 20 | git submodule update --init 21 | python -m tests {posargs} 22 | 23 | [testenv:type] 24 |
description = run type check on code base 25 | skip_install = true 26 | recreate = false 27 | deps = 28 | mypy==1.10 29 | interegular>=0.3.1,<0.4.0 30 | types-atomicwrites 31 | types-regex 32 | rich<=13.4.1 33 | commands = 34 | mypy 35 | 36 | 37 | [testenv:lint] 38 | description = run linters on code base 39 | skip_install = true 40 | recreate = false 41 | deps = 42 | pre-commit 43 | commands = 44 | pre-commit run --all-files --show-diff-on-failure 45 | --------------------------------------------------------------------------------