├── .fleet └── run.json ├── .github └── workflows │ ├── gh-pages.yml │ ├── publish-to-pypi.yml │ └── python-checks.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── Makefile ├── README.md ├── docs ├── api │ ├── index.md │ ├── lexer.md │ ├── parser.md │ └── util.md ├── changes.md ├── getting-started │ ├── index.md │ ├── parse-tree.md │ ├── parsing.md │ ├── tips-and-tricks.md │ └── tokenizing.md ├── index.md └── media │ └── extra.css ├── funcparserlib ├── __init__.py ├── lexer.py ├── parser.py ├── py.typed └── util.py ├── mkdocs.yml ├── mypy.ini ├── poetry.lock ├── pyproject.toml ├── tests ├── __init__.py ├── dot.py ├── json.py ├── test_dot.py ├── test_json.py └── test_parsing.py └── tox.ini /.fleet/run.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "type": "python-tests", 5 | "name": "Unit tests", 6 | "testFramework": "unittest" 7 | }, 8 | { 9 | "type": "command", 10 | "name": "pre-commit", 11 | "program": "pre-commit", 12 | "args": ["run", "-a"] 13 | } 14 | ] 15 | } -------------------------------------------------------------------------------- /.github/workflows/gh-pages.yml: -------------------------------------------------------------------------------- 1 | name: Deploy docs to GitHub Pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 3.12 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: "3.12" 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install poetry 21 | poetry install 22 | - name: Build docs with mkdocs 23 | run: | 24 | poetry run mkdocs build 25 | - name: Deploy 26 | uses: peaceiris/actions-gh-pages@v3 27 | with: 28 | github_token: ${{ secrets.GITHUB_TOKEN }} 29 | publish_dir: ./site 30 | cname: funcparserlib.pirx.ru 31 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - "*" 7 | 8 | jobs: 9 | build-n-publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python 3.12 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: "3.12" 17 | - name: Install build tools 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install poetry 21 | - name: Build sdist and wheel 22 | run: | 23 | poetry build --no-interaction 24 | - name: Publish distribution to PyPI 25 | uses: pypa/gh-action-pypi-publish@release/v1 26 | with: 27 | user: __token__ 28 | password: ${{ secrets.PYPI_API_TOKEN }} 29 | -------------------------------------------------------------------------------- /.github/workflows/python-checks.yml: -------------------------------------------------------------------------------- 1 | name: Python checks 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | pre-commit-checks: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: 17 | - "3.8" 18 | - "3.9" 19 | - "3.10" 20 | - "3.11" 21 | - "3.12" 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v3 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | 
python -m pip install --upgrade pip 31 | pip install poetry 32 | poetry install 33 | - name: Run pre-commit checks 34 | uses: pre-commit/action@v3.0.0 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | *.pyc 3 | *.swp 4 | .eggs/ 5 | .idea/ 6 | .tox/ 7 | __pycache__/ 8 | build/ 9 | dist/ 10 | site/ 11 | venv/ 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: "23.11.0" 4 | hooks: 5 | - id: black 6 | - repo: https://github.com/PyCQA/flake8 7 | rev: "6.1.0" 8 | hooks: 9 | - id: flake8 10 | args: ["--max-line-length=88"] 11 | - repo: https://github.com/pre-commit/mirrors-mypy 12 | rev: "v1.7.0" 13 | hooks: 14 | - id: mypy 15 | - repo: local 16 | hooks: 17 | - id: unittest 18 | name: unittest 19 | entry: poetry run python -m unittest discover 20 | language: system 21 | types: 22 | - python 23 | pass_filenames: false 24 | - repo: local 25 | hooks: 26 | - id: doctest 27 | name: doctest 28 | entry: poetry run python -m doctest 29 | language: system 30 | files: (^funcparserlib/|^docs/) 31 | types_or: 32 | - python 33 | - markdown 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2009/2023 Andrey Vlasovskikh 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so, subject to the following 8 | conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or 11 | substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 15 | PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 16 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 17 | OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 18 | OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default install test doctest unittest clean poetry-install tox mypy 2 | 3 | default: poetry-install 4 | poetry build 5 | 6 | poetry-install: 7 | poetry install 8 | 9 | test: unittest 10 | 11 | tox: 12 | poetry run tox 13 | 14 | clean: 15 | rm -fr build dist *.egg-info .tox 16 | find . -name '*.pyc' | xargs rm -f 17 | find . 
-name __pycache__ | xargs rm -fr 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Funcparserlib 2 | ============= 3 | 4 | Recursive descent parsing library for Python based on functional combinators. 5 | 6 | [![PyPI](https://img.shields.io/pypi/v/funcparserlib)](https://pypi.org/project/funcparserlib/) 7 | [![PyPI - Downloads](https://img.shields.io/pypi/dm/funcparserlib)](https://pypi.org/project/funcparserlib/) 8 | 9 | 10 | Description 11 | ----------- 12 | 13 | The primary focus of `funcparserlib` is **parsing little languages** or **external DSLs** (domain specific languages). 14 | 15 | Parsers made with `funcparserlib` are pure-Python LL(\*) parsers. This means that it's **very easy to write parsers** without thinking about lookaheads and other hardcore parsing stuff. However, recursive descent parsing is a rather slow method compared to LL(k) or LR(k) algorithms. Still, parsing with `funcparserlib` is **at least twice as fast as PyParsing**, a very popular library for Python. 16 | 17 | The source code of `funcparserlib` is only 1.2K lines of code, with lots of comments. Its API is fully type hinted. It features the longest parsed prefix error reporting, as well as a tiny lexer generator for token position tracking. 18 | 19 | The idea of parser combinators used in `funcparserlib` comes from the [Introduction to Functional Programming](https://www.cl.cam.ac.uk/teaching/Lectures/funprog-jrh-1996/) course. We have converted it from ML into Python. 20 | 21 | 22 | Installation 23 | ------------ 24 | 25 | You can install `funcparserlib` from [PyPI](https://pypi.org/project/funcparserlib/): 26 | 27 | ```shell 28 | $ pip install funcparserlib 29 | ``` 30 | 31 | There are no dependencies on other libraries. 32 | 33 | 34 | Documentation 35 | ------------- 36 | 37 | * [Getting Started](https://funcparserlib.pirx.ru/getting-started/) 38 | * Your **starting point** with `funcparserlib` 39 | * [API Reference](https://funcparserlib.pirx.ru/api/) 40 | * Learn the details of the API 41 | 42 | There are several examples available in the `tests/` directory: 43 | 44 | * [GraphViz DOT parser](https://github.com/vlasovskikh/funcparserlib/blob/master/tests/dot.py) 45 | * [JSON parser](https://github.com/vlasovskikh/funcparserlib/blob/master/tests/json.py) 46 | 47 | See also [the changelog](https://funcparserlib.pirx.ru/changes/). 48 | 49 | 50 | Example 51 | ------- 52 | 53 | Let's consider a little language of **numeric expressions** with a syntax similar to Python expressions.
Here are some expression strings in this language: 54 | 55 | ``` 56 | 0 57 | 1 + 2 + 3 58 | -1 + 2 ** 32 59 | 3.1415926 * (2 + 7.18281828e-1) * 42 60 | ``` 61 | 62 | 63 | Here is **the complete source code** of the tokenizer and the parser for this language written using `funcparserlib`: 64 | 65 | ```python 66 | from typing import List, Tuple, Union 67 | from dataclasses import dataclass 68 | 69 | from funcparserlib.lexer import make_tokenizer, TokenSpec, Token 70 | from funcparserlib.parser import tok, Parser, many, forward_decl, finished 71 | 72 | 73 | @dataclass 74 | class BinaryExpr: 75 | op: str 76 | left: "Expr" 77 | right: "Expr" 78 | 79 | 80 | Expr = Union[BinaryExpr, int, float] 81 | 82 | 83 | def tokenize(s: str) -> List[Token]: 84 | specs = [ 85 | TokenSpec("whitespace", r"\s+"), 86 | TokenSpec("float", r"[+\-]?\d+\.\d*([Ee][+\-]?\d+)*"), 87 | TokenSpec("int", r"[+\-]?\d+"), 88 | TokenSpec("op", r"(\*\*)|[+\-*/()]"), 89 | ] 90 | tokenizer = make_tokenizer(specs) 91 | return [t for t in tokenizer(s) if t.type != "whitespace"] 92 | 93 | 94 | def parse(tokens: List[Token]) -> Expr: 95 | int_num = tok("int") >> int 96 | float_num = tok("float") >> float 97 | number = int_num | float_num 98 | 99 | expr: Parser[Token, Expr] = forward_decl() 100 | parenthesized = -op("(") + expr + -op(")") 101 | primary = number | parenthesized 102 | power = primary + many(op("**") + primary) >> to_expr 103 | term = power + many((op("*") | op("/")) + power) >> to_expr 104 | sum = term + many((op("+") | op("-")) + term) >> to_expr 105 | expr.define(sum) 106 | 107 | document = expr + -finished 108 | 109 | return document.parse(tokens) 110 | 111 | 112 | def op(name: str) -> Parser[Token, str]: 113 | return tok("op", name) 114 | 115 | 116 | def to_expr(args: Tuple[Expr, List[Tuple[str, Expr]]]) -> Expr: 117 | first, rest = args 118 | result = first 119 | for op, expr in rest: 120 | result = BinaryExpr(op, result, expr) 121 | return result 122 | ``` 123 | 124 | Now, consider this numeric expression: `3.1415926 * (2 + 7.18281828e-1) * 42`. 125 | 126 | Let's `tokenize()` it using the tokenizer we've created with `funcparserlib.lexer`: 127 | 128 | ``` 129 | [ 130 | Token('float', '3.1415926'), 131 | Token('op', '*'), 132 | Token('op', '('), 133 | Token('int', '2'), 134 | Token('op', '+'), 135 | Token('float', '7.18281828e-1'), 136 | Token('op', ')'), 137 | Token('op', '*'), 138 | Token('int', '42'), 139 | ] 140 | ``` 141 | 142 | Let's `parse()` these tokens into an expression tree using our parser created with `funcparserlib.parser`: 143 | 144 | ``` 145 | BinaryExpr( 146 | op='*', 147 | left=BinaryExpr( 148 | op='*', 149 | left=3.1415926, 150 | right=BinaryExpr(op='+', left=2, right=0.718281828), 151 | ), 152 | right=42, 153 | ) 154 | ``` 155 | 156 | Learn how to write this parser using `funcparserlib` in the [Getting Started](https://funcparserlib.pirx.ru/getting-started/) guide! 157 | 158 | 159 | Used By 160 | ------- 161 | 162 | Some open-source projects that use `funcparserlib` as an explicit dependency: 163 | 164 | * [Hy](https://github.com/hylang/hy), a Lisp dialect that's embedded in Python 165 | * 4.7K stars, version `~=1.0`, Python 3.8+ 166 | * [Splash](https://github.com/scrapinghub/splash), a JavaScript rendering service with HTTP API, by Scrapinghub 167 | * 3.9K stars, version `*`. 
Python 3 in Docker 168 | * [graphite-beacon](https://github.com/klen/graphite-beacon), a simple alerting system for Graphite metrics 169 | * 453 stars, version `==0.3.6`, Python 2 and 3 170 | * [blockdiag](https://github.com/blockdiag/blockdiag), generates block-diagram image file from spec-text file 171 | * 194 stars, version `>= 1.0.0a0`, Python 3.7+ 172 | * [kll](https://github.com/kiibohd/kll), Keyboard Layout Language (KLL) compiler 173 | * 113 stars, copied source code, Python 3.5+ 174 | 175 | 176 | Next 177 | ---- 178 | 179 | Read the [Getting Started](https://funcparserlib.pirx.ru/getting-started/) guide to start learning `funcparserlib`. 180 | -------------------------------------------------------------------------------- /docs/api/index.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | Funcparserlib consists of the following modules: 4 | 5 | * [`funcparserlib.lexer` — Regexp-based tokenizer](lexer.md) 6 | * [`funcparserlib.parser` — Functional parsing combinators](parser.md) 7 | * [`funcparserlib.util` — Various utilities](util.md) 8 | -------------------------------------------------------------------------------- /docs/api/lexer.md: -------------------------------------------------------------------------------- 1 | # `funcparserlib.lexer` — Regexp-based tokenizer 2 | 3 | ::: funcparserlib.lexer.make_tokenizer 4 | 5 | ::: funcparserlib.lexer.TokenSpec 6 | 7 | ::: funcparserlib.lexer.TokenSpec.__init__ 8 | rendering: 9 | heading_level: 3 10 | 11 | ::: funcparserlib.lexer.Token 12 | -------------------------------------------------------------------------------- /docs/api/parser.md: -------------------------------------------------------------------------------- 1 | # `funcparserlib.parser` — Functional parsing combinators 2 | 3 | ::: funcparserlib.parser 4 | rendering: 5 | show_root_heading: false 6 | 7 | ::: funcparserlib.parser.Parser 8 | 9 | ::: funcparserlib.parser.Parser.parse 10 | rendering: 11 | heading_level: 3 12 | 13 | ::: funcparserlib.parser.Parser.define 14 | rendering: 15 | heading_level: 3 16 | 17 | ::: funcparserlib.parser.Parser.named 18 | rendering: 19 | heading_level: 3 20 | 21 | 22 | Primitive Parsers 23 | ----------------- 24 | 25 | ::: funcparserlib.parser.tok 26 | rendering: 27 | heading_level: 3 28 | 29 | ::: funcparserlib.parser.a 30 | rendering: 31 | heading_level: 3 32 | 33 | ::: funcparserlib.parser.some 34 | rendering: 35 | heading_level: 3 36 | 37 | ::: funcparserlib.parser.forward_decl 38 | rendering: 39 | heading_level: 3 40 | 41 | ### `finished` 42 | 43 | A parser that throws an exception if there are any unparsed tokens left in the sequence. 44 | 45 | Type: `Parser[Any, None]` 46 | 47 | **Examples:** 48 | 49 | ```pycon 50 | >>> from funcparserlib.parser import a, finished 51 | >>> expr = a("x") + finished 52 | >>> expr.parse("x") 53 | ('x', None) 54 | 55 | ``` 56 | 57 | ```pycon 58 | >>> expr = a("x") + finished 59 | >>> expr.parse("xy") 60 | Traceback (most recent call last): 61 | ... 
62 | funcparserlib.parser.NoParseError: got unexpected token: 'y', expected: end of input 63 | 64 | ``` 65 | 66 | 67 | Parser Combinators 68 | ------------------ 69 | 70 | ::: funcparserlib.parser.Parser.__add__ 71 | rendering: 72 | heading_level: 3 73 | 74 | ::: funcparserlib.parser.Parser.__neg__ 75 | rendering: 76 | heading_level: 3 77 | 78 | ::: funcparserlib.parser.Parser.__or__ 79 | rendering: 80 | heading_level: 3 81 | 82 | ::: funcparserlib.parser.Parser.__rshift__ 83 | rendering: 84 | heading_level: 3 85 | 86 | ::: funcparserlib.parser.maybe 87 | rendering: 88 | heading_level: 3 89 | 90 | ::: funcparserlib.parser.many 91 | rendering: 92 | heading_level: 3 93 | 94 | ::: funcparserlib.parser.oneplus 95 | rendering: 96 | heading_level: 3 97 | 98 | ::: funcparserlib.parser.skip 99 | rendering: 100 | heading_level: 3 101 | 102 | 103 | Extra: Parser Monad 104 | ------------------- 105 | 106 | As a functional programmer, you might be pleased to know that parsers in funcparserlib 107 | form _a monad_ with `Parser.bind()` as `>>=` and `pure()` as `return`. 108 | 109 | We could have expressed other parsing combinators in terms of `bind()`, but it would be 110 | inefficient in Python: 111 | 112 | ```python 113 | # noinspection PyUnresolvedReferences 114 | class Parser: 115 | def __add__(self, other): 116 | return self.bind(lambda x: other.bind(lambda y: pure((x, y)))) 117 | 118 | def __rshift__(self, f): 119 | return self.bind(lambda x: pure(f(x))) 120 | ``` 121 | 122 | ::: funcparserlib.parser.Parser.bind 123 | rendering: 124 | heading_level: 3 125 | 126 | ::: funcparserlib.parser.pure 127 | rendering: 128 | heading_level: 3 129 | -------------------------------------------------------------------------------- /docs/api/util.md: -------------------------------------------------------------------------------- 1 | # `funcparserlib.util` — Various utilities 2 | 3 | ::: funcparserlib.util.pretty_tree 4 | -------------------------------------------------------------------------------- /docs/changes.md: -------------------------------------------------------------------------------- 1 | The Changelog 2 | ============= 3 | 4 | 2.0.0 — to be released 5 | ---------------------- 6 | 7 | Dropped support for Python 2.7 (end of life). For compatibility with Python 2.7 please 8 | use version `>=1.0,==1.*` (`~=1.0`). 9 | 10 | ### Added 11 | 12 | * Added support for Python 3.12 13 | 14 | ### Changed 15 | 16 | * Dropped support for Python 2.7 17 | * Dropped support for Python 3.7 18 | 19 | 20 | 1.0.1 — 2022-11-04 21 | ------------------ 22 | 23 | ### Added 24 | 25 | * Added support for Python 3.11 26 | 27 | 28 | 1.0.0 — 2022-05-02 29 | ------------------ 30 | 31 | The stable 1.0.0 release freezes the API of funcparserlib 0.3.6 which was released on 32 | 2013-05-02, with a few bug fixes and small features.
33 | 34 | ### Added 35 | 36 | * Added support for Python 3.10 37 | * Added support for Python 3.9 38 | ([#63](https://github.com/vlasovskikh/funcparserlib/pull/63)) 39 | (Thanks to [@pkulev](https://github.com/pkulev)) 40 | * Added support for Python 3.8 41 | * Added `-p` (the same as `skip(p)`) with more strict type hints for `-p` and `p1 + p2` 42 | * Added `tok(type[, value])` for more compact grammars, better error messages 43 | * Added `TokenSpec(type, pattern[, flags])` to simplify the use of `make_tokenizer()` 44 | * Added type hints for the public API 45 | * Added the new library homepage with the new Getting Started guide and the new API 46 | reference 47 | 48 | ### Changed 49 | 50 | * Parse exceptions now show expected tokens and grammar rules at the stopped position 51 | ([#52](https://github.com/vlasovskikh/funcparserlib/issues/52)) 52 | * Dropped support for Python 3.4, 3.5, 3.6 (end of life) 53 | * Dropped support for Python 2.5, 2.6, 3.3 (end of life), modernized code for Python 54 | 3 to run without obsolete `2to3` 55 | ([#57](https://github.com/vlasovskikh/funcparserlib/pull/57)) 56 | (Thanks to [@jdufresne](https://github.com/jdufresne)) 57 | * Removed documentation and unit tests from the distribution 58 | * Switched from setuptools to Poetry 59 | * Switched to poetry-core for lighter PEP 517 builds 60 | ([#73](https://github.com/vlasovskikh/funcparserlib/pull/73)) 61 | (Thanks to [@fabaff](https://github.com/fabaff)) 62 | * Run unit tests on GitHub Actions for all supported Pythons 63 | 64 | ### Fixed 65 | 66 | * Fixed `TypeError` in `oneplus` when applying it `parser + parser` 67 | ([#66](https://github.com/vlasovskikh/funcparserlib/issues/66)) 68 | (Thanks to [@martica](https://github.com/martica)) 69 | * Fixed `AttributeError` when comparing `Token` objects to `None` 70 | ([#58](https://github.com/vlasovskikh/funcparserlib/pull/58)) 71 | (Thanks to [@Halolegend94](https://github.com/Halolegend94)) 72 | * Fixed doctests in the tutorial 73 | ([#49](https://github.com/vlasovskikh/funcparserlib/issues/49)) 74 | * Fixed several cases of wrong expected tokens in error messages 75 | 76 | 77 | 0.3.6 — 2013-05-02 78 | ------------------ 79 | 80 | ### Changed 81 | 82 | * Python 3 compatibility 83 | * More info available in exception objects 84 | ([#14](https://github.com/vlasovskikh/funcparserlib/issues/14)) 85 | 86 | ### Fixed 87 | 88 | * Fixed `many()` that consumed too many tokens in some cases 89 | ([#31](https://github.com/vlasovskikh/funcparserlib/issues/31)) 90 | 91 | 92 | 0.3.5 — 2011-01-13 93 | ------------------ 94 | 95 | ### Changed 96 | 97 | * Python 2.4 compatibility 98 | * More readable terminal names for error reporting 99 | 100 | ### Fixed 101 | 102 | * Fixed wrong token positions in lexer error messages 103 | 104 | 105 | 0.3.4 — 2009-10-06 106 | ------------------ 107 | 108 | ### Changed 109 | 110 | * Switched from `setuptools` to `distutils` 111 | * Improved the `run-tests` utility 112 | 113 | ### Fixed 114 | 115 | * Fixed importing all symbols from `funcparserlib.lexer` 116 | 117 | 118 | 0.3.3 — 2009-08-03 119 | ------------------ 120 | 121 | ### Added 122 | 123 | * Added a FAQ question about infinite loops in parsers 124 | 125 | ### Changed 126 | 127 | * Debug rule tracing can be enabled again 128 | 129 | ### Fixed 130 | 131 | * Fixed a bug in results of skip + skip parsers 132 | 133 | 134 | 0.3.2 — 2009-07-26 135 | ------------------ 136 | 137 | ### Added 138 | 139 | * Added the Parsing Stages Illustrated page 140 | 141 | ### Fixed 142 | 143 | * Fixed some 
string and number encoding issues in examples 144 | 145 | 146 | 0.3.1 — 2009-07-26 147 | ------------------ 148 | 149 | Major optimizations (10x faster than the version 0.3). 150 | 151 | ### Added 152 | 153 | * Added the `forward_decl` function, that performs better than `with_forward_decls` 154 | * Added the `pretty_tree` function for creating pseudo-graphic trees 155 | * Added the Nested Brackets Mini-HOWTO 156 | * Added `Makefile` and this `CHANGES.md` file 157 | 158 | ### Changed 159 | 160 | * Use a single immutable input sequence in parsers 161 | * Call a wrapped parser directly using `run` (without `__call__`) 162 | * The slow `logging` is enabled only when the `debug` flag is set 163 | 164 | 165 | 0.3 — 2009-07-23 166 | ---------------- 167 | 168 | ### Added 169 | 170 | * Added `pure` and `bind` functions on `Parser`s making them monads 171 | * Added the Funcparserlib Tutorial 172 | * Added a JSON parser as an example 173 | 174 | ### Changed 175 | 176 | * Translated the docs from Russian into English 177 | 178 | 179 | 0.2 — 2009-07-07 180 | ---------------- 181 | 182 | ### Added 183 | 184 | * Added the `with_forward_decls` combinator for dealing with forward declarations 185 | 186 | ### Changed 187 | 188 | * Switched to the iterative implementation of `many` 189 | * Un-curried the parser function type in order to simplify things 190 | * Improvements to the DOT parser 191 | 192 | 193 | 0.1 — 2009-06-26 194 | ---------------- 195 | 196 | Initial release. 197 | -------------------------------------------------------------------------------- /docs/getting-started/index.md: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | 5 | Intro 6 | ----- 7 | 8 | In this guide, we will write **a parser for a numeric expression calculator** with a syntax similar to Python expressions. Writing a calculator is a common example in articles related to parsers and parsing techniques, so it is a good starting point in learning `funcparserlib`. 9 | 10 | You will learn how to write a parser of numeric expressions using 11 | `funcparserlib`. Here are some expression strings we want to be able to parse: 12 | 13 | ``` 14 | 0 15 | 1 + 2 + 3 16 | -1 + 2 ** 32 17 | 3.1415926 * (2 + 7.18281828e-1) * 42 18 | ``` 19 | 20 | We will parse these strings into trees of objects like this one: 21 | 22 | ``` 23 | BinaryExpr('*') 24 | |-- BinaryExpr('*') 25 | | |-- 3.1415926 26 | | `-- BinaryExpr('+') 27 | | |-- 2 28 | | `-- 0.718281828 29 | `-- 42 30 | ``` 31 | 32 | 33 | Diving In 34 | --------- 35 | 36 | Here is the complete source code of the expression parser we are going to write. 37 | 38 | You are **not** supposed to understand it now. Just look at its shape and try to get some feeling about its structure. By the end of this guide, **you will fully understand this code** and will be able to write parsers for your own needs. 39 | 40 | 41 | ```pycon 42 | >>> from typing import List, Tuple, Union 43 | >>> from dataclasses import dataclass 44 | >>> from funcparserlib.lexer import make_tokenizer, TokenSpec, Token 45 | >>> from funcparserlib.parser import tok, Parser, many, forward_decl, finished 46 | 47 | 48 | >>> @dataclass 49 | ... class BinaryExpr: 50 | ... op: str 51 | ... left: "Expr" 52 | ... right: "Expr" 53 | 54 | 55 | >>> Expr = Union[BinaryExpr, int, float] 56 | 57 | 58 | >>> def tokenize(s: str) -> List[Token]: 59 | ... specs = [ 60 | ... TokenSpec("whitespace", r"\s+"), 61 | ... 
TokenSpec("float", r"[+\-]?\d+\.\d*([Ee][+\-]?\d+)*"), 62 | ... TokenSpec("int", r"[+\-]?\d+"), 63 | ... TokenSpec("op", r"(\*\*)|[+\-*/()]"), 64 | ... ] 65 | ... tokenizer = make_tokenizer(specs) 66 | ... return [t for t in tokenizer(s) if t.type != "whitespace"] 67 | 68 | 69 | >>> def parse(tokens: List[Token]) -> Expr: 70 | ... int_num = tok("int") >> int 71 | ... float_num = tok("float") >> float 72 | ... number = int_num | float_num 73 | ... 74 | ... expr: Parser[Token, Expr] = forward_decl() 75 | ... parenthesized = -op("(") + expr + -op(")") 76 | ... primary = number | parenthesized 77 | ... power = primary + many(op("**") + primary) >> to_expr 78 | ... term = power + many((op("*") | op("/")) + power) >> to_expr 79 | ... sum = term + many((op("+") | op("-")) + term) >> to_expr 80 | ... expr.define(sum) 81 | ... 82 | ... document = expr + -finished 83 | ... 84 | ... return document.parse(tokens) 85 | 86 | 87 | >>> def op(name: str) -> Parser[Token, str]: 88 | ... return tok("op", name) 89 | 90 | 91 | >>> def to_expr(args: Tuple[Expr, List[Tuple[str, Expr]]]) -> Expr: 92 | ... first, rest = args 93 | ... result = first 94 | ... for op, expr in rest: 95 | ... result = BinaryExpr(op, result, expr) 96 | ... return result 97 | 98 | ``` 99 | 100 | !!! Note 101 | 102 | The code examples in this guide are actually executable. You can clone the [funcparserlib](https://github.com/vlasovskikh/funcparserlib) repository from GitHub and run the examples from the document via `doctest`: 103 | 104 | ```sh 105 | python3 -m doctest -v docs/getting-started/*.md 106 | 107 | ``` 108 | 109 | Test the expression parser: 110 | 111 | ```pycon 112 | >>> parse(tokenize("0")) 113 | 0 114 | 115 | >>> parse(tokenize("1 + 2 + 3")) 116 | BinaryExpr(op='+', left=BinaryExpr(op='+', left=1, right=2), right=3) 117 | 118 | >>> parse(tokenize("-1 + 2 ** 32")) 119 | BinaryExpr(op='+', left=-1, right=BinaryExpr(op='**', left=2, right=32)) 120 | 121 | >>> parse(tokenize("3.1415926 * (2 + 7.18281828e-1) * 42")) 122 | BinaryExpr(op='*', left=BinaryExpr(op='*', left=3.1415926, right=BinaryExpr(op='+', left=2, right=0.718281828)), right=42) 123 | 124 | ``` 125 | 126 | 127 | Next 128 | ---- 129 | 130 | Now let's start learning how to write a numeric expression parser using `funcparserlib`. 131 | 132 | In [the next chapter](tokenizing.md) you will learn about the first step in parsing: tokenizing the input. It means splitting your input string into a sequence of tokens that are easier to parse. 133 | -------------------------------------------------------------------------------- /docs/getting-started/parse-tree.md: -------------------------------------------------------------------------------- 1 | Preparing the Parse Tree 2 | ======================== 3 | 4 | So far we have defined the parser for our calculator expressions language: 5 | 6 | 7 | ```pycon 8 | >>> from typing import List 9 | >>> from funcparserlib.lexer import make_tokenizer, TokenSpec, Token 10 | >>> from funcparserlib.parser import tok, Parser, many, forward_decl, finished 11 | 12 | 13 | >>> def tokenize(s: str) -> List[Token]: 14 | ... specs = [ 15 | ... TokenSpec("whitespace", r"\s+"), 16 | ... TokenSpec("float", r"[+\-]?\d+\.\d*([Ee][+\-]?\d+)*"), 17 | ... TokenSpec("int", r"[+\-]?\d+"), 18 | ... TokenSpec("op", r"(\*\*)|[+\-*/()]"), 19 | ... ] 20 | ... tokenizer = make_tokenizer(specs) 21 | ... return [t for t in tokenizer(s) if t.type != "whitespace"] 22 | 23 | 24 | >>> def op(name: str) -> Parser[Token, str]: 25 | ... 
return tok("op", name) 26 | 27 | 28 | >>> int_str = tok("int") 29 | >>> float_str = tok("float") 30 | >>> number = int_str | float_str 31 | >>> expr = forward_decl() 32 | >>> parenthesized = op("(") + expr + op(")") 33 | >>> primary = number | parenthesized 34 | >>> power = primary + many(op("**") + primary) 35 | >>> expr.define(power) 36 | >>> document = expr + finished 37 | 38 | ``` 39 | 40 | Here is how its parse results look so far: 41 | 42 | 43 | ```pycon 44 | >>> document.parse(tokenize("2 ** (3 ** 4)")) 45 | ('2', [('**', ('(', ('3', [('**', '4')]), ')'))], None) 46 | 47 | ``` 48 | 49 | 50 | `p >> f`: Transforming Parse Results 51 | ------------------------------------ 52 | 53 | Let's start improving our parse results by converting numbers from `str` to `int` or `float`. We will use the [`Parser.__rshift__`](../api/parser.md#funcparserlib.parser.Parser.__rshift__) combinator for that. `p >> f` takes a parser `p` and a function `f` of a single argument and returns a new parser that applies `f` to the parse result of `p`. 54 | 55 | An integer parser that returns `int` values: 56 | 57 | ```pycon 58 | >>> int_num: Parser[Token, int] = tok("int") >> int 59 | 60 | ``` 61 | 62 | !!! Note 63 | 64 | We specify the type hint for the parser only for clarity here. We wanted to highlight that `>>` here changes the output type of the parser from `str` to `int`. You may omit type hints for parsers and rely on type inference features of your text editor and type checker to get code completion and linting warnings: 65 | 66 | ```pycon 67 | >>> int_num = tok("int") >> int 68 | 69 | ``` 70 | 71 | The only combinator whose type is not inferrable is `forward_decl()`. You should specify its type explicitly to get your parser fully type checked. 72 | 73 | Try it: 74 | 75 | 76 | ```pycon 77 | >>> int_num.parse(tokenize("42")) 78 | 42 79 | 80 | ``` 81 | 82 | Let's redefine our `number` parser so that it returns either `int` or `float`: 83 | 84 | ```pycon 85 | >>> from typing import Union 86 | 87 | 88 | >>> float_num: Parser[Token, float] = tok("float") >> float 89 | >>> number: Parser[Token, Union[int, float]] = int_num | float_num 90 | 91 | ``` 92 | 93 | Test it: 94 | 95 | ```pycon 96 | >>> number.parse(tokenize("42")) 97 | 42 98 | 99 | >>> number.parse(tokenize("3.14")) 100 | 3.14 101 | 102 | ``` 103 | 104 | 105 | `-p`: Skipping Parse Results 106 | ---------------------------- 107 | 108 | Let's recall our nested parenthesized numbers example: 109 | 110 | ```pycon 111 | >>> p = forward_decl() 112 | >>> p.define(number | (op("(") + p + op(")"))) 113 | 114 | ``` 115 | 116 | Test it: 117 | 118 | ```pycon 119 | >>> p.parse(tokenize("((1))")) 120 | ('(', ('(', 1, ')'), ')') 121 | 122 | ``` 123 | 124 | We have successfully parsed numbers in nested parentheses, but we don't want to see parentheses in the parsing results. Let's skip them using the [`Parser.__neg__`](../api/parser.md#funcparserlib.parser.Parser.__neg__) combinator. It allows you to skip any parts of a sequence of parsers concatenated via `p1 + p2 + ...
+ pN` by using a unary `-p` operator on the ones you want to skip: 125 | 126 | ```pycon 127 | >>> p = forward_decl() 128 | >>> p.define(number | (-op("(") + p + -op(")"))) 129 | 130 | ``` 131 | 132 | The result is cleaner now: 133 | 134 | 135 | ```pycon 136 | >>> p.parse(tokenize("1")) 137 | 1 138 | 139 | >>> p.parse(tokenize("(1)")) 140 | 1 141 | 142 | >>> p.parse(tokenize("((1))")) 143 | 1 144 | 145 | ``` 146 | 147 | Let's re-define our grammar using the [`Parser.__neg__`](../api/parser.md#funcparserlib.parser.Parser.__neg__) combinator to get rid of extra parentheses in the parse results, as well as of extra `None` returned by `finished`: 148 | 149 | ```pycon 150 | >>> expr = forward_decl() 151 | >>> parenthesized = -op("(") + expr + -op(")") 152 | >>> primary = number | parenthesized 153 | >>> power = primary + many(op("**") + primary) 154 | >>> expr.define(power) 155 | >>> document = expr + -finished 156 | 157 | ``` 158 | 159 | Test it: 160 | 161 | ```pycon 162 | >>> document.parse(tokenize("2 ** (3 ** 4)")) 163 | (2, [('**', (3, [('**', 4)]))]) 164 | 165 | ``` 166 | 167 | User-Defined Classes for the Parse Tree 168 | --------------------------------------- 169 | 170 | We have many types of binary operators in our grammar, but we've defined only the `**` power operator so far. Let's define them for `*`, `/`, `+`, `-` as well: 171 | 172 | ```pycon 173 | >>> expr = forward_decl() 174 | >>> parenthesized = -op("(") + expr + -op(")") 175 | >>> primary = number | parenthesized 176 | >>> power = primary + many(op("**") + primary) 177 | >>> term = power + many((op("*") | op("/")) + power) 178 | >>> sum = term + many((op("+") | op("-")) + term) 179 | >>> expr.define(sum) 180 | >>> document = expr + -finished 181 | 182 | ``` 183 | 184 | Here we've introduced a hierarchy of nested parsers: `expr -> sum -> term -> power -> primary -> parenthesized -> expr -> ...` to reflect the order of calculations set by our operator priorities: `+` < `*` < `**` < `()`. 185 | 186 | Test it: 187 | 188 | 189 | ```pycon 190 | >>> document.parse(tokenize("1 * (2 + 0) ** 3")) 191 | (1, [], [('*', (2, [], [], [('+', (0, [], []))], [('**', 3)]))], []) 192 | 193 | ``` 194 | 195 | It's hard to understand the results without proper user-defined classes for our expression types. We actually have 3 expression types: 196 | 197 | * Integer numbers 198 | * Floating point numbers 199 | * Binary expressions 200 | 201 | For integers and floats we will use Python `int` and `float` classes. For binary expressions we'll introduce the `BinaryExpr` class: 202 | 203 | ```pycon 204 | >>> from dataclasses import dataclass 205 | 206 | 207 | >>> @dataclass 208 | ... class BinaryExpr: 209 | ... op: str 210 | ... left: "Expr" 211 | ... right: "Expr" 212 | 213 | ``` 214 | 215 | Since we don't use a common base class for our expressions, we have to define `Expr` as a union of possible expression types: 216 | 217 | 218 | ``` 219 | >>> Expr = Union[BinaryExpr, int, float] 220 | 221 | ``` 222 | 223 | Now let's define a function to transform the parse results of our binary operators into `BinaryExpr` objects. Take a look at our parsers of various binary expressions. You can infer that each of them returns _(expression, list of (operator, expression))_. 
We will transform these nested tuples and lists into a tree of nested expressions by defining a function `to_expr(args)` and applying `>> to_expr` to our expression parsers: 224 | 225 | ```pycon 226 | >>> from typing import Tuple 227 | 228 | 229 | >>> def to_expr(args: Tuple[Expr, List[Tuple[str, Expr]]]) -> Expr: 230 | ... first, rest = args 231 | ... result = first 232 | ... for op, expr in rest: 233 | ... result = BinaryExpr(op, result, expr) 234 | ... return result 235 | 236 | ``` 237 | 238 | Let's re-define our grammar using this transformation: 239 | 240 | 241 | ```pycon 242 | >>> expr: Parser[Token, Expr] = forward_decl() 243 | >>> parenthesized = -op("(") + expr + -op(")") 244 | >>> primary = number | parenthesized 245 | >>> power = primary + many(op("**") + primary) >> to_expr 246 | >>> term = power + many((op("*") | op("/")) + power) >> to_expr 247 | >>> sum = term + many((op("+") | op("-")) + term) >> to_expr 248 | >>> expr.define(sum) 249 | >>> document = expr + -finished 250 | 251 | ``` 252 | 253 | Test it: 254 | 255 | ```pycon 256 | >>> document.parse(tokenize("3.1415926 * (2 + 7.18281828e-1) * 42")) 257 | BinaryExpr(op='*', left=BinaryExpr(op='*', left=3.1415926, right=BinaryExpr(op='+', left=2, right=0.718281828)), right=42) 258 | 259 | ``` 260 | 261 | Let's pretty-print it using the [`pretty_tree(x, kids, show)`](../api/util.md#funcparserlib.util.pretty_tree) function: 262 | 263 | ```pycon 264 | >>> from funcparserlib.util import pretty_tree 265 | 266 | 267 | >>> def pretty_expr(expr: Expr) -> str: 268 | ... 269 | ... def kids(expr: Expr) -> List[Expr]: 270 | ... if isinstance(expr, BinaryExpr): 271 | ... return [expr.left, expr.right] 272 | ... else: 273 | ... return [] 274 | ... 275 | ... def show(expr: Expr) -> str: 276 | ... if isinstance(expr, BinaryExpr): 277 | ... return f"BinaryExpr({expr.op!r})" 278 | ... else: 279 | ... return repr(expr) 280 | ... 281 | ... return pretty_tree(expr, kids, show) 282 | 283 | ``` 284 | 285 | Test it: 286 | 287 | ```pycon 288 | >>> print(pretty_expr(document.parse(tokenize("3.1415926 * (2 + 7.18281828e-1) * 42")))) 289 | BinaryExpr('*') 290 | |-- BinaryExpr('*') 291 | | |-- 3.1415926 292 | | `-- BinaryExpr('+') 293 | | |-- 2 294 | | `-- 0.718281828 295 | `-- 42 296 | 297 | ``` 298 | 299 | 300 | 301 | Finally, we have a proper parse tree that is easy to understand and work with! 302 | 303 | 304 | Next 305 | ---- 306 | 307 | 308 | We've finished writing our numeric expressions parser. 309 | 310 | If you want to learn more, let's discuss a few tips and tricks about parsing in [the next chapter](tips-and-tricks.md). 311 | -------------------------------------------------------------------------------- /docs/getting-started/parsing.md: -------------------------------------------------------------------------------- 1 | Parsing Tokens 2 | ============== 3 | 4 | So far we have defined the tokenizer for our calculator expressions language: 5 | 6 | ```pycon 7 | >>> from typing import List 8 | >>> from funcparserlib.lexer import make_tokenizer, TokenSpec, Token 9 | 10 | 11 | >>> def tokenize(s: str) -> List[Token]: 12 | ... specs = [ 13 | ... TokenSpec("whitespace", r"\s+"), 14 | ... TokenSpec("float", r"[+\-]?\d+\.\d*([Ee][+\-]?\d+)*"), 15 | ... TokenSpec("int", r"[+\-]?\d+"), 16 | ... TokenSpec("op", r"(\*\*)|[+\-*/()]"), 17 | ... ] 18 | ... tokenizer = make_tokenizer(specs) 19 | ... 
return [t for t in tokenizer(s) if t.type != "whitespace"] 20 | 21 | ``` 22 | 23 | It results in a list of tokens which we want to parse according to our expressions grammar: 24 | 25 | ```pycon 26 | >>> from pprint import pprint 27 | 28 | 29 | >>> pprint(tokenize("3.1415926 * (2 + 7.18281828e-1) * 42")) 30 | [Token('float', '3.1415926'), 31 | Token('op', '*'), 32 | Token('op', '('), 33 | Token('int', '2'), 34 | Token('op', '+'), 35 | Token('float', '7.18281828e-1'), 36 | Token('op', ')'), 37 | Token('op', '*'), 38 | Token('int', '42')] 39 | 40 | ``` 41 | 42 | 43 | Parser Combinators 44 | ------------------ 45 | 46 | A **parser** is an object that takes input tokens and transforms them into a parse result. For example, a **primitive parser** [`tok(type, value)`](../api/parser.md#funcparserlib.parser.tok) parses a single token of a certain type and, optionally, with a certain value. 47 | 48 | Parsing a single token is not exciting at all. The interesting part comes when you start combining parsers via **parser combinators** to build bigger parsers of complex token sequences. 49 | 50 | Parsers from [`funcparserlib.parser`](../api/parser.md) have a nice layered structure that allows you to express the grammar rules of the language you want to parse: 51 | 52 | ``` 53 | ┌──────────┬──────────────────────┬───────────┐ 54 | │ │ Primitive Parsers │ │ 55 | │ ├──────────────────────┘ │ 56 | │ │ │ 57 | │ │ tok(type, value) forward_decl() │ 58 | │ │ │ 59 | │ │ a(token) some(pred) finished │ 60 | │ │ │ 61 | │ ├──────────────────────┬───────────┤ 62 | │ │ Parser Combinators │ │ 63 | │ ├──────────────────────┘ │ 64 | │ │ │ 65 | │ Parser │ p1 + p2 p1 | p2 p >> f -p │ 66 | │ objects │ │ 67 | │ │ many(p) oneplus(p) maybe(p) │ 68 | │ │ │ 69 | │ ├──────────────────────┬───────────┤ 70 | │ │ Means of Abstraction │ │ 71 | │ ├──────────────────────┘ │ 72 | │ │ │ 73 | │ │ Python assignments: = │ 74 | │ │ │ 75 | │ │ Python functions: def │ 76 | └──────────┴──────────────────────────────────┘ 77 | ``` 78 | 79 | You get a new [`Parser`](../api/parser.md#funcparserlib.parser.Parser) object each time you apply a parser combinator to your parsers. Therefore, the set of all parsers is closed under the operations defined by parser combinators. 80 | 81 | Parsers are regular Python objects of type [`Parser`](../api/parser.md#funcparserlib.parser.Parser). This means that you can write arbitrary Python code that builds parser objects: assign parsers to variables, pass parsers as call arguments, get them as the return values of calls, etc. 82 | 83 | !!! Note 84 | 85 | The type [`Parser`](../api/parser.md#funcparserlib.parser.Parser) is actually parameterized as `Parser[T, V]` where: 86 | 87 | * `T` is the type of input tokens 88 | * `V` is the type of the parse result 89 | 90 | Your text editor or type checker will provide better code completion and error checking for your parsing code based on the types defined in `funcparserlib` and their type inference capabilities. 91 | 92 | 93 | `tok()`: Parsing a Single Token 94 | ------------------------------- 95 | 96 | Let's recall the expressions we would like to be able to parse: 97 | 98 | ``` 99 | 0 100 | 1 + 2 + 3 101 | -1 + 2 ** 32 102 | 3.1415926 * (2 + 7.18281828e-1) * 42 103 | ``` 104 | 105 | It looks like our grammar should have expressions that consist of numbers or nested expressions. Let's start with just numbers. 106 | 107 | 108 | We'll use [`tok(type, value)`](../api/parser.md#funcparserlib.parser.tok) to create a primitive parser of a single integer token.
Let's import it: 109 | 110 | ```pycon 111 | >>> from funcparserlib.parser import tok 112 | 113 | ``` 114 | 115 | Here is our parser of a single integer token. The string `"int"` is the type of the integer token spec for our tokenizer: 116 | 117 | 118 | ```pycon 119 | >>> int_str = tok("int") 120 | 121 | ``` 122 | 123 | Let's try it in action. In order to invoke a parser, we should pass a sequence of tokens to its [`Parser.parse(tokens)`](../api/parser.md#funcparserlib.parser.Parser.parse) method: 124 | 125 | ```pycon 126 | >>> int_str.parse(tokenize("42")) 127 | '42' 128 | 129 | ``` 130 | 131 | !!! Note 132 | 133 | Our parser returns integer numbers as strings at the moment. We'll cover transforming parse results and creating a proper parse tree in the next chapter. 134 | 135 | If the first token in the input is _not_ of type `"int"`, our parser raises an exception: 136 | 137 | ```pycon 138 | >>> int_str.parse(tokenize("+")) # doctest: +IGNORE_EXCEPTION_DETAIL 139 | Traceback (most recent call last): 140 | ... 141 | NoParseError: 1,1-1,1: got unexpected token: '+', expected: int 142 | 143 | ``` 144 | 145 | 146 | `p1 | p2`: Parsing Alternatives 147 | ------------------------------- 148 | 149 | We want to support floating point numbers as well. We already know how to do it: 150 | 151 | ```pycon 152 | >>> float_str = tok("float") 153 | 154 | ``` 155 | 156 | Let's define our number expression as either an integer or a float number. We can parse alternatives using the [`Parser.__or__`](../api/parser.md#funcparserlib.parser.Parser.__or__) combinator: 157 | 158 | ```pycon 159 | >>> number = int_str | float_str 160 | 161 | ``` 162 | 163 | Test it: 164 | 165 | ```pycon 166 | >>> number.parse(tokenize("42")) 167 | '42' 168 | 169 | >>> number.parse(tokenize("3.14")) 170 | '3.14' 171 | 172 | >>> number.parse(tokenize("*")) # doctest: +IGNORE_EXCEPTION_DETAIL 173 | Traceback (most recent call last): 174 | ... 175 | NoParseError: 1,1-1,1: got unexpected token: '*', expected: int or float 176 | 177 | ``` 178 | 179 | 180 | `p1 + p2`: Parsing a Sequence 181 | ----------------------------- 182 | 183 | Since we can parse numbers now, let's proceed with expressions. The first expression we will parse is the power operator: 184 | 185 | ``` 186 | 2 ** 32 187 | ``` 188 | 189 | We need a new parser combinator to parse sequences of tokens. We can combine parsers sequentially using the [`Parser.__add__`](../api/parser.md#funcparserlib.parser.Parser.__add__) combinator. 190 | 191 | Let's try it on sequences of numbers first: 192 | 193 | ```pycon 194 | >>> p = number + number 195 | 196 | ``` 197 | 198 | Test it: 199 | 200 | ```pycon 201 | >>> p.parse(tokenize("1 2")) 202 | ('1', '2') 203 | 204 | ``` 205 | 206 | The sequence combinator returns its results as a tuple of the parse results of its arguments. The size of the resulting tuple depends on the number of the parsers in the sequence. Let's try it for three numbers: 207 | 208 | ```pycon 209 | >>> p = number + number + number 210 | 211 | ``` 212 | 213 | Test it: 214 | 215 | ```pycon 216 | >>> p.parse(tokenize("1 2 3")) 217 | ('1', '2', '3') 218 | 219 | ``` 220 | 221 | Back to parsing the power operator of our calculator expressions language. We will need to parse several different operator tokens besides `"**"` in our grammar, so let's define a helper function: 222 | 223 | ```pycon 224 | >>> from funcparserlib.parser import Parser 225 | 226 | 227 | >>> def op(name: str) -> Parser[Token, str]: 228 | ...
return tok("op", name) 229 | 230 | ``` 231 | 232 | Let's define the parser of the power operator expressions using our new `op(name)` helper: 233 | 234 | ```pycon 235 | >>> power = number + op("**") + number 236 | 237 | ``` 238 | 239 | Test it: 240 | 241 | ```pycon 242 | >>> power.parse(tokenize("2 ** 32")) 243 | ('2', '**', '32') 244 | 245 | ``` 246 | 247 | 248 | `many()`: Parsing Repeated Parts 249 | -------------------------------- 250 | 251 | We want to allow sequences of power operators. Let's parse the first number, followed by zero or more pairs of the power operator and a number. We'll use the [`many(p)`](../api/parser.md#funcparserlib.parser.many) combinator for that. Let's import it: 252 | 253 | ```pycon 254 | >>> from funcparserlib.parser import many 255 | 256 | ``` 257 | 258 | Here is our parser of sequences of power operators: 259 | 260 | ```pycon 261 | >>> power = number + many(op("**") + number) 262 | 263 | ``` 264 | 265 | Test it: 266 | 267 | ```pycon 268 | >>> power.parse(tokenize("2 ** 3 ** 4")) 269 | ('2', [('**', '3'), ('**', '4')]) 270 | 271 | ``` 272 | 273 | The `many(p)` combinator applies its argument parser `p` to the input sequence of tokens many times until it fails, returning a list of the results. If `p` fails to parse any tokens, `many(p)` still succeeds and returns an empty list: 274 | 275 | ```pycon 276 | >>> power.parse(tokenize("1 + 2")) 277 | ('1', []) 278 | 279 | ``` 280 | 281 | 282 | `forward_decl()`: Parsing Recursive Parts 283 | ----------------------------------------- 284 | 285 | We want to allow using parentheses to specify the order of calculations. 286 | 287 | Ideally, we would like to write a recursive assignment like this one, but the Python syntax doesn't allow it: 288 | 289 | ```pycon 290 | >>> expr = power | number | (op("(") + expr + op(")")) # doctest: +IGNORE_EXCEPTION_DETAIL 291 | Traceback (most recent call last): 292 | ... 293 | NameError: name 'expr' is not defined 294 | 295 | ``` 296 | 297 | We will use the [`forward_decl()`](../api/parser.md#funcparserlib.parser.forward_decl) parser to solve the recursive assignment problem: 298 | 299 | 1. We create a forward declaration 300 | 2. We use the declaration in other parsers 301 | 3. We define the value of the declaration 302 | 303 | Let's start with a simple example first. We'll create a parser for numbers in properly nested parentheses: 304 | 305 | ```pycon 306 | >>> from funcparserlib.parser import forward_decl 307 | >>> p = forward_decl() 308 | >>> p.define(number | (op("(") + p + op(")"))) 309 | 310 | ``` 311 | 312 | Test it: 313 | 314 | ```pycon 315 | >>> p.parse(tokenize("1")) 316 | '1' 317 | 318 | >>> p.parse(tokenize("(1)")) 319 | ('(', '1', ')') 320 | 321 | >>> p.parse(tokenize("((1))")) 322 | ('(', ('(', '1', ')'), ')') 323 | 324 | ``` 325 | 326 | Back to our recursive `expr` problem.
Let's re-write our grammar using `forward_decl()` for expressions: 327 | 328 | ```pycon 329 | >>> expr = forward_decl() 330 | >>> parenthesized = op("(") + expr + op(")") 331 | >>> primary = number | parenthesized 332 | >>> power = primary + many(op("**") + primary) 333 | >>> expr.define(power) 334 | 335 | ``` 336 | 337 | Test it: 338 | 339 | ```pycon 340 | >>> expr.parse(tokenize("2 ** 3 ** 4")) 341 | ('2', [('**', '3'), ('**', '4')]) 342 | 343 | >>> expr.parse(tokenize("2 ** (3 ** 4)")) 344 | ('2', [('**', ('(', ('3', [('**', '4')]), ')'))]) 345 | 346 | ``` 347 | 348 | 349 | `finished`: Expecting No More Input 350 | ----------------------------------- 351 | 352 | Surprisingly, our `expr` parser tolerates incomplete expressions by ignoring the incomplete parts: 353 | 354 | ```pycon 355 | >>> expr.parse(tokenize("2 ** (3 ** 4")) 356 | ('2', []) 357 | 358 | ``` 359 | 360 | The problem is that its `many(p)` part parses the input while `p` succeeds, and it doesn't look any further than that. We can make a parser expect the end of the input via the [`finished`](../api/parser.md#finished) parser. Let's define a parser for the whole input document: 361 | 362 | ```pycon 363 | >>> from funcparserlib.parser import finished 364 | >>> document = expr + finished 365 | 366 | ``` 367 | 368 | !!! Note 369 | 370 | Usually you finish the topmost parser of your grammar with `... + finished` to indicate that you expect no further input. 371 | 372 | Let's try it for our grammar: 373 | 374 | ```pycon 375 | >>> document.parse(tokenize("2 ** (3 ** 4")) # doctest: +IGNORE_EXCEPTION_DETAIL 376 | Traceback (most recent call last): 377 | ... 378 | NoParseError: got unexpected end of input, expected: ')' 379 | 380 | >>> document.parse(tokenize("2 ** (3 ** 4)")) 381 | ('2', [('**', ('(', ('3', [('**', '4')]), ')'))], None) 382 | 383 | ``` 384 | 385 | Next 386 | ---- 387 | 388 | We have created a parser for power operator expressions. Its parse results are correct, but they are hard to understand and work with: 389 | 390 | * Our integer and floating point numbers are strings, not `int` or `float` objects 391 | * The results contain `'('` and `')'` strings even though we need parentheses only temporarily to set the operator priorities 392 | * The results contain `None`, which is the parse result of [`finished`](../api/parser.md#finished), even though we don't need it 393 | * The results are lists of tuples of strings, not user-defined classes that reflect the grammar of our calculator expressions language 394 | 395 | In [the next chapter](parse-tree.md) you will learn how to transform parse results and prepare a proper, cleaned-up parse tree. 396 | -------------------------------------------------------------------------------- /docs/getting-started/tips-and-tricks.md: -------------------------------------------------------------------------------- 1 | Tips and Tricks 2 | =============== 3 | 4 | Let's use the tokenizer we have defined previously for our examples in this chapter: 5 | 6 | ```pycon 7 | >>> from typing import List 8 | >>> from funcparserlib.lexer import make_tokenizer, TokenSpec, Token 9 | >>> from funcparserlib.parser import tok, Parser, many, forward_decl, finished 10 | 11 | 12 | >>> def tokenize(s: str) -> List[Token]: 13 | ... specs = [ 14 | ... TokenSpec("whitespace", r"\s+"), 15 | ... TokenSpec("float", r"[+\-]?\d+\.\d*([Ee][+\-]?\d+)*"), 16 | ... TokenSpec("int", r"[+\-]?\d+"), 17 | ... TokenSpec("op", r"(\*\*)|[+\-*/()]"), 18 | ... ] 19 | ... tokenizer = make_tokenizer(specs) 20 | ...
return [t for t in tokenizer(s) if t.type != "whitespace"] 21 | 22 | 23 | >>> def op(name: str) -> Parser[Token, str]: 24 | ... return tok("op", name) 25 | 26 | ``` 27 | 28 | ## Name Alternative Parsers for Better Error Messages 29 | 30 | Consider the following grammar: 31 | 32 | ```pycon 33 | >>> number = (tok("int") >> int) | (tok("float") >> float) 34 | >>> paren = -op("(") + number + -op(")") 35 | >>> mul = number + op("*") + number 36 | >>> expr = paren | mul 37 | 38 | ``` 39 | 40 | When a parser fails to parse its input, it usually reports the token it expects: 41 | 42 | ```pycon 43 | >>> paren.parse(tokenize("(1")) # doctest: +IGNORE_EXCEPTION_DETAIL 44 | Traceback (most recent call last): 45 | ... 46 | NoParseError: got unexpected end of input, expected: ')' 47 | 48 | ``` 49 | 50 | If there were several parsing alternatives, the parser will report an error after the longest successfully parsed sequence: 51 | 52 | ```pycon 53 | 54 | >>> expr.parse(tokenize("1 + 2")) # doctest: +IGNORE_EXCEPTION_DETAIL 55 | Traceback (most recent call last): 56 | ... 57 | NoParseError: 1,3-1,3: got unexpected token: '+', expected: '*' 58 | 59 | ``` 60 | 61 | If there were several parsing alternatives and all of them failed to parse the current token, then the parser will report its name as the expected input: 62 | 63 | ```pycon 64 | >>> number.parse(tokenize("*")) # doctest: +IGNORE_EXCEPTION_DETAIL 65 | Traceback (most recent call last): 66 | ... 67 | NoParseError: 1,1-1,1: got unexpected token: '*', expected: int or float 68 | 69 | >>> expr.parse(tokenize("+")) # doctest: +IGNORE_EXCEPTION_DETAIL 70 | Traceback (most recent call last): 71 | ... 72 | NoParseError: 1,1-1,1: got unexpected token: '+', expected: int or float or (('(', int or float), ')') 73 | 74 | ``` 75 | 76 | Parser names are auto-generated and may be quite long and hard to understand. For better error messages you may want to name your parsers explicitly via [`Parser.named(name)`](../api/parser.md#funcparserlib.parser.Parser.named). The naming style is up to you. For example: 77 | 78 | ```pycon 79 | >>> number = ((tok("int") >> int) | (tok("float") >> float)).named("number") 80 | >>> paren = -op("(") + number + -op(")") 81 | >>> mul = number + op("*") + number 82 | >>> expr = (paren | mul).named("number or '('") 83 | 84 | ``` 85 | 86 | Test it: 87 | 88 | 89 | ```pycon 90 | >>> number.parse(tokenize("*")) # doctest: +IGNORE_EXCEPTION_DETAIL 91 | Traceback (most recent call last): 92 | ... 93 | NoParseError: 1,1-1,1: got unexpected token: '*', expected: number 94 | 95 | >>> expr.parse(tokenize("+")) # doctest: +IGNORE_EXCEPTION_DETAIL 96 | Traceback (most recent call last): 97 | ... 98 | NoParseError: 1,1-1,1: got unexpected token: '+', expected: number or '(' 99 | 100 | ``` 101 | 102 | 103 | ## How to Handle Conflicting Alternatives 104 | 105 | If one of the parsing alternatives is a subpart of another one, then you should put the longest alternative first. 
Otherwise parsing the shorter one will make another one unreachable: 106 | 107 | ```pycon 108 | >>> p = (number + number) | (number + number + number) 109 | 110 | >>> p.parse(tokenize("1 2 3")) 111 | (1, 2) 112 | 113 | ``` 114 | 115 | Parse the longest alternative first: 116 | 117 | ```pycon 118 | >>> p = (number + number + number) | (number + number) 119 | 120 | >>> p.parse(tokenize("1 2 3")) 121 | (1, 2, 3) 122 | 123 | >>> p.parse(tokenize("1 2")) 124 | (1, 2) 125 | 126 | ``` 127 | 128 | 129 | ## Watch Out for Left Recursion 130 | 131 | There are certain kinds of grammar rules you cannot use with `funcparserlib`. These are the rules that contain recursion in their leftmost parts. These rules lead to infinite recursion during parsing, which results in a `RecursionError` exception. 132 | 133 | For example, we want to define an expression `expr` as either a multiplication expression `mul` or a `number`. We also want a multiplication `mul` to be a sequence of an expression `expr`, followed by the operator `"*"`, followed by another expression `expr`: 134 | 135 | 136 | ```pycon 137 | >>> expr = forward_decl() 138 | >>> mul = expr + op("*") + expr 139 | >>> expr.define(mul | number) 140 | 141 | ``` 142 | 143 | This looks reasonable at first glance, but it contains left recursion. In order to parse the first token for `expr`, we need to parse the first token for `mul`, for that we need to parse the first token for `expr`, and so on. This left recursion in your grammar results in a stack overflow exception: 144 | 145 | ```pycon 146 | >>> expr.parse(tokenize("1 * 2")) # doctest: +IGNORE_EXCEPTION_DETAIL 147 | Traceback (most recent call last): 148 | ... 149 | RecursionError: maximum recursion depth exceeded 150 | 151 | ``` 152 | 153 | You should think about how to re-write your grammar to avoid left-recursive definitions. In our case of repeated binary operators we really want a number, followed by zero or more pairs of an operator and a number: 154 | 155 | ```pycon 156 | >>> expr = forward_decl() 157 | >>> mul = number + many(op("**") + number) 158 | >>> expr.define(mul) 159 | 160 | ``` 161 | 162 | Test it: 163 | 164 | ```pycon 165 | >>> expr.parse(tokenize("1 ** 2")) 166 | (1, [('**', 2)]) 167 | 168 | 169 | >>> expr.parse(tokenize("3")) 170 | (3, []) 171 | 172 | ``` 173 | 174 | Remember that your parsers have to consume at least one token from the input before going into recursive definitions. 175 | -------------------------------------------------------------------------------- /docs/getting-started/tokenizing.md: -------------------------------------------------------------------------------- 1 | Tokenizing Input 2 | ================ 3 | 4 | Parsing is usually split into two steps: 5 | 6 | 1. Tokenizing the input string into a sequence of tokens 7 | 2. Parsing the tokens into a parse tree 8 | 9 | 10 | ``` 11 | ┌────────────┐ ┌─────────┐ 12 | str │ │ List[Token] │ │ Expr 13 | ─────────► tokenize() ├───────────────► parse() ├─────────► 14 | │ │ │ │ 15 | └────────────┘ └─────────┘ 16 | ``` 17 | 18 | **Tokens** are larger pieces of the input text such as words, punctuation marks, spaces, etc. It's easier to parse a list of tokens than a string, since you can skip auxiliary tokens (spaces, newlines, comments) during tokenizing and focus on the main ones. Tokens usually track their position in the text, which is helpful in parsing error messages.
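To get a feel for what a token is before we generate a tokenizer, here is a minimal sketch that constructs a `Token` by hand (only an illustration of the token data structure; the tokenizer described below normally creates tokens for you and also fills in their positions):

```pycon
>>> from funcparserlib.lexer import Token
>>> t = Token("int", "42")
>>> (t.type, t.value)
('int', '42')

```

In practice you get such tokens from a tokenizer generated by `make_tokenizer()`, as shown in the rest of this chapter.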
19 | 20 | 21 | Tokenizing with `make_tokenizer()` 22 | ---------------------------------- 23 | 24 | One of the most common ways to define tokens and tokenizing rules is via regular expressions. `funcparserlib` comes with the module [`funcparserlib.lexer`](../api/lexer.md) for creating regexp-based tokenizers. 25 | 26 | !!! Note 27 | 28 | Parsers defined with `funcparserlib` can work with _any_ tokens. You can plug your custom tokenizers and token types or even parse raw strings as lists of character tokens. 29 | 30 | In this guide we will use the _recommended_ way of writing tokenizers: `make_tokenizer()` from the `funcparserlib.lexer` module. 31 | 32 | Let's identify token types in our numeric expressions language: 33 | 34 | * Whitespace 35 | * Spaces, tabs, newlines 36 | * Integer numbers 37 | * `0`, `256`, `-42`, ... 38 | * Floating point numbers 39 | * `3.1415`, `27.1828e-01`, ... 40 | * Operators 41 | * `(`, `)`, `*`, `+`, `/`, `-`, `**` 42 | 43 | We will define our token specs and pass them to `make_tokenizer()` to generate our tokenizer. We will also drop whitespace tokens from the result, since we don't need them. 44 | 45 | Some imports first: 46 | 47 | ```pycon 48 | >>> from typing import List 49 | >>> from funcparserlib.lexer import make_tokenizer, TokenSpec, Token 50 | 51 | ``` 52 | 53 | The tokenizer itself: 54 | 55 | ```pycon 56 | >>> def tokenize(s: str) -> List[Token]: 57 | ... specs = [ 58 | ... TokenSpec("whitespace", r"\s+"), 59 | ... TokenSpec("float", r"[+\-]?\d+\.\d*([Ee][+\-]?\d+)*"), 60 | ... TokenSpec("int", r"[+\-]?\d+"), 61 | ... TokenSpec("op", r"(\*\*)|[+\-*/()]"), 62 | ... ] 63 | ... tokenizer = make_tokenizer(specs) 64 | ... return [t for t in tokenizer(s) if t.type != "whitespace"] 65 | 66 | ``` 67 | 68 | !!! Warning 69 | 70 | Be careful with ordering your token specs and your regexps so that larger tokens come first before their smaller subparts. In our token specs: 71 | 72 | * _Float_ tokens should come before _int_ tokens 73 | * `**` should come before `*` 74 | 75 | Let's try our tokenizer: 76 | 77 | ```pycon 78 | >>> tokenize("42 + 1337") 79 | [Token('int', '42'), Token('op', '+'), Token('int', '1337')] 80 | 81 | ``` 82 | 83 | The `str()` form of the token shows its position in the input text, also available via `t.start` and `t.end`: 84 | 85 | ```pycon 86 | >>> [str(t) for t in tokenize("42 + 1337")] 87 | ["1,1-1,2: int '42'", "1,4-1,4: op '+'", "1,6-1,9: int '1337'"] 88 | 89 | ``` 90 | 91 | 92 | Next 93 | ---- 94 | 95 | We have tokenized an numeric expression string into a list of tokens. 96 | 97 | In [the next chapter](parsing.md) you will learn how to parse these tokens by defining a grammar for our numeric expressions language. 98 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | Funcparserlib 2 | ============= 3 | 4 | Recursive descent parsing library for Python based on functional combinators. 5 | 6 | [![PyPI](https://img.shields.io/pypi/v/funcparserlib)](https://pypi.org/project/funcparserlib/) 7 | [![PyPI - Downloads](https://img.shields.io/pypi/dm/funcparserlib)](https://pypi.org/project/funcparserlib/) 8 | 9 | 10 | Description 11 | ----------- 12 | 13 | The primary focus of `funcparserlib` is **parsing little languages** or **external DSLs** (domain specific languages). 14 | 15 | Parsers made with `funcparserlib` are pure-Python LL(\*) parsers. 
It means that it's **very easy to write parsers** without thinking about lookaheads and other hardcore parsing stuff. However, recursive descent parsing is a rather slow method compared to LL(k) or LR(k) algorithms. Still, parsing with `funcparserlib` is **at least twice faster than PyParsing**, a very popular library for Python. 16 | 17 | The source code of `funcparserlib` is only 1.2K lines of code, with lots of comments. Its API is fully type hinted. It features the longest parsed prefix error reporting, as well as a tiny lexer generator for token position tracking. 18 | 19 | The idea of parser combinators used in `funcparserlib` comes from the [Introduction to Functional Programming](https://www.cl.cam.ac.uk/teaching/Lectures/funprog-jrh-1996/) course. We have converted it from ML into Python. 20 | 21 | 22 | Installation 23 | ------------ 24 | 25 | You can install `funcparserlib` from [PyPI](https://pypi.org/project/funcparserlib/): 26 | 27 | ```shell 28 | $ pip install funcparserlib 29 | ``` 30 | 31 | There are no dependencies on other libraries. 32 | 33 | 34 | Documentation 35 | ------------- 36 | 37 | * [Getting Started](https://funcparserlib.pirx.ru/getting-started/) 38 | * Your **starting point** with `funcparserlib` 39 | * [API Reference](https://funcparserlib.pirx.ru/api/) 40 | * Learn the details of the API 41 | 42 | There are several examples available in the `tests/` directory: 43 | 44 | * [GraphViz DOT parser](https://github.com/vlasovskikh/funcparserlib/blob/master/tests/dot.py) 45 | * [JSON parser](https://github.com/vlasovskikh/funcparserlib/blob/master/tests/json.py) 46 | 47 | See also [the changelog](https://funcparserlib.pirx.ru/changes/). 48 | 49 | 50 | Example 51 | ------- 52 | 53 | Let's consider a little language of **numeric expressions** with a syntax similar to Python expressions. 
Here are some expression strings in this language: 54 | 55 | ``` 56 | 0 57 | 1 + 2 + 3 58 | -1 + 2 ** 32 59 | 3.1415926 * (2 + 7.18281828e-1) * 42 60 | ``` 61 | 62 | 63 | Here is **the complete source code** of the tokenizer and the parser for this language written using `funcparserlib`: 64 | 65 | ```python 66 | from typing import List, Tuple, Union 67 | from dataclasses import dataclass 68 | 69 | from funcparserlib.lexer import make_tokenizer, TokenSpec, Token 70 | from funcparserlib.parser import tok, Parser, many, forward_decl, finished 71 | 72 | 73 | @dataclass 74 | class BinaryExpr: 75 | op: str 76 | left: "Expr" 77 | right: "Expr" 78 | 79 | 80 | Expr = Union[BinaryExpr, int, float] 81 | 82 | 83 | def tokenize(s: str) -> List[Token]: 84 | specs = [ 85 | TokenSpec("whitespace", r"\s+"), 86 | TokenSpec("float", r"[+\-]?\d+\.\d*([Ee][+\-]?\d+)*"), 87 | TokenSpec("int", r"[+\-]?\d+"), 88 | TokenSpec("op", r"(\*\*)|[+\-*/()]"), 89 | ] 90 | tokenizer = make_tokenizer(specs) 91 | return [t for t in tokenizer(s) if t.type != "whitespace"] 92 | 93 | 94 | def parse(tokens: List[Token]) -> Expr: 95 | int_num = tok("int") >> int 96 | float_num = tok("float") >> float 97 | number = int_num | float_num 98 | 99 | expr: Parser[Token, Expr] = forward_decl() 100 | parenthesized = -op("(") + expr + -op(")") 101 | primary = number | parenthesized 102 | power = primary + many(op("**") + primary) >> to_expr 103 | term = power + many((op("*") | op("/")) + power) >> to_expr 104 | sum = term + many((op("+") | op("-")) + term) >> to_expr 105 | expr.define(sum) 106 | 107 | document = expr + -finished 108 | 109 | return document.parse(tokens) 110 | 111 | 112 | def op(name: str) -> Parser[Token, str]: 113 | return tok("op", name) 114 | 115 | 116 | def to_expr(args: Tuple[Expr, List[Tuple[str, Expr]]]) -> Expr: 117 | first, rest = args 118 | result = first 119 | for op, expr in rest: 120 | result = BinaryExpr(op, result, expr) 121 | return result 122 | ``` 123 | 124 | Now, consider this numeric expression: `3.1415926 * (2 + 7.18281828e-1) * 42`. 125 | 126 | Let's `tokenize()` it using the tokenizer we've created with `funcparserlib.lexer`: 127 | 128 | ``` 129 | [ 130 | Token('float', '3.1415926'), 131 | Token('op', '*'), 132 | Token('op', '('), 133 | Token('int', '2'), 134 | Token('op', '+'), 135 | Token('float', '7.18281828e-1'), 136 | Token('op', ')'), 137 | Token('op', '*'), 138 | Token('int', '42'), 139 | ] 140 | ``` 141 | 142 | Let's `parse()` these tokens into an expression tree using our parser created with `funcparserlib.parser`: 143 | 144 | ``` 145 | BinaryExpr( 146 | op='*', 147 | left=BinaryExpr( 148 | op='*', 149 | left=3.1415926, 150 | right=BinaryExpr(op='+', left=2, right=0.718281828), 151 | ), 152 | right=42, 153 | ) 154 | ``` 155 | 156 | Learn how to write this parser using `funcparserlib` in the [Getting Started](https://funcparserlib.pirx.ru/getting-started/) guide! 157 | 158 | 159 | Used By 160 | ------- 161 | 162 | Some open-source projects that use `funcparserlib` as an explicit dependency: 163 | 164 | * [Hy](https://github.com/hylang/hy), a Lisp dialect that's embedded in Python 165 | * 4.7K stars, version `~=1.0`, Python 3.8+ 166 | * [Splash](https://github.com/scrapinghub/splash), a JavaScript rendering service with HTTP API, by Scrapinghub 167 | * 3.9K stars, version `*`. 
Python 3 in Docker 168 | * [graphite-beacon](https://github.com/klen/graphite-beacon), a simple alerting system for Graphite metrics 169 | * 453 stars, version `==0.3.6`, Python 2 and 3 170 | * [blockdiag](https://github.com/blockdiag/blockdiag), generates block-diagram image file from spec-text file 171 | * 194 stars, version `>= 1.0.0a0`, Python 3.7+ 172 | * [kll](https://github.com/kiibohd/kll), Keyboard Layout Language (KLL) compiler 173 | * 113 stars, copied source code, Python 3.5+ 174 | 175 | 176 | Next 177 | ---- 178 | 179 | Read the [Getting Started](https://funcparserlib.pirx.ru/getting-started/) guide to start learning `funcparserlib`. 180 | -------------------------------------------------------------------------------- /docs/media/extra.css: -------------------------------------------------------------------------------- 1 | .doc-heading code { 2 | font-weight: bold; 3 | } -------------------------------------------------------------------------------- /funcparserlib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vlasovskikh/funcparserlib/18c0a99dcdb427e35226c74b7cc2617223c8e1fa/funcparserlib/__init__.py -------------------------------------------------------------------------------- /funcparserlib/lexer.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2009/2023 Andrey Vlasovskikh 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | # software and associated documentation files (the "Software"), to deal in the Software 5 | # without restriction, including without limitation the rights to use, copy, modify, 6 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | # permit persons to whom the Software is furnished to do so, subject to the following 8 | # conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies 11 | # or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 15 | # PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 18 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | __all__ = ["make_tokenizer", "TokenSpec", "Token", "LexerError"] 21 | 22 | import re 23 | from typing import Callable, Iterable, List, Tuple, Optional, Sequence, Pattern, Union 24 | 25 | 26 | _Place = Tuple[int, int] 27 | _Spec = Tuple[str, Tuple] 28 | 29 | 30 | class LexerError(Exception): 31 | def __init__(self, place: _Place, msg: str) -> None: 32 | self.place = place 33 | self.msg = msg 34 | 35 | def __str__(self) -> str: 36 | s = "cannot tokenize data" 37 | line, pos = self.place 38 | return '%s: %d,%d: "%s"' % (s, line, pos, self.msg) 39 | 40 | 41 | class TokenSpec: 42 | """A token specification for generating a lexer via `make_tokenizer()`.""" 43 | 44 | def __init__(self, type: str, pattern: str, flags: int = 0) -> None: 45 | """Initialize a `TokenSpec` object. 46 | 47 | Parameters: 48 | type (str): User-defined type of the token (e.g. 
`"name"`, `"number"`, 49 | `"operator"`) 50 | pattern (str): Regexp for matching this token type 51 | flags (int, optional): Regexp flags, the second argument of `re.compile()` 52 | """ 53 | self.type = type 54 | self.pattern = pattern 55 | self.flags = flags 56 | 57 | def __repr__(self) -> str: 58 | return "TokenSpec(%r, %r, %r)" % (self.type, self.pattern, self.flags) 59 | 60 | 61 | class Token: 62 | """A token object that represents a substring of certain type in your text. 63 | 64 | You can compare tokens for equality using the `==` operator. Tokens also define 65 | custom `repr()` and `str()`. 66 | 67 | Attributes: 68 | type (str): User-defined type of the token (e.g. `"name"`, `"number"`, 69 | `"operator"`) 70 | value (str): Text value of the token 71 | start (Optional[Tuple[int, int]]): Start position (_line_, _column_) 72 | end (Optional[Tuple[int, int]]): End position (_line_, _column_) 73 | """ 74 | 75 | def __init__( 76 | self, 77 | type: str, 78 | value: str, 79 | start: Optional[_Place] = None, 80 | end: Optional[_Place] = None, 81 | ) -> None: 82 | """Initialize a `Token` object.""" 83 | self.type = type 84 | self.value = value 85 | self.start = start 86 | self.end = end 87 | 88 | def __repr__(self) -> str: 89 | return "Token(%r, %r)" % (self.type, self.value) 90 | 91 | def __eq__(self, other: object) -> bool: 92 | # FIXME: Case sensitivity is assumed here 93 | if not isinstance(other, Token): 94 | return False 95 | else: 96 | return self.type == other.type and self.value == other.value 97 | 98 | def _pos_str(self) -> str: 99 | if self.start is None or self.end is None: 100 | return "" 101 | else: 102 | sl, sp = self.start 103 | el, ep = self.end 104 | return "%d,%d-%d,%d:" % (sl, sp, el, ep) 105 | 106 | def __str__(self) -> str: 107 | s = "%s %s '%s'" % (self._pos_str(), self.type, self.value) 108 | return s.strip() 109 | 110 | @property 111 | def name(self) -> str: 112 | return self.value 113 | 114 | def pformat(self) -> str: 115 | return "%s %s '%s'" % ( 116 | self._pos_str().ljust(20), # noqa 117 | self.type.ljust(14), 118 | self.value, 119 | ) 120 | 121 | 122 | def make_tokenizer( 123 | specs: Sequence[Union[TokenSpec, _Spec]], 124 | ) -> Callable[[str], Iterable[Token]]: 125 | # noinspection GrazieInspection 126 | """Make a function that tokenizes text based on the regexp specs. 127 | 128 | Type: `(Sequence[TokenSpec | Tuple]) -> Callable[[str], Iterable[Token]]` 129 | 130 | A token spec is `TokenSpec` instance. 131 | 132 | !!! Note 133 | 134 | For legacy reasons, a token spec may also be a tuple of (_type_, _args_), where 135 | _type_ sets the value of `Token.type` for the token, and _args_ are the 136 | positional arguments for `re.compile()`: either just (_pattern_,) or 137 | (_pattern_, _flags_). 138 | 139 | It returns a tokenizer function that takes a string and returns an iterable of 140 | `Token` objects, or raises `LexerError` if it cannot tokenize the string according 141 | to its token specs. 142 | 143 | Examples: 144 | 145 | ```pycon 146 | >>> tokenize = make_tokenizer([ 147 | ... TokenSpec("space", r"\\s+"), 148 | ... TokenSpec("id", r"\\w+"), 149 | ... TokenSpec("op", r"[,!]"), 150 | ... ]) 151 | >>> text = "Hello, World!" 152 | >>> [t for t in tokenize(text) if t.type != "space"] # noqa 153 | [Token('id', 'Hello'), Token('op', ','), Token('id', 'World'), Token('op', '!')] 154 | >>> text = "Bye?" 155 | >>> list(tokenize(text)) 156 | Traceback (most recent call last): 157 | ... 158 | lexer.LexerError: cannot tokenize data: 1,4: "Bye?" 
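>>> # An extra illustration: each token records its start and end positions
>>> [t.start for t in tokenize("Hello, World!") if t.type != "space"]
[(1, 1), (1, 6), (1, 8), (1, 13)]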
159 | 160 | ``` 161 | """ 162 | compiled: List[Tuple[str, Pattern[str]]] = [] 163 | for spec in specs: 164 | if isinstance(spec, TokenSpec): 165 | c = spec.type, re.compile(spec.pattern, spec.flags) 166 | else: 167 | name, args = spec 168 | c = name, re.compile(*args) 169 | compiled.append(c) 170 | 171 | def match_specs(s: str, i: int, position: Tuple[int, int]) -> Token: 172 | line, pos = position 173 | for type, regexp in compiled: 174 | m = regexp.match(s, i) 175 | if m is not None: 176 | value = m.group() 177 | nls = value.count("\n") 178 | n_line = line + nls 179 | if nls == 0: 180 | n_pos = pos + len(value) 181 | else: 182 | n_pos = len(value) - value.rfind("\n") - 1 183 | return Token(type, value, (line, pos + 1), (n_line, n_pos)) 184 | else: 185 | err_line = s.splitlines()[line - 1] 186 | raise LexerError((line, pos + 1), err_line) 187 | 188 | def f(s: str) -> Iterable[Token]: 189 | length = len(s) 190 | line, pos = 1, 0 191 | i = 0 192 | while i < length: 193 | t = match_specs(s, i, (line, pos)) 194 | yield t 195 | if t.end is None: 196 | raise ValueError("Token %r has no end specified", (t,)) 197 | line, pos = t.end 198 | i += len(t.value) 199 | 200 | return f 201 | 202 | 203 | # This is an example of token specs. See also [this article][1] for a 204 | # discussion of searching for multiline comments using regexps (including `*?`). 205 | # 206 | # [1]: http://ostermiller.org/findcomment.html 207 | _example_token_specs = [ 208 | TokenSpec("COMMENT", r"\(\*(.|[\r\n])*?\*\)", re.MULTILINE), 209 | TokenSpec("COMMENT", r"\{(.|[\r\n])*?\}", re.MULTILINE), 210 | TokenSpec("COMMENT", r"//.*"), 211 | TokenSpec("NL", r"[\r\n]+"), 212 | TokenSpec("SPACE", r"[ \t\r\n]+"), 213 | TokenSpec("NAME", r"[A-Za-z_][A-Za-z_0-9]*"), 214 | TokenSpec("REAL", r"[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*"), 215 | TokenSpec("INT", r"[0-9]+"), 216 | TokenSpec("INT", r"\$[0-9A-Fa-f]+"), 217 | TokenSpec("OP", r"(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]"), 218 | TokenSpec("STRING", r"'([^']|(''))*'"), 219 | TokenSpec("CHAR", r"#[0-9]+"), 220 | TokenSpec("CHAR", r"#\$[0-9A-Fa-f]+"), 221 | ] 222 | # tokenize = make_tokenizer(_example_token_specs) 223 | -------------------------------------------------------------------------------- /funcparserlib/parser.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2009/2023 Andrey Vlasovskikh 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | # software and associated documentation files (the "Software"), to deal in the Software 5 | # without restriction, including without limitation the rights to use, copy, modify, 6 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | # permit persons to whom the Software is furnished to do so, subject to the following 8 | # conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies 11 | # or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 15 | # PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 18 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | 20 | """Functional parsing combinators. 21 | 22 | Parsing combinators define an internal domain-specific language (DSL) for describing 23 | the parsing rules of a grammar. The DSL allows you to start with a few primitive 24 | parsers, then combine your parsers to get more complex ones, and finally cover 25 | the whole grammar you want to parse. 26 | 27 | The structure of the language: 28 | 29 | * Class `Parser` 30 | * All the primitives and combinators of the language return `Parser` objects 31 | * It defines the main `Parser.parse(tokens)` method 32 | * Primitive parsers 33 | * `tok(type, value)`, `a(value)`, `some(pred)`, `forward_decl()`, `finished` 34 | * Parser combinators 35 | * `p1 + p2`, `p1 | p2`, `p >> f`, `-p`, `maybe(p)`, `many(p)`, `oneplus(p)`, 36 | `skip(p)` 37 | * Abstraction 38 | * Use regular Python variables `p = ... # Expression of type Parser` to define new 39 | rules (non-terminals) of your grammar 40 | 41 | Every time you apply one of the combinators, you get a new `Parser` object. In other 42 | words, the set of `Parser` objects is closed under the means of combination. 43 | 44 | !!! Note 45 | 46 | We took the parsing combinators language from the book [Introduction to Functional 47 | Programming][1] and translated it from ML into Python. 48 | 49 | [1]: https://www.cl.cam.ac.uk/teaching/Lectures/funprog-jrh-1996/ 50 | """ 51 | 52 | __all__ = [ 53 | "some", 54 | "a", 55 | "tok", 56 | "many", 57 | "pure", 58 | "finished", 59 | "maybe", 60 | "skip", 61 | "oneplus", 62 | "forward_decl", 63 | "NoParseError", 64 | "Parser", 65 | ] 66 | 67 | import logging 68 | import warnings 69 | from typing import ( 70 | Any, 71 | Callable, 72 | Generic, 73 | List, 74 | Optional, 75 | Sequence, 76 | Tuple, 77 | TypeVar, 78 | Union, 79 | cast, 80 | overload, 81 | ) 82 | 83 | from funcparserlib.lexer import Token 84 | 85 | log = logging.getLogger("funcparserlib") 86 | 87 | debug = False 88 | 89 | _A = TypeVar("_A") 90 | _B = TypeVar("_B") 91 | _C = TypeVar("_C") 92 | 93 | 94 | class Parser(Generic[_A, _B]): 95 | """A parser object that can parse a sequence of tokens or can be combined with 96 | other parsers using `+`, `|`, `>>`, `many()`, and other parsing combinators. 97 | 98 | Type: `Parser[A, B]` 99 | 100 | The generic variables in the type are: `A` — the type of the tokens in the 101 | sequence to parse,`B` — the type of the parsed value. 102 | 103 | In order to define a parser for your grammar: 104 | 105 | 1. You start with primitive parsers by calling `a(value)`, `some(pred)`, 106 | `forward_decl()`, `finished` 107 | 2. You use parsing combinators `p1 + p2`, `p1 | p2`, `p >> f`, `many(p)`, and 108 | others to combine parsers into a more complex parser 109 | 3. You can assign complex parsers to variables to define names that correspond to 110 | the rules of your grammar 111 | 112 | !!! Note 113 | 114 | The constructor `Parser.__init__()` is considered **internal** and may be 115 | changed in future versions. Use primitive parsers and parsing combinators to 116 | construct new parsers. 117 | """ 118 | 119 | def __init__( 120 | self, 121 | p: Union[ 122 | "Parser[_A, _B]", 123 | Callable[[Sequence[_A], "State"], Tuple[_B, "State"]], 124 | ], 125 | ) -> None: 126 | """Wrap the parser function `p` into a `Parser` object.""" 127 | self.name = "" 128 | self.define(p) 129 | 130 | def named(self, name: str) -> "Parser[_A, _B]": 131 | # noinspection GrazieInspection 132 | """Specify the name of the parser for easier debugging. 
133 | 134 | Type: `(str) -> Parser[A, B]` 135 | 136 | This name is used in the debug-level parsing log. You can also get it via the 137 | `Parser.name` attribute. 138 | 139 | Examples: 140 | 141 | ```pycon 142 | >>> expr = (a("x") + a("y")).named("expr") 143 | >>> expr.name 144 | 'expr' 145 | 146 | ``` 147 | 148 | ```pycon 149 | >>> expr = a("x") + a("y") 150 | >>> expr.name 151 | "('x', 'y')" 152 | 153 | ``` 154 | 155 | !!! Note 156 | 157 | You can enable the parsing log this way: 158 | 159 | ```python 160 | import logging 161 | logging.basicConfig(level=logging.DEBUG) 162 | import funcparserlib.parser 163 | funcparserlib.parser.debug = True 164 | ``` 165 | 166 | The way to enable the parsing log may be changed in future versions. 167 | """ 168 | self.name = name 169 | return self 170 | 171 | def define( 172 | self, 173 | p: Union[ 174 | "Parser[_A, _B]", 175 | Callable[[Sequence[_A], "State"], Tuple[_B, "State"]], 176 | ], 177 | ) -> None: 178 | """Define the parser created earlier as a forward declaration. 179 | 180 | Type: `(Parser[A, B]) -> None` 181 | 182 | Use `p = forward_decl()` in combination with `p.define(...)` to define 183 | recursive parsers. 184 | 185 | See the examples in the docs for `forward_decl()`. 186 | """ 187 | f = getattr(p, "run", p) 188 | if debug: 189 | setattr(self, "_run", f) 190 | else: 191 | setattr(self, "run", f) 192 | name = getattr(p, "name", p.__doc__) 193 | if name is not None: 194 | self.named(name) 195 | 196 | def run(self, tokens: Sequence[_A], s: "State") -> Tuple[_B, "State"]: 197 | """Run the parser against the tokens with the specified parsing state. 198 | 199 | Type: `(Sequence[A], State) -> Tuple[B, State]` 200 | 201 | The parsing state includes the current position in the sequence being parsed, 202 | and the position of the rightmost token that has been consumed while parsing for 203 | better error messages. 204 | 205 | If the parser fails to parse the tokens, it raises `NoParseError`. 206 | 207 | !!! Warning 208 | 209 | This is method is **internal** and may be changed in future versions. Use 210 | `Parser.parse(tokens)` instead and let the parser object take care of 211 | updating the parsing state. 212 | """ 213 | if debug: 214 | log.debug("trying %s" % self.name) 215 | return self._run(tokens, s) 216 | 217 | def _run(self, tokens: Sequence[_A], s: "State") -> Tuple[_B, "State"]: 218 | raise NotImplementedError("you must define() a parser") 219 | 220 | def parse(self, tokens: Sequence[_A]) -> _B: 221 | """Parse the sequence of tokens and return the parsed value. 222 | 223 | Type: `(Sequence[A]) -> B` 224 | 225 | It takes a sequence of tokens of arbitrary type `A` and returns the parsed value 226 | of arbitrary type `B`. 227 | 228 | If the parser fails to parse the tokens, it raises `NoParseError`. 229 | 230 | !!! Note 231 | 232 | Although `Parser.parse()` can parse sequences of any objects (including 233 | `str` which is a sequence of `str` chars), **the recommended way** is 234 | parsing sequences of `Token` objects. 235 | 236 | You **should** use a regexp-based tokenizer `make_tokenizer()` defined in 237 | `funcparserlib.lexer` to convert your text into a sequence of `Token` 238 | objects before parsing it. You will get more readable parsing error messages 239 | (as `Token` objects contain their position in the source file) and good 240 | separation of the lexical and syntactic levels of the grammar. 
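Example (a minimal sketch, parsing a prepared list of `Token` objects):

```pycon
>>> p = tok("int") >> int
>>> p.parse([Token("int", "42")])
42

```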
241 | """ 242 | try: 243 | (tree, _) = self.run(tokens, State(0, 0, None)) 244 | return tree 245 | except NoParseError as e: 246 | max = e.state.max 247 | if len(tokens) > max: 248 | t = tokens[max] 249 | if isinstance(t, Token): 250 | if t.start is None or t.end is None: 251 | loc = "" 252 | else: 253 | s_line, s_pos = t.start 254 | e_line, e_pos = t.end 255 | loc = "%d,%d-%d,%d: " % (s_line, s_pos, e_line, e_pos) 256 | msg = "%s%s: %r" % (loc, e.msg, t.value) 257 | elif isinstance(t, str): 258 | msg = "%s: %r" % (e.msg, t) 259 | else: 260 | msg = "%s: %s" % (e.msg, t) 261 | else: 262 | msg = "got unexpected end of input" 263 | e_parser = e.state.parser 264 | if isinstance(e_parser, Parser): 265 | msg = "%s, expected: %s" % (msg, e_parser.name) 266 | e.msg = msg 267 | raise 268 | 269 | @overload 270 | def __add__( # type: ignore[misc] 271 | self, other: "_IgnoredParser[_A]" 272 | ) -> "Parser[_A, _B]": 273 | pass 274 | 275 | @overload 276 | def __add__(self, other: "Parser[_A, _C]") -> "_TupleParser[_A, Tuple[_B, _C]]": 277 | pass 278 | 279 | def __add__( 280 | self, 281 | other: Union["_IgnoredParser[_A]", "Parser[_A, _C]"], 282 | ) -> Union["Parser[_A, _B]", "_TupleParser[_A, Tuple[_B, _C]]"]: 283 | """Sequential combination of parsers. It runs this parser, then the other 284 | parser. 285 | 286 | The return value of the resulting parser is a tuple of each parsed value in 287 | the sum of parsers. We merge all parsing results of `p1 + p2 + ... + pN` into a 288 | single tuple. It means that the parsing result may be a 2-tuple, a 3-tuple, 289 | a 4-tuple, etc. of parsed values. You avoid this by transforming the parsed 290 | pair into a new value using the `>>` combinator. 291 | 292 | You can also skip some parsing results in the resulting parsers by using `-p` 293 | or `skip(p)` for some parsers in your sum of parsers. It means that the parsing 294 | result might be a single value, not a tuple of parsed values. See the docs 295 | for `Parser.__neg__()` for more examples. 296 | 297 | Overloaded types (lots of them to provide stricter checking for the quite 298 | dynamic return type of this method): 299 | 300 | * `(self: Parser[A, B], _IgnoredParser[A]) -> Parser[A, B]` 301 | * `(self: Parser[A, B], Parser[A, C]) -> _TupleParser[A, Tuple[B, C]]` 302 | * `(self: _TupleParser[A, B], _IgnoredParser[A]) -> _TupleParser[A, B]` 303 | * `(self: _TupleParser[A, B], Parser[A, Any]) -> Parser[A, Any]` 304 | * `(self: _IgnoredParser[A], _IgnoredParser[A]) -> _IgnoredParser[A]` 305 | * `(self: _IgnoredParser[A], Parser[A, C]) -> Parser[A, C]` 306 | 307 | Examples: 308 | 309 | ```pycon 310 | >>> expr = a("x") + a("y") 311 | >>> expr.parse("xy") 312 | ('x', 'y') 313 | 314 | ``` 315 | 316 | ```pycon 317 | >>> expr = a("x") + a("y") + a("z") 318 | >>> expr.parse("xyz") 319 | ('x', 'y', 'z') 320 | 321 | ``` 322 | 323 | ```pycon 324 | >>> expr = a("x") + a("y") 325 | >>> expr.parse("xz") 326 | Traceback (most recent call last): 327 | ... 
328 | parser.NoParseError: got unexpected token: 'z', expected: 'y' 329 | 330 | ``` 331 | """ 332 | 333 | def magic(v1: Any, v2: Any) -> _Tuple: 334 | if isinstance(v1, _Tuple): 335 | return _Tuple(v1 + (v2,)) 336 | else: 337 | return _Tuple((v1, v2)) 338 | 339 | @_TupleParser 340 | def _add(tokens: Sequence[_A], s: State) -> Tuple[Tuple[_B, _C], State]: 341 | (v1, s2) = self.run(tokens, s) 342 | (v2, s3) = other.run(tokens, s2) 343 | return cast(Tuple[_B, _C], magic(v1, v2)), s3 344 | 345 | @Parser 346 | def ignored_right(tokens: Sequence[_A], s: State) -> Tuple[_B, State]: 347 | v, s2 = self.run(tokens, s) 348 | _, s3 = other.run(tokens, s2) 349 | return v, s3 350 | 351 | name = "(%s, %s)" % (self.name, other.name) 352 | if isinstance(other, _IgnoredParser): 353 | return ignored_right.named(name) 354 | else: 355 | _add.name = name 356 | return _add 357 | 358 | def __or__(self, other: "Parser[_A, _C]") -> "Parser[_A, Union[_B, _C]]": 359 | """Choice combination of parsers. 360 | 361 | It runs this parser and returns its result. If the parser fails, it runs the 362 | other parser. 363 | 364 | Examples: 365 | 366 | ```pycon 367 | >>> expr = a("x") | a("y") 368 | >>> expr.parse("x") 369 | 'x' 370 | >>> expr.parse("y") 371 | 'y' 372 | >>> expr.parse("z") 373 | Traceback (most recent call last): 374 | ... 375 | parser.NoParseError: got unexpected token: 'z', expected: 'x' or 'y' 376 | 377 | ``` 378 | """ 379 | 380 | @Parser 381 | def _or(tokens: Sequence[_A], s: State) -> Tuple[Union[_B, _C], State]: 382 | try: 383 | return self.run(tokens, s) 384 | except NoParseError as e: 385 | state = e.state 386 | try: 387 | return other.run(tokens, State(s.pos, state.max, state.parser)) 388 | except NoParseError as e: 389 | if s.pos == e.state.max: 390 | e.state = State(e.state.pos, e.state.max, _or) 391 | raise 392 | 393 | _or.name = "%s or %s" % (self.name, other.name) 394 | return _or 395 | 396 | def __rshift__(self, f: Callable[[_B], _C]) -> "Parser[_A, _C]": 397 | """Transform the parsing result by applying the specified function. 398 | 399 | Type: `(Callable[[B], C]) -> Parser[A, C]` 400 | 401 | You can use it for transforming the parsed value into another value before 402 | including it into the parse tree (the AST). 403 | 404 | Examples: 405 | 406 | ```pycon 407 | >>> def make_canonical_name(s): 408 | ... return s.lower() 409 | >>> expr = (a("D") | a("d")) >> make_canonical_name 410 | >>> expr.parse("D") 411 | 'd' 412 | >>> expr.parse("d") 413 | 'd' 414 | 415 | ``` 416 | """ 417 | 418 | @Parser 419 | def _shift(tokens: Sequence[_A], s: State) -> Tuple[_C, State]: 420 | (v, s2) = self.run(tokens, s) 421 | return f(v), s2 422 | 423 | return _shift.named(self.name) 424 | 425 | def bind(self, f: Callable[[_B], "Parser[_A, _C]"]) -> "Parser[_A, _C]": 426 | """Bind the parser to a monadic function that returns a new parser. 427 | 428 | Type: `(Callable[[B], Parser[A, C]]) -> Parser[A, C]` 429 | 430 | Also known as `>>=` in Haskell. 431 | 432 | !!! Note 433 | 434 | You can parse any context-free grammar without resorting to `bind`. Due 435 | to its poor performance please use it only when you really need it. 
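Example (a minimal sketch: parse a token, then require the next token to be equal to the one just parsed):

```pycon
>>> p = a("x").bind(lambda v: a(v))
>>> p.parse("xx")
'x'

```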
436 | """ 437 | 438 | @Parser 439 | def _bind(tokens: Sequence[_A], s: State) -> Tuple[_C, State]: 440 | (v, s2) = self.run(tokens, s) 441 | return f(v).run(tokens, s2) 442 | 443 | _bind.name = "(%s >>=)" % (self.name,) 444 | return _bind 445 | 446 | def __neg__(self) -> "_IgnoredParser[_A]": 447 | """Return a parser that parses the same tokens, but its parsing result is 448 | ignored by the sequential `+` combinator. 449 | 450 | Type: `(Parser[A, B]) -> _IgnoredParser[A]` 451 | 452 | You can use it for throwing away elements of concrete syntax (e.g. `","`, 453 | `";"`). 454 | 455 | Examples: 456 | 457 | ```pycon 458 | >>> expr = -a("x") + a("y") 459 | >>> expr.parse("xy") 460 | 'y' 461 | 462 | ``` 463 | 464 | ```pycon 465 | >>> expr = a("x") + -a("y") 466 | >>> expr.parse("xy") 467 | 'x' 468 | 469 | ``` 470 | 471 | ```pycon 472 | >>> expr = a("x") + -a("y") + a("z") 473 | >>> expr.parse("xyz") 474 | ('x', 'z') 475 | 476 | ``` 477 | 478 | ```pycon 479 | >>> expr = -a("x") + a("y") + -a("z") 480 | >>> expr.parse("xyz") 481 | 'y' 482 | 483 | ``` 484 | 485 | ```pycon 486 | >>> expr = -a("x") + a("y") 487 | >>> expr.parse("yz") 488 | Traceback (most recent call last): 489 | ... 490 | parser.NoParseError: got unexpected token: 'y', expected: 'x' 491 | 492 | ``` 493 | 494 | ```pycon 495 | >>> expr = a("x") + -a("y") 496 | >>> expr.parse("xz") 497 | Traceback (most recent call last): 498 | ... 499 | parser.NoParseError: got unexpected token: 'z', expected: 'y' 500 | 501 | ``` 502 | 503 | !!! Note 504 | 505 | You **should not** pass the resulting parser to any combinators other than 506 | `+`. You **should** have at least one non-skipped value in your 507 | `p1 + p2 + ... + pN`. The parsed value of `-p` is an **internal** `_Ignored` 508 | object, not intended for actual use. 509 | """ 510 | return _IgnoredParser(self) 511 | 512 | 513 | class State: 514 | """Parsing state that is maintained basically for error reporting. 515 | 516 | It consists of the current position `pos` in the sequence being parsed, and the 517 | position `max` of the rightmost token that has been consumed while parsing. 
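Example (a small illustration: `State(0, 0)` is the initial state that `Parser.parse()` starts with):

```pycon
>>> State(0, 0)
State(0, 0)

```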
518 | """ 519 | 520 | def __init__( 521 | self, 522 | pos: int, 523 | max: int, 524 | parser: Union[ 525 | Parser, 526 | Callable[[Any, "State"], Tuple[Any, "State"]], 527 | None, 528 | ] = None, 529 | ) -> None: 530 | self.pos = pos 531 | self.max = max 532 | self.parser = parser 533 | 534 | def __str__(self) -> str: 535 | return str((self.pos, self.max)) 536 | 537 | def __repr__(self) -> str: 538 | return "State(%r, %r)" % (self.pos, self.max) 539 | 540 | 541 | class NoParseError(Exception): 542 | def __init__(self, msg: str, state: State) -> None: 543 | self.msg = msg 544 | self.state = state 545 | 546 | def __str__(self) -> str: 547 | return self.msg 548 | 549 | 550 | class _Tuple(tuple): 551 | pass 552 | 553 | 554 | class _TupleParser(Parser[_A, _B], Generic[_A, _B]): 555 | @overload # type: ignore[override] 556 | def __add__(self, other: "_IgnoredParser[_A]") -> "_TupleParser[_A, _B]": 557 | pass 558 | 559 | @overload 560 | def __add__(self, other: Parser[_A, Any]) -> Parser[_A, Any]: 561 | pass 562 | 563 | def __add__( 564 | self, other: Union["_IgnoredParser[_A]", Parser[_A, Any]] 565 | ) -> Union["_TupleParser[_A, _B]", Parser[_A, Any]]: 566 | return super().__add__(other) 567 | 568 | 569 | class _Ignored: 570 | def __init__(self, value: Any) -> None: 571 | self.value = value 572 | 573 | def __repr__(self) -> str: 574 | return "_Ignored(%s)" % repr(self.value) 575 | 576 | def __eq__(self, other: object) -> bool: 577 | return isinstance(other, _Ignored) and self.value == other.value 578 | 579 | 580 | @Parser 581 | def finished(tokens: Sequence[Any], s: State) -> Tuple[None, State]: 582 | """A parser that throws an exception if there are any unparsed tokens left in the 583 | sequence.""" 584 | if s.pos >= len(tokens): 585 | return None, s 586 | else: 587 | s2 = State(s.pos, s.max, finished if s.pos == s.max else s.parser) 588 | raise NoParseError("got unexpected token", s2) 589 | 590 | 591 | finished.name = "end of input" 592 | 593 | 594 | def many(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: 595 | """Return a parser that applies the parser `p` as many times as it succeeds at 596 | parsing the tokens. 597 | 598 | Return a parser that infinitely applies the parser `p` to the input sequence 599 | of tokens as long as it successfully parses them. The parsed value is a list of 600 | the sequentially parsed values. 601 | 602 | Examples: 603 | 604 | ```pycon 605 | >>> expr = many(a("x")) 606 | >>> expr.parse("x") 607 | ['x'] 608 | >>> expr.parse("xx") 609 | ['x', 'x'] 610 | >>> expr.parse("xxxy") # noqa 611 | ['x', 'x', 'x'] 612 | >>> expr.parse("y") 613 | [] 614 | 615 | ``` 616 | """ 617 | 618 | @Parser 619 | def _many(tokens: Sequence[_A], s: State) -> Tuple[List[_B], State]: 620 | res = [] 621 | try: 622 | while True: 623 | (v, s) = p.run(tokens, s) 624 | res.append(v) 625 | except NoParseError as e: 626 | s2 = State(s.pos, e.state.max, e.state.parser) 627 | if debug: 628 | log.debug( 629 | "*matched* %d instances of %s, new state = %s" 630 | % (len(res), _many.name, s2) 631 | ) 632 | return res, s2 633 | 634 | _many.name = "{ %s }" % p.name 635 | return _many 636 | 637 | 638 | def some(pred: Callable[[_A], bool]) -> Parser[_A, _A]: 639 | """Return a parser that parses a token if it satisfies the predicate `pred`. 
640 | 641 | Type: `(Callable[[A], bool]) -> Parser[A, A]` 642 | 643 | Examples: 644 | 645 | ```pycon 646 | >>> expr = some(lambda s: s.isalpha()).named('alpha') 647 | >>> expr.parse("x") 648 | 'x' 649 | >>> expr.parse("y") 650 | 'y' 651 | >>> expr.parse("1") 652 | Traceback (most recent call last): 653 | ... 654 | parser.NoParseError: got unexpected token: '1', expected: alpha 655 | 656 | ``` 657 | 658 | !!! Warning 659 | 660 | The `some()` combinator is quite slow and may be changed or removed in future 661 | versions. If you need a parser for a token by its type (e.g. any identifier) 662 | and maybe its value, use `tok(type[, value])` instead. You should use 663 | `make_tokenizer()` from `funcparserlib.lexer` to tokenize your text first. 664 | """ 665 | 666 | @Parser 667 | def _some(tokens: Sequence[_A], s: State) -> Tuple[_A, State]: 668 | if s.pos >= len(tokens): 669 | s2 = State(s.pos, s.max, _some if s.pos == s.max else s.parser) 670 | raise NoParseError("got unexpected end of input", s2) 671 | else: 672 | t = tokens[s.pos] 673 | if pred(t): 674 | pos = s.pos + 1 675 | s2 = State(pos, max(pos, s.max), s.parser) 676 | if debug: 677 | log.debug("*matched* %r, new state = %s" % (t, s2)) 678 | return t, s2 679 | else: 680 | s2 = State(s.pos, s.max, _some if s.pos == s.max else s.parser) 681 | if debug and isinstance(s2.parser, Parser): 682 | log.debug( 683 | "failed %r, state = %s, expected = %s" % (t, s2, s2.parser.name) 684 | ) 685 | raise NoParseError("got unexpected token", s2) 686 | 687 | _some.name = "some(...)" 688 | return _some 689 | 690 | 691 | def a(value: _A) -> Parser[_A, _A]: 692 | """Return a parser that parses a token if it's equal to `value`. 693 | 694 | Type: `(A) -> Parser[A, A]` 695 | 696 | Examples: 697 | 698 | ```pycon 699 | >>> expr = a("x") 700 | >>> expr.parse("x") 701 | 'x' 702 | >>> expr.parse("y") 703 | Traceback (most recent call last): 704 | ... 705 | parser.NoParseError: got unexpected token: 'y', expected: 'x' 706 | 707 | ``` 708 | 709 | !!! Note 710 | 711 | Although `Parser.parse()` can parse sequences of any objects (including 712 | `str` which is a sequence of `str` chars), **the recommended way** is 713 | parsing sequences of `Token` objects. 714 | 715 | You **should** use a regexp-based tokenizer `make_tokenizer()` defined in 716 | `funcparserlib.lexer` to convert your text into a sequence of `Token` objects 717 | before parsing it. You will get more readable parsing error messages (as `Token` 718 | objects contain their position in the source file) and good separation of the 719 | lexical and syntactic levels of the grammar. 720 | """ 721 | name = getattr(value, "name", value) 722 | 723 | def eq_value(t: _A) -> bool: 724 | return t == value 725 | 726 | return some(eq_value).named(repr(name)) 727 | 728 | 729 | def tok(type: str, value: Optional[str] = None) -> Parser[Token, str]: 730 | """Return a parser that parses a `Token` and returns the string value of the token. 731 | 732 | Type: `(str, Optional[str]) -> Parser[Token, str]` 733 | 734 | You can match any token of the specified `type` or you can match a specific token by 735 | its `type` and `value`. 736 | 737 | Examples: 738 | 739 | ```pycon 740 | >>> expr = tok("expr") 741 | >>> expr.parse([Token("expr", "foo")]) 742 | 'foo' 743 | >>> expr.parse([Token("expr", "bar")]) 744 | 'bar' 745 | >>> expr.parse([Token("op", "=")]) 746 | Traceback (most recent call last): 747 | ... 
748 | parser.NoParseError: got unexpected token: '=', expected: expr 749 | 750 | ``` 751 | 752 | ```pycon 753 | >>> expr = tok("op", "=") 754 | >>> expr.parse([Token("op", "=")]) 755 | '=' 756 | >>> expr.parse([Token("op", "+")]) 757 | Traceback (most recent call last): 758 | ... 759 | parser.NoParseError: got unexpected token: '+', expected: '=' 760 | 761 | ``` 762 | 763 | !!! Note 764 | 765 | In order to convert your text to parse into a sequence of `Token` objects, 766 | use a regexp-based tokenizer `make_tokenizer()` defined in 767 | `funcparserlib.lexer`. You will get more readable parsing error messages (as 768 | `Token` objects contain their position in the source file) and good separation 769 | of the lexical and syntactic levels of the grammar. 770 | """ 771 | 772 | def eq_type(t: Token) -> bool: 773 | return t.type == type 774 | 775 | if value is not None: 776 | p = a(Token(type, value)) 777 | else: 778 | p = some(eq_type).named(type) 779 | return (p >> (lambda t: t.value)).named(p.name) 780 | 781 | 782 | def pure(x: _A) -> Parser[Any, _A]: 783 | """Wrap any object into a parser. 784 | 785 | Type: `(A) -> Parser[A, A]` 786 | 787 | A pure parser doesn't touch the tokens sequence, it just returns its pure `x` 788 | value. 789 | 790 | Also known as `return` in Haskell. 791 | """ 792 | 793 | @Parser 794 | def _pure(_: Sequence[Any], s: State) -> Tuple[_A, State]: 795 | return x, s 796 | 797 | _pure.name = "(pure %r)" % (x,) 798 | return _pure 799 | 800 | 801 | def maybe(p: Parser[_A, _B]) -> Parser[_A, Optional[_B]]: 802 | """Return a parser that returns `None` if the parser `p` fails. 803 | 804 | Examples: 805 | 806 | ```pycon 807 | >>> expr = maybe(a("x")) 808 | >>> expr.parse("x") 809 | 'x' 810 | >>> expr.parse("y") is None 811 | True 812 | 813 | ``` 814 | """ 815 | return (p | pure(None)).named("[ %s ]" % (p.name,)) 816 | 817 | 818 | def skip(p: Parser[_A, Any]) -> "_IgnoredParser[_A]": 819 | """An alias for `-p`. 820 | 821 | See also the docs for `Parser.__neg__()`. 
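Examples (a minimal sketch, mirroring the `-p` examples in `Parser.__neg__()`):

```pycon
>>> expr = skip(a("x")) + a("y")
>>> expr.parse("xy")
'y'

```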
822 | """ 823 | return -p 824 | 825 | 826 | class _IgnoredParser(Parser[_A, Any]): 827 | def __init__( 828 | self, 829 | p: Union[ 830 | Parser[_A, Any], 831 | Callable[[Sequence[_A], "State"], Tuple[Any, "State"]], 832 | ], 833 | ) -> None: 834 | super(_IgnoredParser, self).__init__(p) 835 | run = self._run if debug else self.run 836 | 837 | def ignored(tokens: Sequence[_A], s: State) -> Tuple[Any, State]: 838 | v, s2 = run(tokens, s) 839 | return v if isinstance(v, _Ignored) else _Ignored(v), s2 840 | 841 | self.define(ignored) 842 | name = getattr(p, "name", p.__doc__) 843 | if name is not None: 844 | self.name = name 845 | 846 | @overload # type: ignore[override] 847 | def __add__(self, other: "_IgnoredParser[_A]") -> "_IgnoredParser[_A]": 848 | pass 849 | 850 | @overload 851 | def __add__(self, other: Parser[_A, _C]) -> Parser[_A, _C]: 852 | pass 853 | 854 | def __add__( 855 | self, other: Union["_IgnoredParser[_A]", Parser[_A, _C]] 856 | ) -> Union["_IgnoredParser[_A]", Parser[_A, _C]]: 857 | if isinstance(other, _IgnoredParser): 858 | 859 | @_IgnoredParser 860 | def ip(tokens: Sequence[_A], s: State) -> Tuple[Any, State]: 861 | _, s2 = self.run(tokens, s) 862 | v, s3 = other.run(tokens, s2) 863 | return v, s3 864 | 865 | ip.name = "(%s, %s)" % (self.name, other.name) 866 | return ip 867 | else: 868 | 869 | @Parser 870 | def p(tokens: Sequence[_A], s: State) -> Tuple[_C, State]: 871 | _, s2 = self.run(tokens, s) 872 | v, s3 = other.run(tokens, s2) 873 | return v, s3 874 | 875 | p.name = "(%s, %s)" % (self.name, other.name) 876 | return p 877 | 878 | 879 | def oneplus(p: Parser[_A, _B]) -> Parser[_A, List[_B]]: 880 | """Return a parser that applies the parser `p` one or more times. 881 | 882 | A similar parser combinator `many(p)` means apply `p` zero or more times, whereas 883 | `oneplus(p)` means apply `p` one or more times. 884 | 885 | Examples: 886 | 887 | ```pycon 888 | >>> expr = oneplus(a("x")) 889 | >>> expr.parse("x") 890 | ['x'] 891 | >>> expr.parse("xx") 892 | ['x', 'x'] 893 | >>> expr.parse("y") 894 | Traceback (most recent call last): 895 | ... 896 | parser.NoParseError: got unexpected token: 'y', expected: 'x' 897 | 898 | ``` 899 | """ 900 | 901 | @Parser 902 | def _oneplus(tokens: Sequence[_A], s: State) -> Tuple[List[_B], State]: 903 | (v1, s2) = p.run(tokens, s) 904 | (v2, s3) = many(p).run(tokens, s2) 905 | return [v1] + v2, s3 906 | 907 | _oneplus.name = "(%s, { %s })" % (p.name, p.name) 908 | return _oneplus 909 | 910 | 911 | def with_forward_decls(suspension: Callable[[], Parser[_A, _B]]) -> Parser[_A, _B]: 912 | warnings.warn( 913 | "Use forward_decl() instead:\n" 914 | "\n" 915 | " p = forward_decl()\n" 916 | " ...\n" 917 | " p.define(parser_value)\n", 918 | DeprecationWarning, 919 | ) 920 | 921 | @Parser 922 | def f(tokens: Sequence[_A], s: State) -> Tuple[_B, State]: 923 | return suspension().run(tokens, s) 924 | 925 | return f 926 | 927 | 928 | def forward_decl() -> Parser[Any, Any]: 929 | """Return an undefined parser that can be used as a forward declaration. 930 | 931 | Type: `Parser[Any, Any]` 932 | 933 | Use `p = forward_decl()` in combination with `p.define(...)` to define recursive 934 | parsers. 935 | 936 | 937 | Examples: 938 | 939 | ```pycon 940 | >>> expr = forward_decl() 941 | >>> expr.define(a("x") + maybe(expr) + a("y")) 942 | >>> expr.parse("xxyy") # noqa 943 | ('x', ('x', None, 'y'), 'y') 944 | >>> expr.parse("xxy") 945 | Traceback (most recent call last): 946 | ... 
947 | parser.NoParseError: got unexpected end of input, expected: 'y' 948 | 949 | ``` 950 | 951 | !!! Note 952 | 953 | If you care about static types, you should add a type hint for your forward 954 | declaration, so that your type checker can check types in `p.define(...)` later: 955 | 956 | ```python 957 | p: Parser[str, int] = forward_decl() 958 | p.define(a("x")) # Type checker error 959 | p.define(a("1") >> int) # OK 960 | ``` 961 | """ 962 | 963 | @Parser 964 | def f(_tokens: Any, _s: Any) -> Any: 965 | raise NotImplementedError("you must define() a forward_decl somewhere") 966 | 967 | f.name = "forward_decl()" 968 | return f 969 | 970 | 971 | if __name__ == "__main__": 972 | import doctest 973 | 974 | doctest.testmod() 975 | -------------------------------------------------------------------------------- /funcparserlib/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vlasovskikh/funcparserlib/18c0a99dcdb427e35226c74b7cc2617223c8e1fa/funcparserlib/py.typed -------------------------------------------------------------------------------- /funcparserlib/util.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2009/2023 Andrey Vlasovskikh 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | # software and associated documentation files (the "Software"), to deal in the Software 5 | # without restriction, including without limitation the rights to use, copy, modify, 6 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | # permit persons to whom the Software is furnished to do so, subject to the following 8 | # conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies 11 | # or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 15 | # PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 18 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | from typing import TypeVar, Callable, Sequence 21 | 22 | _A = TypeVar("_A") 23 | 24 | 25 | def pretty_tree( 26 | x: _A, 27 | kids: Callable[[_A], Sequence[_A]], 28 | show: Callable[[_A], str], 29 | ) -> str: 30 | """Return a pseudo-graphic tree representation of the object `x` similar to the 31 | `tree` command in Unix. 32 | 33 | Type: `(T, Callable[[T], List[T]], Callable[[T], str]) -> str` 34 | 35 | It applies the parameter `show` (which is a function of type `(T) -> str`) to get a 36 | textual representation of the objects to show. 37 | 38 | It applies the parameter `kids` (which is a function of type `(T) -> List[T]`) to 39 | list the children of the object to show. 40 | 41 | Examples: 42 | 43 | ```pycon 44 | >>> print(pretty_tree( 45 | ... ["foo", ["bar", "baz"], "quux"], 46 | ... lambda obj: obj if isinstance(obj, list) else [], 47 | ... lambda obj: "[]" if isinstance(obj, list) else str(obj), 48 | ... 
)) 49 | [] 50 | |-- foo 51 | |-- [] 52 | | |-- bar 53 | | `-- baz 54 | `-- quux 55 | 56 | ``` 57 | """ 58 | (MID, END, CONT, LAST, ROOT) = ("|-- ", "`-- ", "| ", " ", "") 59 | 60 | def rec(obj: _A, indent: str, sym: str) -> str: 61 | line = indent + sym + show(obj) 62 | obj_kids = kids(obj) 63 | if len(obj_kids) == 0: 64 | return line 65 | else: 66 | if sym == MID: 67 | next_indent = indent + CONT 68 | elif sym == ROOT: 69 | next_indent = indent + ROOT 70 | else: 71 | next_indent = indent + LAST 72 | chars = [MID] * (len(obj_kids) - 1) + [END] 73 | lines = [rec(kid, next_indent, sym) for kid, sym in zip(obj_kids, chars)] 74 | return "\n".join([line] + lines) 75 | 76 | return rec(x, "", ROOT) 77 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: funcparserlib 2 | repo_url: https://github.com/vlasovskikh/funcparserlib 3 | repo_name: vlasovskikh/funcparserlib 4 | nav: 5 | - Home: index.md 6 | - Getting Started: 7 | - Getting Started - Intro: getting-started/index.md 8 | - Tokenizing Input: getting-started/tokenizing.md 9 | - Parsing Tokens: getting-started/parsing.md 10 | - Preparing the Parse Tree: getting-started/parse-tree.md 11 | - Tips and Tricks: getting-started/tips-and-tricks.md 12 | # - Examples: 13 | # - Nested Brackets Language: examples/brackets.md 14 | # - S-expressions Language: examples/s-exp.md 15 | # - DOT Language: examples/dot.md 16 | # - JSON Language: examples/json.md 17 | - API Reference: 18 | - API Overview: api/index.md 19 | - Lexer: api/lexer.md 20 | - Parser: api/parser.md 21 | - Utilities: api/util.md 22 | - Changelog: changes.md 23 | theme: 24 | name: material 25 | # icon: 26 | # logo: fontawesome/solid/angle-double-right 27 | features: 28 | - navigation.expand 29 | - navigation.tabs 30 | # - toc.integrate 31 | palette: 32 | - media: "(prefers-color-scheme: light)" 33 | scheme: default 34 | primary: indigo 35 | accent: indigo 36 | toggle: 37 | icon: material/weather-sunny 38 | name: Switch to dark mode 39 | - media: "(prefers-color-scheme: dark)" 40 | scheme: slate 41 | primary: blue 42 | accent: blue 43 | toggle: 44 | icon: material/weather-night 45 | name: Switch to light mode 46 | extra_css: 47 | - "media/extra.css" 48 | markdown_extensions: 49 | - pymdownx.highlight 50 | - pymdownx.superfences 51 | - admonition 52 | plugins: 53 | - search 54 | - mkdocstrings: 55 | handlers: 56 | python: 57 | options: 58 | show_root_toc_entry: false 59 | show_root_heading: true 60 | heading_level: 3 61 | show_source: false 62 | members: false 63 | watch: 64 | - funcparserlib 65 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | check_untyped_defs = True 3 | disallow_untyped_defs = True 4 | disallow_incomplete_defs = True 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "funcparserlib" 3 | version = "2.0.0a0" 4 | description = "Recursive descent parsing library based on functional combinators" 5 | authors = ["Andrey Vlasovskikh "] 6 | license = "MIT" 7 | readme = "README.md" 8 | homepage = "https://funcparserlib.pirx.ru" 9 | repository = "https://github.com/vlasovskikh/funcparserlib" 10 | classifiers = [ 11 | "Development Status :: 5 - 
Production/Stable", 12 | "Intended Audience :: Developers", 13 | "License :: OSI Approved :: MIT License", 14 | "Operating System :: OS Independent", 15 | "Programming Language :: Python :: 3.8", 16 | "Programming Language :: Python :: 3.9", 17 | "Programming Language :: Python :: 3.10", 18 | "Programming Language :: Python :: 3.11", 19 | "Programming Language :: Python :: 3.12", 20 | ] 21 | 22 | [tool.poetry.dependencies] 23 | python = "^3.8" 24 | 25 | [tool.poetry.dev-dependencies] 26 | pre-commit = {version = "^3.5.0"} 27 | tox = {version = "^4.4.6"} 28 | mkdocs = {version = "^1.4.2"} 29 | mkdocs-material = {version = "^9.1.1"} 30 | mkdocstrings = {extras = ["python"], version = "^0.24.0"} 31 | 32 | [build-system] 33 | requires = ["poetry-core>=1.5.1"] 34 | build-backend = "poetry.core.masonry.api" 35 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vlasovskikh/funcparserlib/18c0a99dcdb427e35226c74b7cc2617223c8e1fa/tests/__init__.py -------------------------------------------------------------------------------- /tests/dot.py: -------------------------------------------------------------------------------- 1 | # Copyright © 2009/2023 Andrey Vlasovskikh 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | # software and associated documentation files (the "Software"), to deal in the Software 5 | # without restriction, including without limitation the rights to use, copy, modify, 6 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | # permit persons to whom the Software is furnished to do so, subject to the following 8 | # conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in all copies 11 | # or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 15 | # PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 18 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | """A DOT language parser using funcparserlib. 21 | 22 | The parser is based on [the DOT grammar][1]. It is pretty complete with a few 23 | not supported things: 24 | 25 | * String escapes 26 | * Ports and compass points 27 | * XML identifiers 28 | 29 | At the moment, the parser builds only a parse tree, not an abstract syntax tree 30 | (AST), or an API for dealing with DOT. 
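A quick usage sketch (illustration only):

```pycon
>>> parse(tokenize("digraph g { a -> b }"))
Graph(strict=None, type='digraph', id='g', stmts=[Edge(nodes=['a', 'b'], attrs=[])])

```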
31 | 32 | [1]: https://www.graphviz.org/doc/info/lang.html 33 | """ 34 | 35 | import os 36 | import sys 37 | from re import MULTILINE 38 | from typing import Sequence, List, TypeVar, Callable, NamedTuple, Union, Optional 39 | 40 | from funcparserlib.lexer import TokenSpec, make_tokenizer, Token, LexerError 41 | from funcparserlib.parser import ( 42 | maybe, 43 | many, 44 | finished, 45 | oneplus, 46 | forward_decl, 47 | NoParseError, 48 | Parser, 49 | tok, 50 | ) 51 | from funcparserlib.util import pretty_tree 52 | 53 | ENCODING = "UTF-8" 54 | 55 | 56 | class Graph(NamedTuple): 57 | strict: Optional[str] 58 | type: Optional[str] 59 | id: Optional[str] 60 | stmts: List["Statement"] 61 | 62 | 63 | class SubGraph(NamedTuple): 64 | id: Optional[str] 65 | stmts: List["Statement"] 66 | 67 | 68 | class Attr(NamedTuple): 69 | name: str 70 | value: Optional[str] 71 | 72 | 73 | class Node(NamedTuple): 74 | id: str 75 | attrs: List[Attr] 76 | 77 | 78 | class Edge(NamedTuple): 79 | nodes: List[Union[str, SubGraph]] 80 | attrs: List[Attr] 81 | 82 | 83 | class DefAttrs(NamedTuple): 84 | object: str 85 | attrs: List[Attr] 86 | 87 | 88 | Statement = Union[DefAttrs, Edge, SubGraph, Node] 89 | 90 | 91 | T = TypeVar("T") 92 | 93 | 94 | def tokenize(s: str) -> Sequence[Token]: 95 | specs = [ 96 | TokenSpec("Comment", r"/\*(.|[\r\n])*?\*/", MULTILINE), 97 | TokenSpec("Comment", r"//.*"), 98 | TokenSpec("NL", r"[\r\n]+"), 99 | TokenSpec("Space", r"[ \t\r\n]+"), 100 | TokenSpec("Name", r"[A-Za-z\200-\377_][A-Za-z\200-\377_0-9]*"), 101 | TokenSpec("Op", r"[{};,=\[\]]|(->)|(--)"), 102 | TokenSpec("Number", r"-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)"), 103 | TokenSpec("String", r'"[^"]*"'), # '\"' escapes are ignored 104 | ] 105 | useless = ["Comment", "NL", "Space"] 106 | t = make_tokenizer(specs) 107 | return [x for x in t(s) if x.type not in useless] 108 | 109 | 110 | def parse(tokens: Sequence[Token]) -> Graph: 111 | def un_arg(f: Callable[..., T]) -> Callable[[tuple], T]: 112 | return lambda args: f(*args) 113 | 114 | def flatten(xs: List[List[Attr]]) -> List[Attr]: 115 | return sum(xs, []) 116 | 117 | def n(s: str) -> Parser[Token, str]: 118 | return tok("Name", s) 119 | 120 | def op(s: str) -> Parser[Token, str]: 121 | return tok("Op", s) 122 | 123 | dot_id = (tok("Name") | tok("Number") | tok("String")).named("id") 124 | 125 | def make_graph_attr(args: tuple) -> DefAttrs: 126 | return DefAttrs("graph", [Attr(*args)]) 127 | 128 | def make_edge( 129 | node: Union[str, SubGraph], xs: List[Union[str, SubGraph]], attrs: List[Attr] 130 | ) -> Edge: 131 | return Edge([node] + xs, attrs) 132 | 133 | node_id = dot_id # + maybe(port) 134 | a_list = dot_id + maybe(-op("=") + dot_id) + -maybe(op(",")) >> un_arg(Attr) 135 | attr_list = many(-op("[") + many(a_list) + -op("]")) >> flatten 136 | attr_stmt = (n("graph") | n("node") | n("edge")) + attr_list >> un_arg(DefAttrs) 137 | graph_attr = dot_id + -op("=") + dot_id >> make_graph_attr 138 | node_stmt = node_id + attr_list >> un_arg(Node) 139 | # We use a forward_decl because of circular definitions like 140 | # (stmt_list -> stmt -> subgraph -> stmt_list) 141 | subgraph: Parser[Token, SubGraph] = forward_decl() 142 | edge_rhs = -(op("->") | op("--")) + (subgraph | node_id) 143 | edge_stmt = (subgraph | node_id) + oneplus(edge_rhs) + attr_list >> un_arg( 144 | make_edge 145 | ) 146 | stmt = attr_stmt | edge_stmt | subgraph | graph_attr | node_stmt 147 | stmt_list = many(stmt + -maybe(op(";"))) 148 | graph_body = -op("{") + stmt_list + -op("}") 149 | 
subgraph.define(-n("subgraph") + maybe(dot_id) + graph_body >> un_arg(SubGraph)) 150 | graph_modifiers = maybe(n("strict")) + maybe(n("graph") | n("digraph")) 151 | graph = graph_modifiers + maybe(dot_id) + graph_body >> un_arg(Graph) 152 | dotfile = graph + -finished 153 | 154 | return dotfile.parse(tokens) 155 | 156 | 157 | def pretty_parse_tree(obj: object) -> str: 158 | class NamedValues(NamedTuple): 159 | name: str 160 | values: Sequence[object] 161 | 162 | def kids(x: object) -> Sequence[object]: 163 | if isinstance(x, (Graph, SubGraph)): 164 | return [NamedValues("stmts", x.stmts)] 165 | elif isinstance(x, (Node, DefAttrs)): 166 | return [NamedValues("attrs", x.attrs)] 167 | elif isinstance(x, Edge): 168 | return [NamedValues("nodes", x.nodes), NamedValues("attrs", x.attrs)] 169 | elif isinstance(x, NamedValues): 170 | return x.values 171 | else: 172 | return [] 173 | 174 | def show(x: object) -> str: 175 | if isinstance(x, NamedValues): 176 | return x.name 177 | elif isinstance(x, Graph): 178 | return "Graph [id=%s, strict=%r, type=%s]" % ( 179 | x.id, 180 | x.strict is not None, 181 | x.type, 182 | ) 183 | elif isinstance(x, SubGraph): 184 | return "SubGraph [id=%s]" % (x.id,) 185 | elif isinstance(x, Edge): 186 | return "Edge" 187 | elif isinstance(x, Attr): 188 | return "Attr [name=%s, value=%s]" % (x.name, x.value) 189 | elif isinstance(x, DefAttrs): 190 | return "DefAttrs [object=%s]" % (x.object,) 191 | elif isinstance(x, Node): 192 | return "Node [id=%s]" % (x.id,) 193 | else: 194 | return str(x) 195 | 196 | return pretty_tree(obj, kids, show) 197 | 198 | 199 | def main() -> None: 200 | # import logging 201 | # logging.basicConfig(level=logging.DEBUG) 202 | # import funcparserlib 203 | # funcparserlib.parser.debug = True 204 | try: 205 | stdin = os.fdopen(sys.stdin.fileno(), "rb") 206 | text = stdin.read().decode(ENCODING) 207 | tree = parse(tokenize(text)) 208 | # print(pformat(tree)) 209 | print(pretty_parse_tree(tree).encode(ENCODING)) 210 | except (NoParseError, LexerError) as e: 211 | msg = ("syntax error: %s" % e).encode(ENCODING) 212 | print(msg, file=sys.stderr) 213 | sys.exit(1) 214 | 215 | 216 | if __name__ == "__main__": 217 | main() 218 | -------------------------------------------------------------------------------- /tests/json.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright © 2009/2023 Andrey Vlasovskikh 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | # software and associated documentation files (the "Software"), to deal in the Software 7 | # without restriction, including without limitation the rights to use, copy, modify, 8 | # merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | # permit persons to whom the Software is furnished to do so, subject to the following 10 | # conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all copies 13 | # or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 16 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 17 | # PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 18 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 19 | # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 20 | # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | """A JSON parser using funcparserlib. 23 | 24 | The parser is based on [the JSON grammar][1]. 25 | 26 | [1]: https://tools.ietf.org/html/rfc4627 27 | """ 28 | 29 | import re 30 | import sys 31 | from pprint import pformat 32 | from re import VERBOSE 33 | from typing import ( 34 | List, 35 | Sequence, 36 | Optional, 37 | Tuple, 38 | Any, 39 | Dict, 40 | Match, 41 | TypeVar, 42 | Callable, 43 | Text, 44 | Union, 45 | ) 46 | 47 | from funcparserlib.lexer import TokenSpec, make_tokenizer, Token, LexerError 48 | from funcparserlib.parser import ( 49 | maybe, 50 | many, 51 | finished, 52 | forward_decl, 53 | NoParseError, 54 | Parser, 55 | tok, 56 | ) 57 | 58 | ENCODING = "UTF-8" 59 | # noinspection SpellCheckingInspection 60 | regexps = { 61 | "escaped": r""" 62 | \\ # Escape 63 | ((?P<standard>["\\/bfnrt]) # Standard escapes 64 | | (u(?P<unicode>[0-9A-Fa-f]{4}))) # uXXXX 65 | """, 66 | "unescaped": r""" 67 | [^"\\] # Unescaped: avoid ["\\] 68 | """, 69 | } 70 | re_esc = re.compile(regexps["escaped"], VERBOSE) 71 | T = TypeVar("T") 72 | JsonValue = Union[None, bool, dict, list, int, float, str] 73 | JsonMember = Tuple[str, JsonValue] 74 | 75 | 76 | def tokenize(s: str) -> List[Token]: 77 | specs = [ 78 | TokenSpec("space", r"[ \t\r\n]+"), 79 | TokenSpec("string", r'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE), 80 | TokenSpec( 81 | "number", 82 | r""" 83 | -? # Minus 84 | (0|([1-9][0-9]*)) # Int 85 | (\.[0-9]+)? # Frac 86 | ([Ee][+-]?[0-9]+)? # Exp 87 | """, 88 | VERBOSE, 89 | ), 90 | TokenSpec("op", r"[{}\[\]\-,:]"), 91 | TokenSpec("name", r"[A-Za-z_][A-Za-z_0-9]*"), 92 | ] 93 | useless = ["space"] 94 | t = make_tokenizer(specs) 95 | return [x for x in t(s) if x.type not in useless] 96 | 97 | 98 | def parse(tokens: Sequence[Token]) -> JsonValue: 99 | def const(x: T) -> Callable[[Any], T]: 100 | return lambda _: x 101 | 102 | def op(s: str) -> Parser[Token, str]: 103 | return tok("op", s) 104 | 105 | def n(s: str) -> Parser[Token, Text]: 106 | return tok("name", s) 107 | 108 | def make_array( 109 | values: Optional[Tuple[JsonValue, List[JsonValue]]] 110 | ) -> List[JsonValue]: 111 | if values is None: 112 | return [] 113 | else: 114 | return [values[0]] + values[1] 115 | 116 | def make_object( 117 | values: Optional[Tuple[JsonMember, List[JsonMember]]] 118 | ) -> Dict[str, Any]: 119 | if values is None: 120 | return {} 121 | else: 122 | first, rest = values 123 | k, v = first 124 | d = {k: v} 125 | d.update(rest) 126 | return d 127 | 128 | def make_number(s: str) -> Union[int, float]: 129 | try: 130 | return int(s) 131 | except ValueError: 132 | return float(s) 133 | 134 | def unescape(s: str) -> str: 135 | std = { 136 | '"': '"', 137 | "\\": "\\", 138 | "/": "/", 139 | "b": "\b", 140 | "f": "\f", 141 | "n": "\n", 142 | "r": "\r", 143 | "t": "\t", 144 | } 145 | 146 | def sub(m: Match[str]) -> str: 147 | if m.group("standard") is not None: 148 | return std[m.group("standard")] 149 | else: 150 | return chr(int(m.group("unicode"), 16)) 151 | 152 | return re_esc.sub(sub, s) 153 | 154 | def make_string(s: str) -> str: 155 | return unescape(s[1:-1]) 156 | 157 | def make_member(values: JsonMember) -> JsonMember: 158 | k, v = values 159 | return k, v 160 | 161 | null = n("null") >> const(None) 162 | true = n("true") >>
const(True) 163 | false = n("false") >> const(False) 164 | number = tok("number") >> make_number 165 | string = tok("string") >> make_string 166 | value: Parser[Token, JsonValue] = forward_decl().named("json_value") 167 | member = string + -op(":") + value >> make_member 168 | json_object = ( 169 | (-op("{") + maybe(member + many(-op(",") + member)) + -op("}")) >> make_object 170 | ).named("json_object") 171 | json_array = ( 172 | (-op("[") + maybe(value + many(-op(",") + value)) + -op("]")) >> make_array 173 | ).named("json_array") 174 | value.define(null | true | false | json_object | json_array | number | string) 175 | json_text = value + -finished 176 | 177 | return json_text.parse(tokens) 178 | 179 | 180 | def loads(s: str) -> JsonValue: 181 | return parse(tokenize(s)) 182 | 183 | 184 | def main() -> None: 185 | try: 186 | text = sys.stdin.read() 187 | tree = loads(text) 188 | print(pformat(tree)) 189 | except (NoParseError, LexerError) as e: 190 | print("syntax error: %s" % e, file=sys.stderr) 191 | sys.exit(1) 192 | 193 | 194 | if __name__ == "__main__": 195 | main() 196 | -------------------------------------------------------------------------------- /tests/test_dot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import unittest 4 | from typing import Optional 5 | 6 | from funcparserlib.parser import NoParseError 7 | from funcparserlib.lexer import LexerError 8 | from .dot import parse, tokenize, Graph, Edge, SubGraph, DefAttrs, Attr, Node 9 | 10 | 11 | class DotTest(unittest.TestCase): 12 | def t(self, data: str, expected: Optional[Graph] = None) -> None: 13 | self.assertEqual(parse(tokenize(data)), expected) 14 | 15 | def test_comments(self) -> None: 16 | self.t( 17 | """ 18 | /* комм 1 */ 19 | graph /* комм 4 */ g1 { 20 | // комм 2 /* комм 3 */ 21 | } 22 | // комм 5 23 | """, 24 | Graph(strict=None, type="graph", id="g1", stmts=[]), 25 | ) 26 | 27 | def test_connected_subgraph(self) -> None: 28 | self.t( 29 | """ 30 | digraph g1 { 31 | n1 -> n2 -> 32 | subgraph n3 { 33 | nn1 -> nn2 -> nn3; 34 | nn3 -> nn1; 35 | }; 36 | subgraph n3 {} -> n1; 37 | } 38 | """, 39 | Graph( 40 | strict=None, 41 | type="digraph", 42 | id="g1", 43 | stmts=[ 44 | Edge( 45 | nodes=[ 46 | "n1", 47 | "n2", 48 | SubGraph( 49 | id="n3", 50 | stmts=[ 51 | Edge(nodes=["nn1", "nn2", "nn3"], attrs=[]), 52 | Edge(nodes=["nn3", "nn1"], attrs=[]), 53 | ], 54 | ), 55 | ], 56 | attrs=[], 57 | ), 58 | Edge(nodes=[SubGraph(id="n3", stmts=[]), "n1"], attrs=[]), 59 | ], 60 | ), 61 | ) 62 | 63 | def test_default_attrs(self) -> None: 64 | self.t( 65 | """ 66 | digraph g1 { 67 | page="3,3"; 68 | graph [rotate=90]; 69 | node [shape=box, color="#0000ff"]; 70 | edge [style=dashed]; 71 | n1 -> n2 -> n3; 72 | n3 -> n1; 73 | } 74 | """, 75 | Graph( 76 | strict=None, 77 | type="digraph", 78 | id="g1", 79 | stmts=[ 80 | DefAttrs(object="graph", attrs=[Attr(name="page", value='"3,3"')]), 81 | DefAttrs(object="graph", attrs=[Attr(name="rotate", value="90")]), 82 | DefAttrs( 83 | object="node", 84 | attrs=[ 85 | Attr(name="shape", value="box"), 86 | Attr(name="color", value='"#0000ff"'), 87 | ], 88 | ), 89 | DefAttrs(object="edge", attrs=[Attr(name="style", value="dashed")]), 90 | Edge(nodes=["n1", "n2", "n3"], attrs=[]), 91 | Edge(nodes=["n3", "n1"], attrs=[]), 92 | ], 93 | ), 94 | ) 95 | 96 | def test_empty_graph(self) -> None: 97 | self.t( 98 | """ 99 | graph g1 {} 100 | """, 101 | Graph(strict=None, type="graph", id="g1", stmts=[]), 102 | ) 103 | 104 | def 
test_few_attrs(self) -> None: 105 | self.t( 106 | """ 107 | digraph g1 { 108 | n1 [attr1, attr2 = value2]; 109 | } 110 | """, 111 | Graph( 112 | strict=None, 113 | type="digraph", 114 | id="g1", 115 | stmts=[ 116 | Node( 117 | id="n1", 118 | attrs=[ 119 | Attr(name="attr1", value=None), 120 | Attr(name="attr2", value="value2"), 121 | ], 122 | ) 123 | ], 124 | ), 125 | ) 126 | 127 | def test_few_nodes(self) -> None: 128 | self.t( 129 | """ 130 | graph g1 { 131 | n1; 132 | n2; 133 | n3 134 | } 135 | """, 136 | Graph( 137 | strict=None, 138 | type="graph", 139 | id="g1", 140 | stmts=[ 141 | Node(id="n1", attrs=[]), 142 | Node(id="n2", attrs=[]), 143 | Node(id="n3", attrs=[]), 144 | ], 145 | ), 146 | ) 147 | 148 | def test_illegal_comma(self) -> None: 149 | try: 150 | self.t( 151 | """ 152 | graph g1 { 153 | n1; 154 | n2; 155 | n3, 156 | } 157 | """ 158 | ) 159 | except NoParseError: 160 | pass 161 | else: 162 | self.fail("must raise NoParseError") 163 | 164 | def test_null(self) -> None: 165 | try: 166 | self.t("") 167 | except NoParseError: 168 | pass 169 | else: 170 | self.fail("must raise NoParseError") 171 | 172 | def test_simple_cycle(self) -> None: 173 | self.t( 174 | """ 175 | digraph g1 { 176 | n1 -> n2 [w=5]; 177 | n2 -> n3 [w=10]; 178 | n3 -> n1 [w=7]; 179 | } 180 | """, 181 | Graph( 182 | strict=None, 183 | type="digraph", 184 | id="g1", 185 | stmts=[ 186 | Edge(nodes=["n1", "n2"], attrs=[Attr(name="w", value="5")]), 187 | Edge(nodes=["n2", "n3"], attrs=[Attr(name="w", value="10")]), 188 | Edge(nodes=["n3", "n1"], attrs=[Attr(name="w", value="7")]), 189 | ], 190 | ), 191 | ) 192 | 193 | def test_single_unicode_char(self) -> None: 194 | try: 195 | self.t("ф") 196 | except LexerError: 197 | pass 198 | else: 199 | self.fail("must raise LexerError") 200 | 201 | def test_unicode_names(self) -> None: 202 | self.t( 203 | """ 204 | digraph g1 { 205 | n1 -> "Медведь" [label="Поехали!"]; 206 | "Медведь" -> n3 [label="Добро пожаловать!"]; 207 | n3 -> n1 ["Водка"="Селёдка"]; 208 | } 209 | """, 210 | Graph( 211 | strict=None, 212 | type="digraph", 213 | id="g1", 214 | stmts=[ 215 | Edge( 216 | nodes=["n1", '"Медведь"'], 217 | attrs=[Attr(name="label", value='"Поехали!"')], 218 | ), 219 | Edge( 220 | nodes=['"Медведь"', "n3"], 221 | attrs=[Attr(name="label", value='"Добро пожаловать!"')], 222 | ), 223 | Edge( 224 | nodes=["n3", "n1"], 225 | attrs=[Attr(name='"Водка"', value='"Селёдка"')], 226 | ), 227 | ], 228 | ), 229 | ) 230 | -------------------------------------------------------------------------------- /tests/test_json.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import unittest 4 | from typing import Optional 5 | 6 | from funcparserlib.parser import NoParseError 7 | from funcparserlib.lexer import LexerError 8 | from . 
import json 9 | 10 | 11 | class JsonTest(unittest.TestCase): 12 | def t(self, data: str, expected: Optional[object] = None) -> None: 13 | self.assertEqual(json.loads(data), expected) 14 | 15 | def test_1_array(self) -> None: 16 | self.t("[1]", [1]) 17 | 18 | def test_1_object(self) -> None: 19 | self.t('{"foo": "bar"}', {"foo": "bar"}) 20 | 21 | def test_bool_and_null(self) -> None: 22 | self.t("[null, true, false]", [None, True, False]) 23 | 24 | def test_empty_array(self) -> None: 25 | self.t("[]", []) 26 | 27 | def test_empty_object(self) -> None: 28 | self.t("{}", {}) 29 | 30 | def test_many_array(self) -> None: 31 | self.t("[1, 2, [3, 4, 5], 6]", [1, 2, [3, 4, 5], 6]) 32 | 33 | def test_many_object(self) -> None: 34 | # noinspection SpellCheckingInspection 35 | self.t( 36 | """ 37 | { 38 | "foo": 1, 39 | "bar": 40 | { 41 | "baz": 2, 42 | "quux": [true, false], 43 | "{}": {} 44 | }, 45 | "spam": "eggs" 46 | } 47 | """, 48 | { 49 | "foo": 1, 50 | "bar": { 51 | "baz": 2, 52 | "quux": [True, False], 53 | "{}": {}, 54 | }, 55 | "spam": "eggs", 56 | }, 57 | ) 58 | 59 | def test_null(self) -> None: 60 | try: 61 | self.t("") 62 | except NoParseError: 63 | pass 64 | else: 65 | self.fail("must raise NoParseError") 66 | 67 | def test_numbers(self) -> None: 68 | self.t( 69 | """\ 70 | [ 71 | 0, 1, -1, 14, -14, 65536, 72 | 0.0, 3.14, -3.14, -123.456, 73 | 6.67428e-11, -1.602176e-19, 6.67428E-11 74 | ] 75 | """, 76 | [ 77 | 0, 78 | 1, 79 | -1, 80 | 14, 81 | -14, 82 | 65536, 83 | 0.0, 84 | 3.14, 85 | -3.14, 86 | -123.456, 87 | 6.67428e-11, 88 | -1.602176e-19, 89 | 6.67428e-11, 90 | ], 91 | ) 92 | 93 | def test_strings(self) -> None: 94 | # noinspection SpellCheckingInspection 95 | self.t( 96 | r""" 97 | [ 98 | ["", "hello", "hello world!"], 99 | ["привет, мир!", "λx.x"], 100 | ["\"", "\\", "\/", "\b", "\f", "\n", "\r", "\t"], 101 | ["\u0000", "\u03bb", "\uffff", "\uFFFF"], 102 | ["вот функция идентичности:\nλx.x\nили так:\n\u03bbx.x"] 103 | ] 104 | """, 105 | [ 106 | ["", "hello", "hello world!"], 107 | ["привет, мир!", "λx.x"], 108 | ['"', "\\", "/", "\x08", "\x0c", "\n", "\r", "\t"], 109 | ["\u0000", "\u03bb", "\uffff", "\uffff"], 110 | ["вот функция идентичности:\nλx.x\nили так:\n\u03bbx.x"], 111 | ], 112 | ) 113 | 114 | def test_toplevel_string(self) -> None: 115 | try: 116 | self.t("неправильно") 117 | except LexerError: 118 | pass 119 | else: 120 | self.fail("must raise LexerError") 121 | -------------------------------------------------------------------------------- /tests/test_parsing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import unittest 4 | from typing import Optional, Tuple 5 | 6 | from funcparserlib.lexer import TokenSpec, make_tokenizer, LexerError, Token 7 | from funcparserlib.parser import ( 8 | a, 9 | many, 10 | NoParseError, 11 | oneplus, 12 | Parser, 13 | maybe, 14 | _Ignored, # noqa 15 | tok, 16 | finished, 17 | forward_decl, 18 | some, 19 | ) 20 | 21 | 22 | class ParsingTest(unittest.TestCase): 23 | def test_oneplus(self) -> None: 24 | x = a("x") 25 | y = a("y") 26 | expr = oneplus(x + y) 27 | # noinspection SpellCheckingInspection 28 | self.assertEqual(expr.parse("xyxyxy"), ([("x", "y"), ("x", "y"), ("x", "y")])) 29 | 30 | # Issue 31 31 | def test_many_backtracking(self) -> None: 32 | x = a("x") 33 | y = a("y") 34 | expr = many(x + y) + x + x 35 | # noinspection SpellCheckingInspection 36 | self.assertEqual(expr.parse("xyxyxx"), ([("x", "y"), ("x", "y")], "x", "x")) 37 | 38 | # Issue 14 39 | def 
test_error_info(self) -> None: 40 | tokenize = make_tokenizer( 41 | [ 42 | TokenSpec("keyword", r"\b(is|end)\b"), 43 | TokenSpec("id", r"[a-z_]+"), 44 | ("space", (r"[ \t]+",)), # Legacy token spec 45 | TokenSpec("nl", r"[\n\r]+"), 46 | ] 47 | ) 48 | with self.assertRaises(LexerError) as ctx: 49 | list(tokenize("f is ф")) 50 | self.assertEqual(str(ctx.exception), 'cannot tokenize data: 1,6: "f is \u0444"') 51 | 52 | def make_equality(values: Tuple[str, str]) -> Tuple[str, str]: 53 | v1, v2 = values 54 | return v1, v2 55 | 56 | tok_id = tok("id") 57 | equality = tok_id + -tok("keyword", "is") + tok_id >> make_equality 58 | expr = equality + -tok("nl") 59 | file = many(expr) + tok("keyword", "end") 60 | 61 | msg = """\ 62 | spam is eggs 63 | foo is_not bar 64 | end""" 65 | tokens = [x for x in tokenize(msg) if x.type != "space"] 66 | with self.assertRaises(NoParseError) as ctx2: 67 | file.parse(tokens) 68 | self.assertEqual(ctx2.exception.state.pos, 4) 69 | self.assertEqual(ctx2.exception.state.max, 5) 70 | # May raise KeyError 71 | t = tokens[ctx2.exception.state.max] 72 | self.assertEqual(t, Token("id", "is_not")) 73 | self.assertEqual((t.start, t.end), ((2, 5), (2, 10))) 74 | self.assertEqual( 75 | ctx2.exception.msg, 76 | "2,5-2,10: got unexpected token: 'is_not', expected: 'is'", 77 | ) 78 | 79 | def test_ok_ignored(self) -> None: 80 | x = a("x") 81 | y = a("y") 82 | expr: Parser[str, str] = -x + y 83 | self.assertEqual(expr.parse("xy"), "y") 84 | 85 | def test_ignored_ok(self) -> None: 86 | x = a("x") 87 | y = a("y") 88 | expr: Parser[str, str] = x + -y 89 | self.assertEqual(expr.parse("xy"), "x") 90 | 91 | def test_ignored_ok_ok(self) -> None: 92 | x = a("x") 93 | y = a("y") 94 | expr: Parser[str, Tuple[str, str]] = -x + y + x 95 | self.assertEqual(expr.parse("xyx"), ("y", "x")) 96 | 97 | def test_ok_ignored_ok(self) -> None: 98 | x = a("x") 99 | y = a("y") 100 | expr: Parser[str, Tuple[str, str]] = x + -y + x 101 | self.assertEqual(expr.parse("xyx"), ("x", "x")) 102 | 103 | def test_ok_ok_ok(self) -> None: 104 | x = a("x") 105 | y = a("y") 106 | expr: Parser[str, Tuple[str, str]] = x + y + x 107 | self.assertEqual(expr.parse("xyx"), ("x", "y", "x")) 108 | 109 | def test_ok_ok_ignored(self) -> None: 110 | x = a("x") 111 | y = a("y") 112 | expr: Parser[str, Tuple[str, str]] = x + y + -x 113 | self.assertEqual(expr.parse("xyx"), ("x", "y")) 114 | 115 | def test_ignored_ignored_ok(self) -> None: 116 | x = a("x") 117 | y = a("y") 118 | expr: Parser[str, str] = -x + -x + y 119 | self.assertEqual(expr.parse("xxy"), "y") 120 | 121 | def test_ok_ignored_ignored(self) -> None: 122 | x = a("x") 123 | y = a("y") 124 | expr: Parser[str, str] = x + -y + -y 125 | self.assertEqual(expr.parse("xyy"), "x") 126 | 127 | def test_ignored_ignored(self) -> None: 128 | x = a("x") 129 | y = a("y") 130 | expr: Parser[str, _Ignored] = -x + -y 131 | self.assertEqual(expr.parse("xy"), _Ignored("y")) 132 | 133 | def test_ignored_ignored_ignored(self) -> None: 134 | x = a("x") 135 | y = a("y") 136 | z = a("z") 137 | expr: Parser[str, _Ignored] = -x + -y + -z 138 | self.assertEqual(expr.parse("xyz"), _Ignored("z")) 139 | 140 | def test_ignored_maybe(self) -> None: 141 | x = a("x") 142 | y = a("y") 143 | expr: Parser[str, str] = -maybe(x) + y 144 | self.assertEqual(expr.parse("xy"), "y") 145 | self.assertEqual(expr.parse("y"), "y") 146 | 147 | def test_maybe_ignored(self) -> None: 148 | x = a("x") 149 | y = a("y") 150 | expr: Parser[str, Tuple[Optional[_Ignored], str]] = maybe(-x) + y 151 | 
self.assertEqual(expr.parse("xy"), (_Ignored("x"), "y")) 152 | self.assertEqual(expr.parse("y"), (None, "y")) 153 | 154 | def test_ignored_maybe_ignored(self) -> None: 155 | x = a("x") 156 | y = a("y") 157 | expr: Parser[str, Optional[str]] = -x + maybe(y) + -x 158 | self.assertEqual(expr.parse("xyx"), "y") 159 | self.assertEqual(expr.parse("xx"), None) 160 | 161 | def test_compare_token_with_none(self) -> None: 162 | # https://github.com/vlasovskikh/funcparserlib/pull/58 163 | specs = [ 164 | ("id", (r"\w+",)), 165 | ] 166 | tokenize = make_tokenizer(specs) 167 | tokens = list(tokenize("foo")) 168 | expr = maybe(a(None)) 169 | self.assertEqual(expr.parse(tokens), None) # type: ignore 170 | 171 | def test_seq_parse_error(self) -> None: 172 | expr = a("x") + a("y") 173 | with self.assertRaises(NoParseError) as ctx: 174 | expr.parse("xz") 175 | self.assertEqual(ctx.exception.msg, "got unexpected token: 'z', expected: 'y'") 176 | 177 | def test_alt_2_parse_error(self) -> None: 178 | expr = a("x") + (a("x") | a("y")) 179 | with self.assertRaises(NoParseError) as ctx: 180 | expr.parse("xz") 181 | self.assertEqual( 182 | ctx.exception.msg, "got unexpected token: 'z', expected: 'x' or 'y'" 183 | ) 184 | 185 | def test_alt_3_parse_error(self) -> None: 186 | expr = a("x") + (a("x") | a("y") | a("z")) 187 | with self.assertRaises(NoParseError) as ctx: 188 | expr.parse("xa") 189 | self.assertEqual( 190 | ctx.exception.msg, 191 | "got unexpected token: 'a', expected: 'x' or 'y' or 'z'", 192 | ) 193 | 194 | def test_alt_3_two_steps_parse_error(self) -> None: 195 | expr = a("x") + (a("x") | (a("y") + a("a"))) 196 | with self.assertRaises(NoParseError) as ctx: 197 | expr.parse("xyz") 198 | self.assertEqual(ctx.exception.msg, "got unexpected token: 'z', expected: 'a'") 199 | 200 | def test_expected_eof_error(self) -> None: 201 | expr = a("x") + finished 202 | with self.assertRaises(NoParseError) as ctx: 203 | expr.parse("xy") 204 | self.assertEqual( 205 | ctx.exception.msg, 206 | "got unexpected token: 'y', expected: end of input", 207 | ) 208 | 209 | def test_expected_second_in_sequence_error(self) -> None: 210 | expr = a("x") + a("y") 211 | with self.assertRaises(NoParseError) as ctx: 212 | expr.parse("xz") 213 | self.assertEqual(ctx.exception.msg, "got unexpected token: 'z', expected: 'y'") 214 | 215 | def test_forward_decl_nested_matching_error(self) -> None: 216 | expr = forward_decl() 217 | expr.define(a("x") + maybe(expr) + a("y")) 218 | with self.assertRaises(NoParseError) as ctx: 219 | expr.parse("xxy") 220 | self.assertEqual( 221 | ctx.exception.msg, "got unexpected end of input, expected: 'y'" 222 | ) 223 | 224 | def test_expected_token_type_error(self) -> None: 225 | expr = tok("number") 226 | with self.assertRaises(NoParseError) as ctx: 227 | expr.parse([Token("id", "x")]) 228 | self.assertEqual( 229 | ctx.exception.msg, "got unexpected token: 'x', expected: number" 230 | ) 231 | 232 | def test_expected_exact_token_error(self) -> None: 233 | expr = tok("operator", "=") 234 | with self.assertRaises(NoParseError) as ctx: 235 | expr.parse([Token("operator", "+")]) 236 | self.assertEqual(ctx.exception.msg, "got unexpected token: '+', expected: '='") 237 | 238 | def test_unexpected_eof(self) -> None: 239 | expr = (a("x") + a("y")) | a("z") 240 | with self.assertRaises(NoParseError) as ctx: 241 | expr.parse("x") 242 | self.assertEqual( 243 | ctx.exception.msg, "got unexpected end of input, expected: 'y'" 244 | ) 245 | 246 | def test_expected_transform_parsing_results_error(self) -> None: 247 | expr 
= (a("1") >> int) | a("2") 248 | with self.assertRaises(NoParseError) as ctx: 249 | expr.parse("x") 250 | self.assertEqual( 251 | ctx.exception.msg, "got unexpected token: 'x', expected: '1' or '2'" 252 | ) 253 | 254 | def test_expected_sequence_with_skipped_parts(self) -> None: 255 | expr = (-a("x") + a("y")) | a("z") 256 | with self.assertRaises(NoParseError) as ctx: 257 | expr.parse("b") 258 | self.assertEqual( 259 | ctx.exception.msg, 260 | "got unexpected token: 'b', expected: ('x', 'y') or 'z'", 261 | ) 262 | 263 | def test_expected_some_without_name(self) -> None: 264 | def lowercase(t: str) -> bool: 265 | return t.islower() 266 | 267 | expr = some(lowercase) 268 | with self.assertRaises(NoParseError) as ctx: 269 | expr.parse("A") 270 | self.assertEqual( 271 | ctx.exception.msg, "got unexpected token: 'A', expected: some(...)" 272 | ) 273 | 274 | def test_expected_forward_decl_without_name(self) -> None: 275 | nested = forward_decl() 276 | nested.define(-a("a") + maybe(nested) + -a("z")) 277 | expr = nested | a("x") 278 | with self.assertRaises(NoParseError) as ctx: 279 | expr.parse("y") 280 | self.assertEqual( 281 | ctx.exception.msg, 282 | "got unexpected token: 'y', " 283 | "expected: (('a', [ forward_decl() ]), 'z') or 'x'", 284 | ) 285 | 286 | def test_expected_forward_decl_with_name(self) -> None: 287 | nested = forward_decl().named("nested") 288 | nested.define(-a("a") + maybe(nested) + -a("z")) 289 | expr = nested | a("x") 290 | with self.assertRaises(NoParseError) as ctx: 291 | expr.parse("y") 292 | self.assertEqual( 293 | ctx.exception.msg, 294 | "got unexpected token: 'y', expected: (('a', [ nested ]), 'z') or 'x'", 295 | ) 296 | 297 | def test_end_of_input_after_many_alternatives(self) -> None: 298 | brackets = a("[") + a("]") 299 | expr = many(a("x") | brackets) + finished 300 | with self.assertRaises(NoParseError) as ctx: 301 | expr.parse("[") 302 | self.assertEqual( 303 | ctx.exception.msg, "got unexpected end of input, expected: ']'" 304 | ) 305 | 306 | def test_parse_one_more_then_rollback_to_single(self) -> None: 307 | mul = a("x") + many(a("*") + a("y")) 308 | add = mul + many(a("+") + mul) 309 | expr = add + finished 310 | with self.assertRaises(NoParseError) as ctx: 311 | expr.parse("x*") 312 | self.assertEqual( 313 | ctx.exception.msg, "got unexpected end of input, expected: 'y'" 314 | ) 315 | 316 | def test_parse_one_more_then_rollback_to_alternative(self) -> None: 317 | mul = a("x") + many(a("*") + a("y")) 318 | addsub = mul + many((a("+") | a("-")) + mul) 319 | expr = addsub + finished 320 | with self.assertRaises(NoParseError) as ctx: 321 | expr.parse("x*") 322 | self.assertEqual( 323 | ctx.exception.msg, "got unexpected end of input, expected: 'y'" 324 | ) 325 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | isolated_build = true 3 | envlist = py{38,39,310,311,312} 4 | 5 | [testenv] 6 | commands = 7 | python -m unittest discover 8 | --------------------------------------------------------------------------------