├── .editorconfig ├── .flake8 ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── RELEASE.rst ├── codecov.yml ├── conftest.py ├── docs ├── Makefile ├── conf.py ├── contributing.rst ├── history.rst ├── howto │ ├── index.rst │ ├── lexing.rst │ └── other_examples.rst ├── index.rst ├── installation.rst ├── make.bat ├── overview.rst ├── ref │ ├── generating.rst │ ├── index.rst │ ├── methods_and_combinators.rst │ ├── parser_instances.rst │ └── primitives.rst ├── requirements.txt └── tutorial.rst ├── examples ├── __init__.py ├── json.py ├── simple_eval.py ├── simple_logo_lexer.py ├── simple_logo_parser.py └── sql_select.py ├── pyproject.toml ├── pytest.ini ├── release.sh ├── src └── parsy │ └── __init__.py ├── tests ├── requirements-linters.txt ├── requirements-tests.txt ├── test_parsy.py └── test_sexpr.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # See http://editorconfig.org/ 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | insert_final_newline = true 7 | charset = utf-8 8 | indent_style = space 9 | 10 | [*.py] 11 | indent_size = 4 12 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .tox,.git,docs,dist,build,todo,.venv 3 | ignore = E731,E221,W503,E741,E203 4 | max-line-length = 119 5 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | # Trigger the workflow on push or pull request 4 | # events but only for the master branch: 5 | on: 6 | push: 7 | branches: [ master ] 8 | pull_request: 9 | branches: [ master ] 10 | 11 | 12 | jobs: 13 | tests: 14 | runs-on: 
ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy-3.9"] 19 | 20 | env: 21 | PYTHON: ${{ matrix.python-version }} 22 | 23 | steps: 24 | - uses: actions/checkout@v4 25 | - name: Set up Python ${{ matrix.python-version }} with uv 26 | uses: drivendataorg/setup-python-uv-action@v1.0.0 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | cache: 'packages' 30 | 31 | - name: Install dependencies 32 | run: | 33 | uv sync 34 | - name: Run tests 35 | run: | 36 | uv run pytest --cov=./ --cov-report=xml 37 | - name: Upload coverage to Codecov 38 | uses: codecov/codecov-action@v1 39 | with: 40 | file: ./coverage.xml 41 | env_vars: PYTHON 42 | fail_ci_if_error: false 43 | 44 | linters: 45 | runs-on: ubuntu-latest 46 | steps: 47 | - uses: actions/checkout@v4 48 | - name: Set up Python 3.10 with uv 49 | uses: drivendataorg/setup-python-uv-action@v1.0.0 50 | with: 51 | python-version: "3.10" 52 | cache: 'packages' 53 | 54 | - name: Install dependencies 55 | run: | 56 | uv sync 57 | - name: Run pre-commit checks 58 | run: | 59 | uv run pre-commit run --all --all-files 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /dist 3 | /todo 4 | .tox 5 | src/parsy.egg-info 6 | docs/_build 7 | .cache 8 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v3.2.0 4 | hooks: 5 | - id: trailing-whitespace 6 | - id: end-of-file-fixer 7 | - repo: https://github.com/pycqa/flake8 8 | rev: 3.8.4 9 | hooks: 10 | - id: flake8 11 | language_version: python3.10 12 | - repo: https://github.com/pre-commit/mirrors-isort 13 | rev: v5.6.4 14 | hooks: 15 | - id: 
isort 16 | language_version: python3.10 17 | - repo: https://github.com/ikamensh/flynt/ 18 | rev: '0.69' 19 | hooks: 20 | - id: flynt 21 | language_version: python3.10 22 | - repo: https://github.com/asottile/pyupgrade 23 | rev: v2.26.0 24 | hooks: 25 | - id: pyupgrade 26 | entry: pyupgrade --py3-plus --py36-plus --py37-plus --keep-runtime-typing 27 | language_version: python3.10 28 | - repo: https://github.com/myint/autoflake 29 | rev: 'v1.4' 30 | hooks: 31 | - id: autoflake 32 | args: ['--remove-all-unused-imports', '-i'] 33 | language_version: python3.10 34 | - repo: https://github.com/pre-commit/mirrors-autopep8 35 | rev: 'v1.5.7' 36 | hooks: 37 | - id: autopep8 38 | language_version: python3.10 39 | - repo: https://github.com/psf/black 40 | rev: 22.3.0 41 | hooks: 42 | - id: black 43 | language_version: python3.10 44 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.13 2 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-22.04" 5 | tools: 6 | python: "3.11" 7 | 8 | python: 9 | install: 10 | - requirements: docs/requirements.txt 11 | - method: pip 12 | path: . 13 | 14 | sphinx: 15 | configuration: docs/conf.py 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # MIT license. 
See http://www.opensource.org/licenses/mit-license.php 2 | 3 | Copyright (c) 2013 Jeanine Adkisson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include *.rst 3 | include *.sh 4 | include *.yml 5 | include *.yaml 6 | include LICENSE 7 | include tox.ini 8 | include pytest.ini 9 | include conftest.py 10 | include .editorconfig 11 | recursive-include docs *.bat 12 | recursive-include docs *.txt 13 | recursive-include docs *.py 14 | recursive-include docs *.rst 15 | recursive-include docs Makefile 16 | recursive-include examples *.py 17 | recursive-include tests *.py *.txt 18 | prune docs/_build 19 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | parsy 2 | ===== 3 | 4 | |Documentation Status| |Build Status| |Codecov| |Downloads| 5 | 6 | Parsy is an easy and elegant way to parse text in Python by combining small 7 | parsers into complex, larger parsers. If it means anything to you, it's a 8 | monadic parser combinator library for LL(infinity) grammars in the spirit of 9 | `Parsec `_, `Parsnip 10 | `_, and `Parsimmon 11 | `_. But don't worry, it has really good 12 | documentation and it doesn't say things like that! 13 | 14 | Parsy requires Python 3.7 or greater. 15 | 16 | For a good example of the kind of clear, declarative code you can create using 17 | parsy, see the `SQL SELECT statement example 18 | `_ 19 | or `JSON parser 20 | `_. 21 | 22 | Links: 23 | 24 | - `Documentation `_ 25 | - `History and changelog `_ 26 | - `PyPI `_ 27 | 28 | To contribute, please create a fork and submit a pull request on GitHub, after 29 | checking the `contributing 30 | `_ section of the 31 | docs. Thanks! 32 | 33 | If you like parsy and think it should be better known, you could: 34 | 35 | * Star this project on GitHub. 36 | * `Vote `_ for it being included on awesome-python. 
37 | 38 | Parsy was originally written by `Jeanine Adkisson `_, 39 | with contributions by other people as can be found in the git commit history. 40 | 41 | .. |Documentation Status| image:: https://readthedocs.org/projects/parsy/badge/?version=latest 42 | :target: http://parsy.readthedocs.io/en/latest/?badge=latest 43 | .. |Build Status| image:: https://img.shields.io/github/actions/workflow/status/python-parsy/parsy/tests.yml?branch=master 44 | :target: https://github.com/python-parsy/parsy/actions?query=workflow%3A%22Tests%22+branch%3Amaster 45 | .. |Codecov| image:: https://img.shields.io/codecov/c/github/python-parsy/parsy/master.svg 46 | :target: https://codecov.io/gh/python-parsy/parsy 47 | .. |Downloads| image:: https://img.shields.io/pypi/dm/parsy 48 | :target: https://pypi.org/project/parsy/ 49 | -------------------------------------------------------------------------------- /RELEASE.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | How to do releases 3 | ================== 4 | 5 | * Make sure you are on master branch, and have pulled latest changes. 6 | 7 | * Check test suite passes on all supported versions:: 8 | 9 | tox 10 | 11 | * Change docs/history.rst to remove " - unreleased" 12 | 13 | * Update the version number (removing the ``-dev1`` part): 14 | 15 | * src/parsy/__init__.py 16 | * docs/conf.py 17 | 18 | * Commit with "Version bump" 19 | 20 | * Release to PyPI:: 21 | 22 | $ ./release.sh 23 | 24 | 25 | Post release 26 | ------------ 27 | 28 | * Bump version numbers to next version, and add ``-dev1`` suffix, for example 29 | ``0.9.0-dev1`` 30 | 31 | * Add new section to docs/history.rst, with " - unreleased". 
32 | 33 | * Commit and push 34 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "setup.py" 3 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-parsy/parsy/3b72c71bf9570d73ce50477cf503fd5544c1c4b1/conftest.py -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = parsy 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # parsy documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Sep 25 22:24:17 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 
11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | 20 | import os 21 | import sys 22 | 23 | sys.path.insert(0, os.path.abspath("../src")) 24 | 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | # 30 | # needs_sphinx = '1.0' 31 | 32 | # Add any Sphinx extension module names here, as strings. They can be 33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 34 | # ones. 35 | extensions = ["sphinx.ext.viewcode"] 36 | 37 | # Add any paths that contain templates here, relative to this directory. 38 | templates_path = ["_templates"] 39 | 40 | # The suffix(es) of source filenames. 41 | # You can specify multiple suffix as a list of string: 42 | # 43 | # source_suffix = ['.rst', '.md'] 44 | source_suffix = ".rst" 45 | 46 | # The master toctree document. 47 | master_doc = "index" 48 | 49 | # General information about the project. 50 | project = "parsy" 51 | copyright = "2017, Jeanine Adkisson, Luke Plant" 52 | author = "Jeanine Adkisson" 53 | 54 | # The version info for the project you're documenting, acts as replacement for 55 | # |version| and |release|, also used in various other places throughout the 56 | # built documents. 57 | # 58 | # The short X.Y version. 59 | version = "2.1" 60 | # The full version, including alpha/beta/rc tags. 61 | release = "2.1" 62 | 63 | # The language for content autogenerated by Sphinx. Refer to documentation 64 | # for a list of supported languages. 65 | # 66 | # This is also used if you do content translation via gettext catalogs. 
67 | # Usually you set "language" from the command line for these cases. 68 | language = None 69 | 70 | # List of patterns, relative to source directory, that match files and 71 | # directories to ignore when looking for source files. 72 | # This patterns also effect to html_static_path and html_extra_path 73 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 74 | 75 | # The name of the Pygments (syntax highlighting) style to use. 76 | pygments_style = "sphinx" 77 | 78 | # If true, `todo` and `todoList` produce output, else they produce nothing. 79 | todo_include_todos = False 80 | 81 | 82 | # -- Options for HTML output ---------------------------------------------- 83 | 84 | # The theme to use for HTML and HTML Help pages. See the documentation for 85 | # a list of builtin themes. 86 | # 87 | html_theme = "default" 88 | 89 | # Theme options are theme-specific and customize the look and feel of a theme 90 | # further. For a list of options available for each theme, see the 91 | # documentation. 92 | # 93 | # html_theme_options = {} 94 | 95 | # Add any paths that contain custom static files (such as style sheets) here, 96 | # relative to this directory. They are copied after the builtin static files, 97 | # so a file named "default.css" will overwrite the builtin "default.css". 98 | html_static_path = ["_static"] 99 | 100 | 101 | # -- Options for HTMLHelp output ------------------------------------------ 102 | 103 | # Output file base name for HTML help builder. 104 | htmlhelp_basename = "parsydoc" 105 | 106 | 107 | # -- Options for LaTeX output --------------------------------------------- 108 | 109 | latex_elements = { 110 | # The paper size ('letterpaper' or 'a4paper'). 111 | # 112 | # 'papersize': 'letterpaper', 113 | # The font size ('10pt', '11pt' or '12pt'). 114 | # 115 | # 'pointsize': '10pt', 116 | # Additional stuff for the LaTeX preamble. 
117 | # 118 | # 'preamble': '', 119 | # Latex figure (float) alignment 120 | # 121 | # 'figure_align': 'htbp', 122 | } 123 | 124 | # Grouping the document tree into LaTeX files. List of tuples 125 | # (source start file, target name, title, 126 | # author, documentclass [howto, manual, or own class]). 127 | latex_documents = [ 128 | (master_doc, "parsy.tex", "parsy Documentation", "Jeanine Adkisson", "manual"), 129 | ] 130 | 131 | 132 | # -- Options for manual page output --------------------------------------- 133 | 134 | # One entry per manual page. List of tuples 135 | # (source start file, name, description, authors, manual section). 136 | man_pages = [(master_doc, "parsy", "parsy Documentation", [author], 1)] 137 | 138 | 139 | # -- Options for Texinfo output ------------------------------------------- 140 | 141 | # Grouping the document tree into Texinfo files. List of tuples 142 | # (source start file, target name, title, author, 143 | # dir menu entry, description, category) 144 | texinfo_documents = [ 145 | (master_doc, "parsy", "parsy Documentation", author, "parsy", "One line description of project.", "Miscellaneous"), 146 | ] 147 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing to parsy 2 | ===================== 3 | 4 | Contributions to parsy, whether code or docs, are very welcome. Please 5 | contribute by making a fork, and submitting a PR on `GitHub 6 | `_. 7 | 8 | We have a high standard in terms of quality. All contributions will need to be 9 | fully covered by unit tests and documentation. 10 | 11 | To get started you’ll need to: 12 | 13 | - Check out the repo using git, ``cd`` into the directory. 14 | 15 | - Set up a venv for development. We use `uv `_ and 16 | recommend you do the same. With uv, the setup instructions are:: 17 | 18 | uv sync 19 | 20 | This will use your default Python version. 
If you want to use a different 21 | Python version, instead of the above do this e.g.:: 22 | 23 | uv python install 3.10 24 | uv venv --python 3.10 25 | uv sync 26 | 27 | - Activate the venv:: 28 | 29 | source .venv/bin/activate 30 | 31 | (Alternatively, you can add ``uv run`` before most of the commands below) 32 | 33 | - Get test suite running:: 34 | 35 | pytest 36 | 37 | - Run tests against all versions:: 38 | 39 | tox 40 | 41 | - To build the docs, do:: 42 | 43 | cd docs 44 | make html 45 | 46 | We now have several linters and code formatters that we require use of, 47 | including `flake8 `_, `isort 48 | `_ and `black 49 | `_. These are most easily add by using `pre-commit 50 | `_: 51 | 52 | - Install `pre-commit `_ in the repo:: 53 | 54 | pre-commit install 55 | 56 | This will add Git hooks to run linters when committing, which ensures our style 57 | (black) and other things. 58 | 59 | Now all the linters will run when you commit changes. 60 | 61 | - You can also manually run these linters using:: 62 | 63 | pre-commit run --all --all-files 64 | 65 | 66 | When writing documentation, please keep in mind Daniele Procida's `great article 67 | on documentation `_. To summarise, 68 | there are 4 types of docs: 69 | 70 | * Tutorials (focus: learning, analogy: teaching a child to cook) 71 | * How-to guides (focus: goals, analogy: a recipe in a cook book) 72 | * Discussions (focus: understanding, analogy: an article on culinary history) 73 | * Reference (focus: information, analogy: encyclopedia article) 74 | 75 | We do not (yet) have documentation that fits into the "Discussions" category, 76 | but we do have the others, and when adding new features, documentation of the 77 | right sort(s) should be added. With parsy, where code is often very succinct, 78 | writing good docs often takes several times longer than writing the code. 
79 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | History and release notes 3 | ========================= 4 | 5 | .. currentmodule:: parsy 6 | 7 | 2.2 - unreleased 8 | ---------------- 9 | * Dropped support for Python 3.7, 3.8 which are past EOL 10 | 11 | 2.1 - 2023-02-22 12 | ---------------- 13 | 14 | * Dropped support for Python 3.7 15 | * Test against Python 3.11 16 | * Added docstrings and basic type hints to all primitives and main methods 17 | 18 | 19 | 2.0 - 2022-09-08 20 | ---------------- 21 | 22 | * Dropped support for Python < 3.6 23 | * Added :meth:`Parser.until`. Thanks `@mcdeoliveira `_! 24 | * :meth:`Parser.optional` now supports an optional default argument to be returned instead of ``None``. 25 | 26 | 1.4.0 - 2021-11-15 27 | ------------------ 28 | 29 | * Documentation improvements. 30 | * Added ``group`` parameter to :func:`regex` - thanks `@camerondm9 31 | `_. 32 | * Support ``bytes`` with :func:`regex` as well as ``str`` - thanks `@quack4 33 | `_. 34 | * Added :class:`forward_declaration`. 35 | 36 | 37 | 1.3.0 - 2019-08-03 38 | ------------------ 39 | 40 | * Documentation improvements. 41 | * Added :func:`peek` - thanks `@lisael `_. 42 | * Removed Python 3.3 support 43 | * Added Python 3.7 support 44 | * :meth:`Parser.combine_dict` now strips keys that start with ``_``. 45 | 46 | 47 | 1.2.0 - 2017-11-15 48 | ------------------ 49 | 50 | * Added ``transform`` argument to :func:`string` and :func:`string_from`. 51 | * Made :meth:`Parser.combine_dict` accept lists of name value pairs, 52 | and filter out keys with value ``None``. 53 | * Added :func:`from_enum`. 54 | 55 | 56 | 1.1.0 - 2017-11-05 57 | ------------------ 58 | 59 | * Added :meth:`Parser.optional`. 60 | * Added :meth:`Parser.tag`. 
61 | * Added :func:`seq` keyword argument version (Python 3.6) 62 | * Added :meth:`Parser.combine_dict`. 63 | * Documented :meth:`Parser.mark`. 64 | * Documentation improvements. 65 | 66 | 67 | 1.0.0 - 2017-10-10 68 | ------------------ 69 | 70 | * Improved parse failure messages of ``@generate`` parsers. Previously 71 | the parser was given a default description of the function name, 72 | which hides all useful internal info there might be. 73 | * Added :meth:`Parser.sep_by` 74 | * Added :func:`test_char` 75 | * Added :func:`char_from` 76 | * Added :func:`string_from` 77 | * Added :data:`any_char` 78 | * Added :data:`decimal_digit` 79 | * Added :meth:`Parser.concat` 80 | * Fixed parsy so that it can again work with tokens as well as strings, allowing it to 81 | be used as both a :doc:`lexer or parser or both `, with docs and tests. 82 | * Added :func:`test_item` 83 | * Added :func:`match_item` 84 | * Added :meth:`Parser.should_fail` 85 | 86 | 0.9.0 - 2017-09-28 87 | ------------------ 88 | 89 | * Better error reporting of failed parses. 90 | * Documentation overhaul and expansion. 91 | * Added :meth:`Parser.combine`. 92 | 93 | 0.0.4 - 2014-12-28 94 | ------------------ 95 | 96 | * See git logs for changes before this point. 97 | -------------------------------------------------------------------------------- /docs/howto/index.rst: -------------------------------------------------------------------------------- 1 | ================================= 2 | Howto's, cookbooks and examples 3 | ================================= 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | :caption: Contents: 8 | 9 | lexing 10 | other_examples 11 | -------------------------------------------------------------------------------- /docs/howto/lexing.rst: -------------------------------------------------------------------------------- 1 | ===================================== 2 | Separate lexing/tokenization phases 3 | ===================================== 4 | 5 | .. 
currentmodule:: parsy 6 | 7 | Most of the documentation in parsy assumes that when you call 8 | :meth:`Parser.parse` you will pass a string, and will get back your final 9 | parsed, constructed object (of whatever type you desire). 10 | 11 | A more classical approach to parsing is that you first have a 12 | lexing/tokenization phase, the result of which is a simple list of tokens. These 13 | tokens could be strings, or other objects. 14 | 15 | You then have a separate parsing phase that consumes this list of tokens, and 16 | produces your final object, which is very often a tree-like structure or other 17 | complex object. 18 | 19 | Parsy can actually work with either approach. Further, for the split 20 | lexing/parsing approach, parsy can be used either to implement the lexer, or the 21 | parser, or both! The following examples use parsy to do both lexing and parsing. 22 | 23 | However, parsy's features for this use case are not as developed as some other 24 | Python tools. If you are building a parser for a full language that needs the 25 | split lexing/parsing approach, you might be better off with `PLY 26 | `_. 27 | 28 | Turtle Logo 29 | =========== 30 | 31 | For our first example, we'll do a very stripped down Turtle Logo parser. First, 32 | the lexer: 33 | 34 | .. literalinclude:: ../../examples/simple_logo_lexer.py 35 | :language: python 36 | 37 | 38 | We are not interested in whitespace, so our lexer removes it all, apart from 39 | newlines. We can now parse a program into the tokens we are interested in: 40 | 41 | .. code-block:: python 42 | 43 | >>> l = lexer.parse("fd 1\nbk 2") 44 | >>> l 45 | ['fd', 1, '\n', 'bk', 2, '\n'] 46 | 47 | The ``line`` parser produces a list, so after applying ``many`` which also 48 | produces a list, we applied a level of flattening so that we end up with a 49 | simple list of tokens. 
We also chose to convert the parameters to integers while 50 | we were at it, so in this case our list of tokens is not a list of strings, but 51 | heterogeneous. 52 | 53 | The next step is the parser. We create some classes to represent different 54 | commands, and then use parsy again to create a parser which is very simple 55 | because this is a very limited language: 56 | 57 | .. literalinclude:: ../../examples/simple_logo_parser.py 58 | :language: python 59 | 60 | To use it, we pass the the list of tokens generated above into 61 | ``program.parse``: 62 | 63 | .. code-block:: python 64 | 65 | >>> program.parse(l) 66 | [Forward(1), Backward(2)] 67 | 68 | In a real implementation, we could then have ``execute`` methods on the 69 | ``Command`` sub-classes if we wanted to implement an interpreter, for example. 70 | 71 | Calculator 72 | ========== 73 | 74 | Our second example illustrates lexing and then parsing a sequence of 75 | mathematical operations, e.g "1 + 2 * (3 - 4.5)", with precedence. 76 | 77 | In this case, while doing the parsing stage, instead of building up an AST of 78 | objects representing the operations, the parser actually evaluates the 79 | expression. 80 | 81 | .. literalinclude:: ../../examples/simple_eval.py 82 | :language: python 83 | -------------------------------------------------------------------------------- /docs/howto/other_examples.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Other examples 3 | ============== 4 | 5 | .. currentmodule:: parsy 6 | 7 | This section has some further example parsers that you can study. There are also 8 | examples in the :doc:`/tutorial` and in :doc:`/ref/generating`. 
9 | 10 | SQL SELECT statement parser 11 | =========================== 12 | 13 | This shows a very simplified parser for a SQL ``SELECT`` statement, using custom 14 | data structures, and the convenient keyword argument syntax for :func:`seq`, 15 | followed by :meth:`Parser.combine_dict`. 16 | 17 | .. literalinclude:: ../../examples/sql_select.py 18 | :language: python 19 | 20 | 21 | JSON parser 22 | =========== 23 | 24 | A full parser for JSON. (This will not be competitive in terms of performance 25 | with other implementations!) 26 | 27 | This demonstrates the use of :class:`forward_declaration`, needed due to the 28 | circular definition of ``json_value``. 29 | 30 | .. literalinclude:: ../../examples/json.py 31 | :language: python 32 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to parsy's documentation! 2 | ================================= 3 | 4 | These are the docs for parsy |release|. Check the :doc:`/history` for 5 | significant changes. 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | :caption: Contents: 10 | 11 | installation 12 | overview 13 | tutorial 14 | ref/index 15 | howto/index 16 | history 17 | contributing 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | parsy can be installed with pip:: 6 | 7 | pip install parsy 8 | 9 | 10 | Python 3.7 or greater is required. 
11 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=parsy 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/overview.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Overview 3 | ======== 4 | 5 | .. currentmodule:: parsy 6 | 7 | Parsy is an easy way to combine simple, small parsers into complex, larger 8 | parsers. 9 | 10 | If it means anything to you, it's a monadic parser combinator library for 11 | LL(infinity) grammars in the spirit of `Parsec 12 | `_, `Parsnip 13 | `_, and `Parsimmon 14 | `_. 15 | 16 | If that means nothing, rest assured that parsy is a very straightforward and 17 | Pythonic solution for parsing text that doesn't require knowing anything about 18 | monads. 19 | 20 | Parsy differentiates itself from other solutions with the following: 21 | 22 | * it is not a parser generator, but a combinator based parsing library. 
23 | * a very clean implementation, only a few hundred lines, that borrows 24 | from the best of recent combinator libraries. 25 | * it produces fairly terse code, with an embedded DSL feel — not too far from 26 | things like EBNF notation or Haskell’s parsec. 27 | * free, good quality documentation, all in one place. (Please raise an issue on 28 | GitHub if you have any problems, or find the documentation lacking in any 29 | way). 30 | * it avoids mutability, and therefore a ton of related bugs. 31 | * it has monadic binding with a :doc:`nice syntax `. In plain 32 | English: 33 | 34 | * we can easily handle cases where later parsing depends on the value of 35 | something parsed earlier e.g. Hollerith constants. 36 | * it's easy to build up complex result objects, rather than returning lists of 37 | lists etc. which then need to be further processed. 38 | 39 | * it has a minimalist philosophy. It doesn't include built-in helpers for any 40 | specific grammars or languages, but provides building blocks for making these. 41 | 42 | Basic usage looks like this: 43 | 44 | Example 1 - parsing a set of alternatives: 45 | 46 | .. code-block:: python 47 | 48 | >>> from parsy import string 49 | >>> title = (string('Dr.') | string('Mr.') | string('Mrs.')).desc("title") 50 | >>> title.parse('Mrs.') 51 | 'Mrs.' 52 | >>> title.parse('Mr.') 53 | 'Mr.' 54 | 55 | >>> title.parse('Joe') 56 | ParseError: expected title at 0:0 57 | 58 | >>> title.parse_partial('Dr. Who') 59 | ('Dr.', ' Who') 60 | 61 | Example 2 - Parsing a dd-mm-yy date: 62 | 63 | .. code-block:: python 64 | 65 | >>> from parsy import string, regex 66 | >>> from datetime import date 67 | >>> ddmmyy = regex(r'[0-9]{2}').map(int).sep_by(string("-"), min=3, max=3).combine( 68 | ... 
lambda d, m, y: date(2000 + y, m, d)) 69 | >>> ddmmyy.parse('06-05-14') 70 | datetime.date(2014, 5, 6) 71 | 72 | 73 | 74 | 75 | To learn how to use parsy, you should continue with: 76 | 77 | * the :doc:`tutorial `, especially if you are not familiar with this 78 | type of parser library. 79 | * the :doc:`parser generator decorator ` 80 | * the :doc:`builtin parser primitives ` 81 | * the :doc:`method and combinator reference ` 82 | 83 | Other Python projects 84 | ===================== 85 | 86 | This library isn’t for everyone or for every project. It excels at quickly 87 | writing easy-to-read parsers for relatively small languages, and it’s great if 88 | you are a relative newcomer to the subject of parsing but want something better 89 | than ``str.split``. If you have demanding needs in terms of performance, or 90 | producing good error messages, you may need to look elsewhere. Below are some 91 | other Python libraries you might consider: 92 | 93 | * `PLY `_. A pure Python implementation of 94 | the classic lex/yacc parsing tools. It is well suited to large grammars 95 | that would be found in typical programming languages. 96 | 97 | * `Lark `_. With Lark you write a grammar 98 | definition in a separate mini-language as a string, and have a parser 99 | generated for you, rather than writing the grammar in Python. It has the 100 | advantage of speed and being able to use different parsing algorithms. 101 | 102 | * `pyparsing `_. Also a combinator approach, 103 | but in general much less cleanly implemented, and rather scattered 104 | documentation, although it has more builtin functionality in terms 105 | of provided utilities for certain parsing tasks. 106 | 107 | * `funcparserlib `_ - the most 108 | similar to parsy. It differs from parsy mainly in normally using a separate 109 | tokenization phase and lacking the convenience of the :func:`generate` method 110 | for creating parsers. 
111 | -------------------------------------------------------------------------------- /docs/ref/generating.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Generating a parser 3 | =================== 4 | 5 | .. currentmodule:: parsy 6 | .. function:: generate 7 | 8 | ``generate`` converts a generator function (one that uses the ``yield`` keyword) 9 | into a parser. The generator function must yield parsers. These parsers are 10 | applied successively and their results are sent back to the generator using the 11 | ``.send()`` protocol. The generator function should return the final result of 12 | the parsing. Alternatively it can return another parser, which is equivalent to 13 | applying it and returning its result. 14 | 15 | Motivation and examples 16 | ======================= 17 | 18 | Constructing parsers by using combinators and :class:`Parser` methods to make 19 | larger parsers works well for many simpler cases. However, for more complex 20 | cases the ``generate`` function decorator is both more readable and more 21 | powerful. (For those coming from Haskell/Parsec, this method provides an 22 | acceptable substitute for ``do`` notation). 23 | 24 | Alternative syntax to combinators 25 | --------------------------------- 26 | 27 | The first example just shows a different way of building a parser that could 28 | have easily been built using combinators: 29 | 30 | .. code:: python 31 | 32 | from parsy import generate 33 | 34 | @generate("form") 35 | def form(): 36 | """ 37 | Parse an s-expression form, like (a b c). 38 | An equivalent to lparen >> expr.many() << rparen 39 | """ 40 | yield lparen 41 | exprs = yield expr.many() 42 | yield rparen 43 | return exprs 44 | 45 | In the example above, the parser was given a string name ``"form"``, which does 46 | the same as :meth:`Parser.desc`. This is not required, as per the examples below. 
47 | 48 | Note that there is no guarantee that the entire function is executed: if any of 49 | the yielded parsers fails, the function will not complete, and parsy will try to 50 | backtrack to an alternative parser if there is one. 51 | 52 | Building complex objects 53 | ------------------------ 54 | 55 | The second example shows how you can use multiple parse results to build up a 56 | complex object: 57 | 58 | .. code:: python 59 | 60 | from datetime import date 61 | 62 | from parsy import generate, regex, string 63 | 64 | @generate 65 | def date(): 66 | """ 67 | Parse a date in the format YYYY-MM-DD 68 | """ 69 | year = yield regex("[0-9]{4}").map(int) 70 | yield string("-") 71 | month = yield regex("[0-9]{2}").map(int) 72 | yield string("-") 73 | day = yield regex("[0-9]{2}").map(int) 74 | 75 | return date(year, month, day) 76 | 77 | This could also have been achieved using :func:`seq` and :meth:`Parser.combine`. 78 | 79 | Using values already parsed 80 | --------------------------- 81 | 82 | The third example shows how we can use an earlier parsed value to influence the 83 | subsequent parsing. This example parses Hollerith constants. Hollerith constants 84 | are a way of specifying an arbitrary set of characters by first writing the 85 | integer that specifies the length, followed by the character H, followed by the 86 | set of characters. For example, ``pancakes`` would be written ``8Hpancakes``. 87 | 88 | .. code:: python 89 | 90 | from parsy import generate, regex, string, any_char 91 | 92 | @generate 93 | def hollerith(): 94 | num = yield regex(r'[0-9]+').map(int) 95 | yield string('H') 96 | return any_char.times(num).concat() 97 | 98 | (You may want to compare this with an `implementation of Hollerith constants 99 | `_ that 100 | uses `pyparsing `_, originally by John 101 | Shipman from his `pyparsing docs 102 | `_.) 
103 | 104 | There are also more complex examples in the :ref:`tutorial 105 | ` of using the ``generate`` decorator to create parsers 106 | where there is logic that is conditional upon earlier parsed values. 107 | 108 | .. _recursive-definitions-with-generate: 109 | 110 | Implementing recursive definitions 111 | ---------------------------------- 112 | 113 | A fourth example shows how you can use this syntax for grammars that you would 114 | like to define recursively (or mutually recursively). 115 | 116 | Say we want to be able to parse an s-expression like syntax which uses 117 | parentheses for grouping items into a tree structure, like the following:: 118 | 119 | (0 1 (2 3) (4 5 6) 7 8) 120 | 121 | A naive approach would be: 122 | 123 | .. code-block:: python 124 | 125 | simple = regex('[0-9]+').map(int) 126 | group = string('(') >> expr.sep_by(string(' ')) << string(')') 127 | expr = simple | group 128 | 129 | The problem is that the second line will get a ``NameError`` because ``expr`` is 130 | not defined yet. 131 | 132 | One way to solve this is to use :ref:`forward-declarations`. But another uses 133 | ``@generate``. 134 | 135 | Using the ``@generate`` syntax will introduce a level of laziness in resolving 136 | ``expr`` that allows things to work: 137 | 138 | .. code-block:: python 139 | 140 | simple = regex('[0-9]+').map(int) 141 | 142 | @generate 143 | def group(): 144 | return (yield string('(') >> expr.sep_by(string(' ')) << string(')')) 145 | 146 | expr = simple | group 147 | 148 | .. code-block:: python 149 | 150 | >>> expr.parse("(0 1 (2 3) (4 5 6) 7 8)") 151 | [0, 1, [2, 3], [4, 5, 6], 7, 8] 152 | -------------------------------------------------------------------------------- /docs/ref/index.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | API reference 3 | =============== 4 | 5 | ..
toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | primitives 10 | methods_and_combinators 11 | generating 12 | parser_instances 13 | -------------------------------------------------------------------------------- /docs/ref/methods_and_combinators.rst: -------------------------------------------------------------------------------- 1 | ========================================= 2 | Parser methods, operators and combinators 3 | ========================================= 4 | 5 | Parser methods 6 | ============== 7 | 8 | Parser objects are returned by any of the built-in parser :doc:`primitives`. They 9 | can be used and manipulated as below. 10 | 11 | .. currentmodule:: parsy 12 | 13 | .. class:: Parser 14 | 15 | .. method:: __init__(wrapped_fn) 16 | 17 | This is a low level function to create new parsers that is used internally 18 | but is rarely needed by users of the parsy library. It should be passed a 19 | parsing function, which takes two arguments - a string/list to be parsed 20 | and the current index into the list - and returns a :class:`Result` object, 21 | as described in :doc:`/ref/parser_instances`. 22 | 23 | The following methods are for actually **using** the parsers that you have 24 | created: 25 | 26 | .. method:: parse(string_or_list) 27 | 28 | Attempts to parse the given string (or list). If the parse is successful 29 | and consumes the entire string, the result is returned - otherwise, a 30 | ``ParseError`` is raised. 31 | 32 | Instead of passing a string, you can in fact pass a list of tokens. Almost 33 | all the examples assume strings for simplicity. Some of the primitives are 34 | also clearly string specific, and a few of the combinators (such as 35 | :meth:`Parser.concat`) are string specific, but most of the rest of the 36 | library will work with tokens just as well. See :doc:`/howto/lexing` for 37 | more information. 38 | 39 | .. 
method:: parse_partial(string_or_list) 40 | 41 | Similar to ``parse``, except that it does not require the entire 42 | string (or list) to be consumed. Returns a tuple of 43 | ``(result, remainder)``, where ``remainder`` is the part of 44 | the string (or list) that was left over. 45 | 46 | The following methods are essentially **combinators** that produce new 47 | parsers from the existing one. They are provided as methods on ``Parser`` for 48 | convenience. More combinators are documented below. 49 | 50 | .. method:: desc(string) 51 | 52 | Adds a description to the parser, which is used in the error message 53 | if parsing fails. 54 | 55 | >>> year = regex(r'[0-9]{4}').desc('4 digit year') 56 | >>> year.parse('123') 57 | ParseError: expected 4 digit year at 0:0 58 | 59 | .. method:: then(other_parser) 60 | 61 | Returns a parser which, if the initial parser succeeds, will continue parsing 62 | with ``other_parser``. This will produce the value produced by 63 | ``other_parser``. 64 | 65 | .. code:: python 66 | 67 | >>> string('x').then(string('y')).parse('xy') 68 | 'y' 69 | 70 | See also :ref:`parser-rshift`. 71 | 72 | .. method:: skip(other_parser) 73 | 74 | Similar to :meth:`Parser.then`, except the resulting parser will use 75 | the value produced by the first parser. 76 | 77 | .. code:: python 78 | 79 | >>> string('x').skip(string('y')).parse('xy') 80 | 'x' 81 | 82 | See also :ref:`parser-lshift`. 83 | 84 | .. method:: many() 85 | 86 | Returns a parser that expects the initial parser 0 or more times, and 87 | produces a list of the results. Note that this parser does not fail if 88 | nothing matches, but instead consumes nothing and produces an empty list. 89 | 90 | .. code:: python 91 | 92 | >>> parser = regex(r'[a-z]').many() 93 | >>> parser.parse('') 94 | [] 95 | >>> parser.parse('abc') 96 | ['a', 'b', 'c'] 97 | 98 | .. 
method:: times(min [, max=min]) 99 | 100 | Returns a parser that expects the initial parser at least ``min`` times, 101 | and at most ``max`` times, and produces a list of the results. If only one 102 | argument is given, the parser is expected exactly that number of times. 103 | 104 | .. method:: at_most(n) 105 | 106 | Returns a parser that expects the initial parser at most ``n`` times, and 107 | produces a list of the results. 108 | 109 | .. method:: at_least(n) 110 | 111 | Returns a parser that expects the initial parser at least ``n`` times, and 112 | produces a list of the results. 113 | 114 | .. method:: until(other_parser, [min=0, max=inf, consume_other=False]) 115 | 116 | Returns a parser that expects the initial parser followed by ``other_parser``. 117 | The initial parser is expected at least ``min`` times and at most ``max`` times. 118 | By default, it does not consume ``other_parser`` and it produces a list of the 119 | results excluding ``other_parser``. If ``consume_other`` is ``True`` then 120 | ``other_parser`` is consumed and its result is included in the list of results. 121 | 122 | .. code:: python 123 | 124 | >>> seq(string('A').until(string('B')), string('BC')).parse('AAABC') 125 | [['A','A','A'], 'BC'] 126 | >>> string('A').until(string('B')).then(string('BC')).parse('AAABC') 127 | 'BC' 128 | >>> string('A').until(string('BC'), consume_other=True).parse('AAABC') 129 | ['A', 'A', 'A', 'BC'] 130 | 131 | .. versionadded:: 2.0 132 | 133 | .. method:: optional(default=None) 134 | 135 | Returns a parser that expects the initial parser zero or once, and maps 136 | the result to a given default value in the case of no match. If no default 137 | value is given, ``None`` is used. 138 | 139 | .. code:: python 140 | 141 | >>> string('A').optional().parse('A') 142 | 'A' 143 | >>> string('A').optional().parse('') 144 | None 145 | >>> string('A').optional('Oops').parse('') 146 | 'Oops' 147 | 148 | .. 
method:: map(map_function) 149 | 150 | Returns a parser that transforms the produced value of the initial parser 151 | with ``map_function``. 152 | 153 | .. code:: python 154 | 155 | >>> regex(r'[0-9]+').map(int).parse('1234') 156 | 1234 157 | 158 | This is the simplest way to convert parsed strings into the data types 159 | that you need. See also :meth:`combine` and :meth:`combine_dict` below. 160 | 161 | .. method:: combine(combine_fn) 162 | 163 | Returns a parser that transforms the produced values of the initial parser 164 | with ``combine_fn``, passing the arguments using ``*args`` syntax. 165 | 166 | Where the current parser produces an iterable of values, this can be a 167 | more convenient way to combine them than :meth:`~Parser.map`. 168 | 169 | Example 1 - the argument order of our callable already matches: 170 | 171 | .. code:: python 172 | 173 | >>> from datetime import date 174 | >>> yyyymmdd = seq(regex(r'[0-9]{4}').map(int), 175 | ... regex(r'[0-9]{2}').map(int), 176 | ... regex(r'[0-9]{2}').map(int)).combine(date) 177 | >>> yyyymmdd.parse('20140506') 178 | datetime.date(2014, 5, 6) 179 | 180 | Example 2 - the argument order of our callable doesn't match, and 181 | we need to adjust a parameter, so we can fix it using a lambda. 182 | 183 | .. code:: python 184 | 185 | >>> ddmmyy = regex(r'[0-9]{2}').map(int).times(3).combine( 186 | ... lambda d, m, y: date(2000 + y, m, d)) 187 | >>> ddmmyy.parse('060514') 188 | datetime.date(2014, 5, 6) 189 | 190 | The equivalent ``lambda`` to use with ``map`` would be ``lambda res: 191 | date(2000 + res[2], res[1], res[0])``, which is less readable. The version 192 | with ``combine`` also ensures that exactly 3 items are generated by the 193 | previous parser, otherwise you get a ``TypeError``. 194 | 195 | .. 
method:: combine_dict(fn) 196 | 197 | Returns a parser that transforms the value produced by the initial parser 198 | using the supplied function/callable, passing the arguments using the 199 | ``**kwargs`` syntax. 200 | 201 | The value produced by the initial parser must be a mapping/dictionary from 202 | names to values, or a list of two-tuples, or something else that can be 203 | passed to the ``dict`` constructor. 204 | 205 | If ``None`` is present as a key in the dictionary it will be removed 206 | before passing to ``fn``, as will all keys starting with ``_``. 207 | 208 | **Motivation:** 209 | 210 | For building complex objects, this can be more convenient, flexible and 211 | readable than :meth:`map` or :meth:`combine`, because by avoiding 212 | positional arguments we can avoid a dependence on the order of components 213 | in the string being parsed and in the argument order of callables being 214 | used. It is especially designed to be used in conjunction with :func:`seq` 215 | and :meth:`tag`. 216 | 217 | We can make use of the ``**kwargs`` version of :func:`seq` to produce a 218 | very readable definition: 219 | 220 | .. code:: python 221 | 222 | >>> ddmmyyyy = seq( 223 | ... day=regex(r'[0-9]{2}').map(int), 224 | ... month=regex(r'[0-9]{2}').map(int), 225 | ... year=regex(r'[0-9]{4}').map(int), 226 | ... ).combine_dict(date) 227 | >>> ddmmyyyy.parse('04052003') 228 | datetime.date(2003, 5, 4) 229 | 230 | (If that is hard to understand, use a Python REPL, and examine the result 231 | of the ``parse`` call if you remove the ``combine_dict`` call). 232 | 233 | Here we used ``datetime.date`` which accepts keyword arguments. For your 234 | own parsing needs you will often use custom data types. You can create 235 | these however you like, but we suggest `dataclasses 236 | `_ (stdlib), `attrs 237 | `_ or `pydantic 238 | `_. You can also use 239 | `namedtuple 240 | `_ 241 | for simple cases. 
242 | 243 | The following example shows the use of ``_`` as a prefix to remove 244 | elements you are not interested in, and the use of ``namedtuple`` to 245 | create a simple data-structure. 246 | 247 | .. code-block:: python 248 | 249 | >>> from collections import namedtuple 250 | >>> Pair = namedtuple('Pair', ['name', 'value']) 251 | >>> name = regex("[A-Za-z]+") 252 | >>> int_value = regex("[0-9]+").map(int) 253 | >>> bool_value = string("true").result(True) | string("false").result(False) 254 | >>> pair = seq( 255 | ... name=name, 256 | ... __eq=string('='), 257 | ... value=int_value | bool_value, 258 | ... __sc=string(';'), 259 | ... ).combine_dict(Pair) 260 | >>> pair.parse("foo=123;") 261 | Pair(name='foo', value=123) 262 | >>> pair.parse("BAR=true;") 263 | Pair(name='BAR', value=True) 264 | 265 | You could also use ``<<`` or ``>>`` for the unwanted parts (but in some 266 | cases this is less convenient): 267 | 268 | .. code-block:: python 269 | 270 | >>> pair = seq( 271 | ... name=name << string('='), 272 | ... value=(int_value | bool_value) << string(';') 273 | ... ).combine_dict(Pair) 274 | 275 | .. versionchanged:: 1.2 276 | Allow lists as well as dicts to be consumed, and filter out ``None``. 277 | 278 | .. versionchanged:: 1.3 279 | Stripping of args starting with ``_`` 280 | 281 | .. method:: tag(name) 282 | 283 | Returns a parser that wraps the produced value of the initial parser in a 284 | 2 tuple containing ``(name, value)``. This provides a very simple way to 285 | label parsed components. e.g.: 286 | 287 | .. code:: python 288 | 289 | >>> day = regex(r'[0-9]+').map(int) 290 | >>> month = string_from("January", "February", "March", "April", "May", 291 | ... "June", "July", "August", "September", "October", 292 | ... "November", "December") 293 | >>> day.parse("10") 294 | 10 295 | >>> day.tag("day").parse("10") 296 | ('day', 10) 297 | 298 | >>> seq(day.tag("day") << whitespace, 299 | ... month.tag("month") 300 | ... 
).parse("10 September") 301 | [('day', 10), ('month', 'September')] 302 | 303 | It also works well when combined with ``.map(dict)`` to get a dictionary 304 | of values: 305 | 306 | .. code:: python 307 | 308 | >>> seq(day.tag("day") << whitespace, 309 | ... month.tag("month") 310 | ... ).map(dict).parse("10 September") 311 | {'day': 10, 'month': 'September'} 312 | 313 | ... and with :meth:`combine_dict` to build other objects. 314 | 315 | Usually it is better to use :func:`seq` with keyword arguments if you want 316 | to produce a dictionary. 317 | 318 | .. method:: concat() 319 | 320 | Returns a parser that concatenates together (as a string) the previously 321 | produced values. Usually used after :meth:`~Parser.many` and similar 322 | methods that produce multiple values. 323 | 324 | .. code:: python 325 | 326 | >>> letter.at_least(1).parse("hello") 327 | ['h', 'e', 'l', 'l', 'o'] 328 | >>> letter.at_least(1).concat().parse("hello") 329 | 'hello' 330 | 331 | .. method:: result(val) 332 | 333 | Returns a parser that, if the initial parser succeeds, always produces 334 | ``val``. 335 | 336 | .. code:: python 337 | 338 | >>> string('foo').result(42).parse('foo') 339 | 42 340 | 341 | .. method:: should_fail(description) 342 | 343 | Returns a parser that fails when the initial parser succeeds, and succeeds 344 | when the initial parser fails (consuming no input). A description must 345 | be passed which is used in parse failure messages. 346 | 347 | This is essentially a negative lookahead: 348 | 349 | .. code:: python 350 | 351 | >>> p = letter << string(" ").should_fail("not space") 352 | >>> p.parse('A') 353 | 'A' 354 | >>> p.parse('A ') 355 | ParseError: expected 'not space' at 0:1 356 | 357 | It is also useful for implementing things like parsing repeatedly until a 358 | marker: 359 | 360 | .. code:: python 361 | 362 | >>> (string(";").should_fail("not ;") >> letter).many().concat().parse_partial('ABC;') 363 | ('ABC', ';') 364 | 365 | ..
method:: bind(fn) 366 | 367 | Returns a parser which, if the initial parser is successful, passes the 368 | result to ``fn``, and continues with the parser returned from ``fn``. This 369 | is the monadic binding operation. However, since we don't have Haskell's 370 | ``do`` notation in Python, using this is very awkward. Instead, you should 371 | look at :doc:`/ref/generating` which provides a much nicer syntax for those 372 | cases where you would have needed ``do`` notation in Parsec. 373 | 374 | .. method:: sep_by(sep, min=0, max=inf) 375 | 376 | Like :meth:`Parser.times`, this returns a new parser that repeats 377 | the initial parser and collects the results in a list, but in this case separated 378 | by the parser ``sep`` (whose return value is discarded). By default it 379 | repeats with no limit, but minimum and maximum values can be supplied. 380 | 381 | .. code:: python 382 | 383 | >>> csv = letter.at_least(1).concat().sep_by(string(",")) 384 | >>> csv.parse("abc,def") 385 | ['abc', 'def'] 386 | 387 | .. method:: mark() 388 | 389 | Returns a parser that wraps the initial parser's result in a value 390 | containing column and line information of the match, as well as the 391 | original value. The new value is a 3-tuple: 392 | 393 | .. code:: python 394 | 395 | ((start_row, start_column), 396 | original_value, 397 | (end_row, end_column)) 398 | 399 | This is useful for being able to report problems with parsing more 400 | accurately, especially if you are using parsy as a :doc:`lexer 401 | </howto/lexing>` and want subsequent parsing of the token stream to be 402 | able to report original positions in error messages etc. 403 | 404 | .. _operators: 405 | 406 | Parser operators 407 | ================ 408 | 409 | This section describes operators that you can use on :class:`Parser` objects to 410 | build new parsers. 411 | 412 | 413 | ..
_parser-or: 414 | 415 | ``|`` operator 416 | -------------- 417 | 418 | ``parser | other_parser`` 419 | 420 | Returns a parser that tries ``parser`` and, if it fails, backtracks 421 | and tries ``other_parser``. These can be chained together. 422 | 423 | The resulting parser will produce the value produced by the first 424 | successful parser. 425 | 426 | .. code:: python 427 | 428 | >>> parser = string('x') | string('y') | string('z') 429 | >>> parser.parse('x') 430 | 'x' 431 | >>> parser.parse('y') 432 | 'y' 433 | >>> parser.parse('z') 434 | 'z' 435 | 436 | Note that ``other_parser`` will only be tried if ``parser`` cannot consume any 437 | input and fails. ``other_parser`` is not used in the case that **later** parser 438 | components fail. This means that the order of the operands matters - for 439 | example: 440 | 441 | .. code:: python 442 | 443 | >>> ((string('A') | string('AB')) + string('C')).parse('ABC') 444 | ParseError: expected 'C' at 0:1 445 | >>> ((string('AB') | string('A')) + string('C')).parse('ABC') 446 | 'ABC' 447 | >>> ((string('AB') | string('A')) + string('C')).parse('AC') 448 | 'AC' 449 | 450 | .. _parser-lshift: 451 | 452 | ``<<`` operator 453 | --------------- 454 | 455 | ``parser << other_parser`` 456 | 457 | The same as ``parser.skip(other_parser)`` - see :meth:`Parser.skip`. 458 | 459 | (Hint - the arrows point at the important parser!) 460 | 461 | .. code:: python 462 | 463 | >>> (string('x') << string('y')).parse('xy') 464 | 'x' 465 | 466 | .. _parser-rshift: 467 | 468 | ``>>`` operator 469 | --------------- 470 | 471 | ``parser >> other_parser`` 472 | 473 | The same as ``parser.then(other_parser)`` - see :meth:`Parser.then`. 474 | 475 | (Hint - the arrows point at the important parser!) 476 | 477 | .. code-block:: python 478 | 479 | >>> (string('x') >> string('y')).parse('xy') 480 | 'y' 481 | 482 | 483 | ..
_parser-plus: 484 | 485 | ``+`` operator 486 | -------------- 487 | 488 | ``parser1 + parser2`` 489 | 490 | Requires both parsers to match in order, and adds the two results together using 491 | the + operator. This will only work if the results support the plus operator 492 | (e.g. strings and lists): 493 | 494 | 495 | .. code-block:: python 496 | 497 | >>> (string("x") + regex("[0-9]")).parse("x1") 498 | "x1" 499 | 500 | >>> (string("x").many() + regex("[0-9]").map(int).many()).parse("xx123") 501 | ['x', 'x', 1, 2, 3] 502 | 503 | The plus operator is a convenient shortcut for: 504 | 505 | >>> seq(parser1, parser2).combine(lambda a, b: a + b) 506 | 507 | .. _parser-times: 508 | 509 | ``*`` operator 510 | -------------- 511 | 512 | ``parser1 * number`` 513 | 514 | This is a shortcut for doing :meth:`Parser.times`: 515 | 516 | .. code-block:: python 517 | 518 | >>> (string("x") * 3).parse("xxx") 519 | ["x", "x", "x"] 520 | 521 | You can also set both upper and lower bounds by multiplying by a range: 522 | 523 | .. code-block:: python 524 | 525 | >>> (string("x") * range(0, 3)).parse("xxx") 526 | ParseError: expected EOF at 0:2 527 | 528 | (Note the normal semantics of ``range`` are respected - the second number is an 529 | *exclusive* upper bound, not inclusive). 530 | 531 | Parser combinators 532 | ================== 533 | 534 | .. function:: alt(*parsers) 535 | 536 | Creates a parser from the passed in argument list of alternative parsers, 537 | which are tried in order, moving to the next one if the current one fails, as 538 | per the :ref:`parser-or` - in other words, it matches any one of the 539 | alternative parsers. 540 | 541 | Example using ``*args`` syntax to pass a list of parsers that have been 542 | generated by mapping :func:`string` over a list of characters: 543 | 544 | .. 
code-block:: python 545 | 546 | >>> hexdigit = alt(*map(string, "0123456789abcdef")) 547 | 548 | (In this case you would be better off using :func:`char_from`) 549 | 550 | Note that the order of arguments matters, as described in :ref:`parser-or`. 551 | 552 | .. function:: seq(*parsers, **kw_parsers) 553 | 554 | Creates a parser that runs a sequence of parsers in order and combines 555 | their results in a list. 556 | 557 | 558 | .. code-block:: python 559 | 560 | >>> x_bottles_of_y_on_the_z = \ 561 | ... seq(regex(r"[0-9]+").map(int) << string(" bottles of "), 562 | ... regex(r"\S+") << string(" on the "), 563 | ... regex(r"\S+") 564 | ... ) 565 | >>> x_bottles_of_y_on_the_z.parse("99 bottles of beer on the wall") 566 | [99, 'beer', 'wall'] 567 | 568 | 569 | You can also use :func:`seq` with keyword arguments instead of positional 570 | arguments. In this case, the produced value is a dictionary of the individual 571 | values, rather than a sequence. This can make the produced value easier to 572 | consume. 573 | 574 | .. code-block:: python 575 | 576 | >>> name = seq(first_name=regex(r"\S+") << whitespace, 577 | ... last_name=regex(r"\S+")) 578 | >>> name.parse("Jane Smith") 579 | {'first_name': 'Jane', 580 | 'last_name': 'Smith'} 581 | 582 | .. versionchanged:: 1.1 583 | Added ``**kwargs`` option. 584 | 585 | .. note:: 586 | As an alternative, see :meth:`Parser.tag` for a way of labelling parsed 587 | components and producing dictionaries. 588 | 589 | 590 | Other combinators 591 | ================= 592 | 593 | Parsy does not try to include every possible combinator - there is no reason why 594 | you cannot create your own for your needs using the built-in combinators and 595 | primitives. If you find something that is very generic and would be very useful 596 | to have as a built-in, please :doc:`submit </contributing>` as a PR!
597 | -------------------------------------------------------------------------------- /docs/ref/parser_instances.rst: -------------------------------------------------------------------------------- 1 | ============================= 2 | Creating new Parser instances 3 | ============================= 4 | 5 | .. currentmodule:: parsy 6 | 7 | Normally you will create Parser instances using the provided :doc:`primitives 8 | ` and :doc:`combinators `. 9 | 10 | However it is also possible to create them manually, as below. 11 | 12 | The :class:`Parser` constructor should be passed a function that takes the 13 | string/list to be parsed and an index into that string, and returns a 14 | :class:`Result` object. The ``Result`` object will be created either using 15 | :meth:`Result.success` or :meth:`Result.failure` to indicate success or failure 16 | respectively. :meth:`Result.success` should be passed the next index to continue 17 | parsing with, and the value that is returned from the parsing. 18 | :meth:`Result.failure` should return the index at which failure occurred i.e. 19 | the index passed in, and a string indicating what the parser expected to find. 20 | 21 | The ``Parser`` constructor will usually be called using decorator syntax. In 22 | order to pass parameters to the ``Parser`` instance, it is typically created 23 | using a closure. In the example below, we create a parser that matches any 24 | string/list of tokens of a given length. This could also be written as something 25 | like ``any_char.times(n).concat()`` but the following will be more efficient: 26 | 27 | 28 | .. code-block:: python 29 | 30 | def consume(n): 31 | 32 | @Parser 33 | def consumer(stream, index): 34 | items = stream[index:index + n] 35 | if len(items) == n: 36 | return Result.success(index + n, items) 37 | else: 38 | return Result.failure(index, "{0} items".format(n)) 39 | 40 | return consumer 41 | 42 | 43 | .. 
code-block:: python 44 | 45 | >>> consume(3).many().parse('abc123def') 46 | ['abc', '123', 'def'] 47 | 48 | 49 | Result objects 50 | ============== 51 | 52 | .. class:: Result 53 | 54 | .. staticmethod:: success(next_index, value) 55 | 56 | Creates a ``Result`` object indicating parsing succeeded. The index to 57 | continue parsing at, and the value retrieved from the parsing, should be 58 | passed. 59 | 60 | .. staticmethod:: failure(index, expected) 61 | 62 | Creates a ``Result`` object indicating parsing failed. The index to 63 | continue parsing at, and a string representing what the parser expected to 64 | find, should be passed. 65 | -------------------------------------------------------------------------------- /docs/ref/primitives.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | Parsing primitives 3 | ================== 4 | 5 | These are the lowest level building blocks for creating parsers. 6 | 7 | .. module:: parsy 8 | 9 | .. function:: string(expected_string, transform=None) 10 | 11 | Returns a parser that expects the ``expected_string`` and produces 12 | that string value. 13 | 14 | Optionally, a transform function can be passed, which will be used on both 15 | the expected string and tested string. This allows things like case 16 | insensitive matches to be done. This function must not change the length of 17 | the string (as determined by ``len``). The returned value of the parser will 18 | always be ``expected_string`` in its un-transformed state. 19 | 20 | .. code-block:: python 21 | 22 | >>> parser = string("Hello", transform=lambda s: s.upper()) 23 | >>> parser.parse("Hello") 24 | 'Hello' 25 | >>> parser.parse("hello") 26 | 'Hello' 27 | >>> parser.parse("HELLO") 28 | 'Hello' 29 | 30 | .. versionchanged:: 1.2 31 | Added ``transform`` argument. 32 | 33 | .. 
function:: regex(exp, flags=0, group=0) 34 | 35 | Returns a parser that expects the given ``exp``, and produces the 36 | matched string. ``exp`` can be a compiled regular expression, or a 37 | string which will be compiled with the given ``flags``. 38 | 39 | Optionally, accepts ``group``, which is passed to `re.Match.group 40 | `_ to 41 | return the text from a capturing group in the regex instead of the 42 | entire match. 43 | 44 | Using a regex parser for small building blocks, instead of building up 45 | parsers from primitives like :func:`string`, :func:`test_char` and 46 | :meth:`Parser.times` combinators etc., can have several advantages, 47 | including: 48 | 49 | * It can be more succinct e.g. compare: 50 | 51 | .. code-block:: python 52 | 53 | >>> (string('a') | string('b')).times(1, 4) 54 | >>> regex(r'[ab]{1,4}') 55 | 56 | * It can return the entire matched string as a single item, 57 | so you don't need to use :meth:`Parser.concat`. 58 | * It can return a part of the matched string using a capturing group 59 | from the regex, so you don't need to split the string yourself. 60 | 61 | You can use named or numbered groups, just like with `re.Match.group 62 | `_. 63 | Tuples also work, and return the captured text from multiple groups. 64 | 65 | .. code-block:: python 66 | 67 | >>> regex(r'([0-9]{4})-([0-9]{2})', group=1).parse('2020-03') 68 | '2020' 69 | >>> regex(r'(?P[0-9]{4})-(?P[0-9]{2})', group='month').parse('2020-03') 70 | '03' 71 | >>> regex(r'([0-9]{4})-([0-9]{2})', group=(1,2)).parse('2020-03') 72 | ('2020', '03') 73 | 74 | * It can be much faster. 75 | 76 | .. function:: test_char(func, description) 77 | 78 | Returns a parser that tests a single character with the callable 79 | ``func``. If ``func`` returns ``True``, the parse succeeds, otherwise 80 | the parse fails with the description ``description``. 81 | 82 | .. code-block:: python 83 | 84 | >>> ascii = test_char(lambda c: ord(c) < 128, 85 | ... 
'ascii character') 86 | >>> ascii.parse('A') 87 | 'A' 88 | 89 | .. function:: test_item(func, description) 90 | 91 | Returns a parser that tests a single item from the list of items being 92 | consumed, using the callable ``func``. If ``func`` returns ``True``, the 93 | parse succeeds, otherwise the parse fails with the description 94 | ``description``. 95 | 96 | If you are parsing a string, i.e. a list of characters, you can use 97 | :func:`test_char` instead. (In fact the implementations are identical, these 98 | functions are aliases for the sake of clear code). 99 | 100 | .. code-block:: python 101 | 102 | >>> numeric = test_item(str.isnumeric, 'numeric') 103 | >>> numeric.many().parse(['123', '456']) 104 | ['123', '456'] 105 | 106 | .. function:: char_from(characters) 107 | 108 | Accepts a string and returns a parser that matches and returns one character 109 | from the string. 110 | 111 | .. code-block:: python 112 | 113 | >>> char_from('abc').parse('a') 114 | 'a' 115 | 116 | .. function:: string_from(*strings, transform=None) 117 | 118 | Accepts a sequence of strings as positional arguments, and returns a parser 119 | that matches and returns one string from the list. The list is first sorted 120 | in descending length order, so that overlapping strings are handled correctly 121 | by checking the longest one first. 122 | 123 | .. code-block:: python 124 | 125 | >>> string_from('y', 'yes').parse('yes') 126 | 'yes' 127 | 128 | Optionally accepts ``transform``, which is passed to :func:`string` (see the 129 | documentation there). 130 | 131 | .. versionchanged:: 1.2 132 | Added ``transform`` argument. 133 | 134 | 135 | .. function:: match_item(item, description=None) 136 | 137 | Returns a parser that tests the next item (or character) from the stream (or 138 | string) for equality against the provided item. Optionally a string 139 | description can be passed. 
140 | 141 | Parsing a string: 142 | 143 | >>> letter_A = match_item('A') 144 | >>> letter_A.parse_partial('ABC') 145 | ('A', 'BC') 146 | 147 | Parsing a list of tokens: 148 | 149 | >>> hello = match_item('hello') 150 | >>> hello.parse_partial(['hello', 'how', 'are', 'you']) 151 | ('hello', ['how', 'are', 'you']) 152 | 153 | .. data:: eof 154 | 155 | A parser that only succeeds if the end of the stream has been reached. 156 | 157 | >>> eof.parse_partial("") 158 | (None, '') 159 | >>> eof.parse_partial("123") 160 | Traceback (most recent call last): 161 | ... 162 | parsy.ParseError: expected 'EOF' at 0:0 163 | 164 | .. function:: success(val) 165 | 166 | Returns a parser that does not consume any of the stream, but 167 | produces ``val``. 168 | 169 | .. function:: fail(expected) 170 | 171 | Returns a parser that always fails with the provided error message. 172 | 173 | .. function:: from_enum(enum_cls, transform=None) 174 | 175 | Given a class that is an `enum.Enum 176 | `_ class, returns a parser that 177 | will parse the values (or the string representations of the values) and 178 | return the corresponding enum item. 179 | 180 | .. code-block:: python 181 | 182 | >>> from enum import Enum 183 | >>> class Pet(Enum): 184 | ... CAT = "cat" 185 | ... DOG = "dog" 186 | >>> pet = from_enum(Pet) 187 | >>> pet.parse("cat") 188 | 189 | 190 | ``str`` is first run on the values (for the case of values that are integers 191 | etc.) to create the strings which are turned into parsers using 192 | :func:`string`. 193 | 194 | If ``transform`` is provided, it is passed to :func:`string` when creating 195 | the parser (allowing for things like case insensitive parsing). 196 | 197 | .. function:: peek(parser) 198 | 199 | Returns a lookahead parser that parses the input stream without consuming 200 | chars. 201 | 202 | .. 
code-block:: python 203 | 204 | >>> peek(any_char).parse_partial("ABC") 205 | ('A', 'ABC') 206 | 207 | Pre-built parsers 208 | ================= 209 | 210 | Some common, pre-built parsers (all of these are :class:`Parser` objects created 211 | using the primitives above): 212 | 213 | 214 | .. data:: any_char 215 | 216 | A parser that matches any single character. 217 | 218 | .. data:: whitespace 219 | 220 | A parser that matches and returns one or more whitespace characters. 221 | 222 | .. data:: letter 223 | 224 | A parser that matches and returns a single letter, as defined by 225 | `str.isalpha `_. 226 | 227 | .. data:: digit 228 | 229 | A parser that matches and returns a single digit, as defined by `str.isdigit 230 | `_. Note that 231 | this includes various unicode characters outside of the normal 0-9 range, 232 | such as ¹²³. 233 | 234 | .. data:: decimal_digit 235 | 236 | A parser that matches and returns a single decimal digit, one of 237 | "0123456789". 238 | 239 | .. data:: line_info 240 | 241 | A parser that consumes no input and always just returns the current line 242 | information, a tuple of (line, column), zero-indexed, where lines are 243 | terminated by ``\n``. This is normally useful when wanting to build more 244 | debugging information into parse failure error messages. 245 | 246 | .. data:: index 247 | 248 | A parser that consumes no input and always just returns the current stream 249 | index. This is normally useful when wanting to build more debugging 250 | information into parse failure error messages. 251 | 252 | 253 | .. _forward-declarations: 254 | 255 | Forward declarations 256 | ==================== 257 | 258 | .. class:: forward_declaration 259 | 260 | When defining parsers for a recursive grammar, you may run into ``NameError`` 261 | problems with a naive approach, because you can’t refer to a Python object 262 | before you have defined it. In this case, :class:`forward_declaration` can be 263 | useful. 
264 | 265 | Say we want to be able to parse an s-expression like syntax which uses 266 | parenthesis for grouping items into a tree structure, like the following:: 267 | 268 | (0 1 (2 3) (4 5 6) 7 8) 269 | 270 | A naive approach would be: 271 | 272 | .. code-block:: python 273 | 274 | simple = regex('[0-9]+').map(int) 275 | group = string('(') >> expr.sep_by(string(' ')) << string(')') 276 | expr = simple | group 277 | 278 | The problem is that the second line will get a ``NameError`` because ``expr`` is 279 | not defined yet, and we’ll have the same problem if we put the ``expr`` 280 | definition first. 281 | 282 | We can solve it like this: 283 | 284 | .. code-block:: python 285 | 286 | from parsy import forward_declaration, regex, string 287 | 288 | expr = forward_declaration() 289 | simple = regex('[0-9]+').map(int) 290 | group = string('(') >> expr.sep_by(string(' ')) << string(')') 291 | expr.become(simple | group) 292 | 293 | 294 | You must use ``.become()`` method exactly once before attempting to use the 295 | parser. 296 | 297 | An alternative to this is to use ``generate`` as described in 298 | :ref:`recursive-definitions-with-generate`. 299 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=4.3.0 2 | sphinx-rtd-theme>=1.2.0rc3,<1.3 3 | -------------------------------------------------------------------------------- /docs/tutorial.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Tutorial 3 | ======== 4 | 5 | .. currentmodule:: parsy 6 | 7 | First :doc:`install parsy `, and check that the documentation you 8 | are reading matches the version you just installed. 9 | 10 | Building an ISO 8601 parser 11 | =========================== 12 | 13 | In this tutorial, we are going to gradually build a parser for a subset of an 14 | ISO 8601 date. 
Specifically, we want to handle dates that look like this: 15 | ``2017-09-25``. 16 | 17 | A problem of this size could admittedly be solved fairly easily with regexes. 18 | But very quickly regexes don’t scale, especially when it comes to getting the 19 | parsed data out, and for this tutorial we need to start with a simple example. 20 | 21 | With parsy, you start by breaking the problem down into the smallest components. 22 | So we need first to match the 4 digit year at the beginning. 23 | 24 | There are various ways we can do this, but a regex works nicely, and 25 | :func:`regex` is a built-in primitive of the parsy library: 26 | 27 | .. code-block:: python 28 | 29 | >>> from parsy import regex 30 | >>> year = regex(r"[0-9]{4}") 31 | 32 | (For those who don’t know regular expressions, the regex ``[0-9]{4}`` means 33 | “match any character from 0123456789 exactly 4 times”.) 34 | 35 | This has produced a :class:`Parser` object which has various methods. We can 36 | immediately check that it works using the :meth:`Parser.parse` method: 37 | 38 | .. code-block:: python 39 | 40 | >>> year.parse("2017") 41 | '2017' 42 | >>> year.parse("abc") 43 | ParseError: expected '[0-9]{4}' at 0:0 44 | 45 | Notice first of all that a parser consumes input (the value we pass to 46 | ``parse``), and it produces an output. In the case of ``regex``, the produced 47 | output is the string that was matched, but this doesn’t have to be the case for 48 | all parsers. 49 | 50 | If there is no match, it raises a ``ParseError``. 51 | 52 | Notice as well that the :meth:`Parser.parse` method expects to consume all the 53 | input, so if there are extra characters at the end, even if it is just 54 | whitespace, parsing will fail with a message saying it expected EOF (End Of 55 | File/Data): 56 | 57 | .. 
code-block:: python 58 | 59 | >>> year.parse("2017 ") 60 | ParseError: expected 'EOF' at 0:4 61 | 62 | You can use :meth:`Parser.parse_partial` if you want to just keep parsing as far 63 | as possible and not throw an exception. 64 | 65 | To parse the data, we need to parse months, days, and the dash symbol, so we’ll 66 | add those: 67 | 68 | .. code-block:: python 69 | 70 | >>> from parsy import string 71 | >>> month = regex("[0-9]{2}") 72 | >>> day = regex("[0-9]{2}") 73 | >>> dash = string("-") 74 | 75 | We’ve added use of the :func:`string` primitive here, that matches just the 76 | string passed in, and returns that string. 77 | 78 | Next we need to combine these parsers into something that will parse the whole 79 | date. The simplest way is to use the :meth:`Parser.then` method: 80 | 81 | .. code-block:: python 82 | 83 | >>> fulldate = year.then(dash).then(month).then(dash).then(day) 84 | 85 | The ``then`` method returns a new parser that requires the first parser to 86 | succeed, followed by the second parser (the argument to the method). 87 | 88 | We could also write this using the :ref:`parser-rshift` which 89 | does the same thing as :meth:`Parser.then`: 90 | 91 | .. code-block:: python 92 | 93 | >>> fulldate = year >> dash >> month >> dash >> day 94 | 95 | This parser has some problems which we need to address, but it is already useful 96 | as a basic validator: 97 | 98 | .. code-block:: python 99 | 100 | >>> fulldate.parse("2017-xx") 101 | ParseError: expected '[0-9]{2}' at 0:5 102 | >>> fulldate.parse("2017-01") 103 | ParseError: expected '-' at 0:7 104 | >>> fulldate.parse("2017-02-01") 105 | '01' 106 | 107 | If the parse doesn’t succeed, we’ll get ``ParseError``, otherwise it is valid 108 | (at least as far as the basic syntax checks we’ve added). 109 | 110 | The first problem with this parser is that it doesn’t return a very useful 111 | value. 
Due to the way that :meth:`Parser.then` works, when it combines two 112 | parsers to produce a larger one, the value from the first parser is discarded, 113 | and the value returned by the second parser is the overall return value. So, we 114 | end up getting only the 'day' component as the result of our parse. We really 115 | want the year, month and day packaged up nicely, and converted to integers. 116 | 117 | A second problem is that our error messages are not very friendly. 118 | 119 | Our first attempt at fixing these might be to use the :ref:`parser-plus` instead 120 | of ``then``. This operator is defined to combine the results of the two parsers 121 | using the normal plus operator, which will work fine on strings: 122 | 123 | >>> fulldate = year + dash + month + dash + day 124 | >>> fulldate.parse("2017-02-01") 125 | '2017-02-01' 126 | 127 | However, it won’t help us if we want to split our data up into a set of 128 | integers. 129 | 130 | Our first step should actually be to work on the year, month and day components 131 | using :meth:`Parser.map`, which allows us to convert the strings to other 132 | objects - in our case we want integers. 133 | 134 | We can also use the :meth:`Parser.desc` method to give nicer error messages, so 135 | our components now look like this: 136 | 137 | .. code-block:: python 138 | 139 | >>> year = regex("[0-9]{4}").map(int).desc("4 digit year") 140 | >>> month = regex("[0-9]{2}").map(int).desc("2 digit month") 141 | >>> day = regex("[0-9]{2}").map(int).desc("2 digit day") 142 | 143 | We get better error messages now: 144 | 145 | .. code-block:: python 146 | 147 | >>> year.then(dash).then(month).parse("2017-xx") 148 | ParseError: expected '2 digit month' at 0:5 149 | 150 | 151 | Notice that the ``map`` and ``desc`` methods, like all similar methods on 152 | ``Parser``, return new parser objects - they do not modify the existing one. 
153 | This allows us to build up parsers with a 'fluent' interface, and avoid problems 154 | caused by mutating objects. 155 | 156 | However, we still need a way to package up the year, month and day as separate 157 | values. 158 | 159 | The :func:`seq` combinator provides one easy way to do that. It takes the 160 | sequence of parsers that are passed in as arguments, and returns a parser that 161 | runs each parser in order and combines their results into a list: 162 | 163 | .. code-block:: python 164 | 165 | >>> from parsy import seq 166 | >>> fulldate = seq(year, dash, month, dash, day) 167 | >>> fulldate.parse("2017-01-02") 168 | [2017, '-', 1, '-', 2] 169 | 170 | Now, we don’t need those dashes, so we can eliminate them using the :ref:`parser-rshift` or :ref:`parser-lshift`: 171 | 172 | .. code-block:: python 173 | 174 | >>> fulldate = seq(year << dash, month << dash, day) 175 | >>> fulldate.parse("2017-01-02") 176 | [2017, 1, 2] 177 | 178 | At this point, we could also convert this to a date object if we wanted using 179 | :meth:`Parser.combine`, which passes the produced sequence to another function 180 | using ``*args`` syntax. 181 | 182 | .. code-block:: python 183 | 184 | >>> from datetime import date 185 | >>> fulldate = seq(year << dash, month << dash, day).combine(date) 186 | 187 | This works because the positional argument order of ``date`` matches the order 188 | of the values parsed i.e. (year, month, day). 189 | 190 | A slightly more readable and flexible version would use the keyword argument 191 | version of :func:`seq`, followed by :meth:`Parser.combine_dict`. Putting 192 | everything together for our final solution: 193 | 194 | .. 
code-block:: python 195 | 196 | from datetime import date 197 | from parsy import regex, seq, string 198 | 199 | year = regex("[0-9]{4}").map(int).desc("4 digit year") 200 | month = regex("[0-9]{2}").map(int).desc("2 digit month") 201 | day = regex("[0-9]{2}").map(int).desc("2 digit day") 202 | dash = string("-") 203 | 204 | fulldate = seq( 205 | year=year << dash, 206 | month=month << dash, 207 | day=day, 208 | ).combine_dict(date) 209 | 210 | Breaking that down: 211 | 212 | * for clarity, and to allow us test separately, we have defined individual 213 | parsers for the YYYY, MM and DD components. 214 | 215 | * the ``seq`` call produces a parser that parses the year, month and day 216 | components in order, discarding the dashes, to produce a dictionary like this: 217 | 218 | .. code-block:: python 219 | 220 | { 221 | "year": 2017, 222 | "month": 1, 223 | "day": 2, 224 | } 225 | 226 | * when we chain the ``combine_dict`` call, we have a parser that passes this 227 | dictionary to the ``date`` constructor using ``**kwargs`` syntax, so we end up 228 | calling ``date(year=2017, month=1, day=2)`` 229 | 230 | 231 | So now it does exactly what we want: 232 | 233 | .. code-block:: python 234 | 235 | >>> fulldate.parse("2017-02-01") 236 | datetime.date(2017, 2, 1) 237 | 238 | 239 | .. _using-previous-values: 240 | 241 | Using previously parsed values 242 | ============================== 243 | 244 | Now, sometimes we might want to do more complex logic with the values that are 245 | collected as parse results, and do so while we are still parsing. 246 | 247 | To continue our example, the above parser has a problem that it will raise an 248 | exception if the day and month values are not valid. We’d like to be able to 249 | check this, and produce a parse error instead, which will make our parser play 250 | better with others if we want to use it to build something bigger. 
251 | 252 | Also, in ISO8601, strictly speaking you can just write the year, or the year and 253 | the month, and leave off the other parts. We’d like to handle that by returning 254 | a tuple for the result, and ``None`` for the missing data. 255 | 256 | To do this, we need to allow the parse to continue if the later components (with 257 | their leading dashes) are missing - that is, we need to express optional 258 | components, and we need a way to be able to test earlier values while in the 259 | middle of parsing, to see if we should continue looking for another component. 260 | 261 | The :meth:`Parser.bind` method provides one way to do it (yay monads!). 262 | Unfortunately, it gets ugly pretty fast, and in Python we don’t have Haskell’s 263 | ``do`` notation to tidy it up. But thankfully we can use generators and the 264 | ``yield`` keyword to great effect. 265 | 266 | We use a generator function and convert it into a parser by using the 267 | :func:`generate` decorator. The idea is that you ``yield`` every parser that you 268 | want to run, and receive the result of that parser as the value of the yield 269 | expression. You can then put parsers together using any logic you like, and 270 | finally return the value. 271 | 272 | An equivalent parser to the one above can be written like this: 273 | 274 | .. code-block:: python 275 | 276 | from parsy import generate 277 | 278 | @generate 279 | def fulldate(): 280 | y = yield year 281 | yield dash # implicit skip, since we do nothing with the value 282 | m = yield month 283 | yield dash 284 | d = yield day 285 | return date(y, m, d) 286 | 287 | Notice how this follows the previous definition of ``fulldate`` using ``seq`` 288 | with keyword arguments. It’s more verbose than before, but provides a good 289 | starting point for our next set of requirements. 
290 | 291 | First of all, we need to express optional components - that is we need to be 292 | able to handle missing dashes, and return what we’ve got so far rather than 293 | failing the whole parse. 294 | 295 | :class:`Parser` has a set of methods that convert parsers into ones that allow 296 | multiples of the parser - including :meth:`Parser.many`, :meth:`Parser.times`, 297 | :meth:`Parser.at_most` and :meth:`Parser.at_least`. There is also 298 | :meth:`Parser.optional` which allows matching zero times (in which case the 299 | parser will return the default value specified or ``None`` otherwise), 300 | or exactly once - just what we need in this case. 301 | 302 | We also need to do checking on the month and the day. We’ll take a shortcut and 303 | use the built-in ``datetime.date`` class to do the validation for us. However, 304 | rather than allow exceptions to be raised, we convert the exception into a 305 | parsing failure. 306 | 307 | 308 | .. code-block:: python 309 | 310 | from parsy import fail, generate 311 | 312 | optional_dash = dash.optional() 313 | 314 | @generate 315 | def full_or_partial_date(): 316 | d = None 317 | m = None 318 | y = yield year 319 | dash1 = yield optional_dash 320 | if dash1 is not None: 321 | m = yield month 322 | dash2 = yield optional_dash 323 | if dash2 is not None: 324 | d = yield day 325 | if m is not None: 326 | if m < 1 or m > 12: 327 | return fail("month must be in 1..12") 328 | if d is not None: 329 | try: 330 | datetime.date(y, m, d) 331 | except ValueError as e: 332 | return fail(e.args[0]) 333 | 334 | return (y, m, d) 335 | 336 | 337 | This now works as expected: 338 | 339 | ..
code-block:: python 340 | 341 | >>> full_or_partial_date.parse("2017-02") 342 | (2017, 2, None) 343 | >>> full_or_partial_date.parse("2017-02-29") 344 | ParseError: expected 'day is out of range for month' at 0:10 345 | 346 | We could of course use a custom object in the final line to return a more 347 | convenient data type, if wanted. 348 | 349 | Alternatives and backtracking 350 | ============================= 351 | 352 | Suppose we are using our date parser to scrape dates off articles on a web site. 353 | We then discover that for recently published articles, instead of printing a 354 | timestamp, they write "X days ago". 355 | 356 | We want to parse this, and we’ll use a timedelta object to represent the value 357 | (to easily distinguish it from other values and consume it later). We can write 358 | a parser for this using tools we’ve seen already: 359 | 360 | .. code-block:: python 361 | 362 | >>> days_ago = regex("[0-9]+").map(lambda d: timedelta(days=-int(d))) << string(" days ago") 363 | >>> days_ago.parse("5 days ago") 364 | datetime.timedelta(-5) 365 | 366 | Now we need to combine it with our date parser, and allow either to succeed. 367 | This is done using the :ref:`parser-or`, as follows: 368 | 369 | 370 | .. code-block:: python 371 | 372 | >>> flexi_date = full_or_partial_date | days_ago 373 | >>> flexi_date.parse("2012-01-05") 374 | (2012, 1, 5) 375 | >>> flexi_date.parse("2 days ago") 376 | datetime.timedelta(-2) 377 | 378 | Notice that you still get good error messages from the appropriate parser, 379 | depending on which parser got furthest before returning a failure: 380 | 381 | .. code-block:: python 382 | 383 | >>> flexi_date.parse("2012-") 384 | ParseError: expected '2 digit month' at 0:5 385 | >>> flexi_date.parse("2 years ago") 386 | ParseError: expected ' days ago' at 0:1 387 | 388 | When using backtracking, you need to understand that backtracking to the other 389 | option only occurs if the first parser fails. 
So, for example: 390 | 391 | .. code-block:: python 392 | 393 | >>> a = string("a") 394 | >>> ab = string("ab") 395 | >>> c = string("c") 396 | >>> a_or_ab_and_c = ((a | ab) + c) 397 | >>> a_or_ab_and_c.parse("ac") 398 | 'ac' 399 | >>> a_or_ab_and_c.parse("abc") 400 | ParseError: expected 'c' at 0:1 401 | 402 | The parse fails because the ``a`` parser succeeds, and so the ``ab`` parser is 403 | never tried. This is different from most regular expression engines, where 404 | backtracking is done over the whole regex by default. 405 | 406 | In this case we can get the parse to succeed by switching the order: 407 | 408 | .. code-block:: python 409 | 410 | >>> ((ab | a) + c).parse("abc") 411 | 'abc' 412 | 413 | >>> ((ab | a) + c).parse("ac") 414 | 'ac' 415 | 416 | We could also fix it like this: 417 | 418 | .. code-block:: python 419 | 420 | >>> ((a + c) | (ab + c)).parse("abc") 421 | 'abc' 422 | 423 | 424 | Custom data structures 425 | ====================== 426 | 427 | In the example shown so far, the result of parsing has been a native Python data 428 | type, such as a integer, string, datetime or tuple. In some cases that is 429 | enough, but very quickly you will find that for your parse result to be useful, 430 | you will need to use custom data structures (rather than ending up with nested 431 | lists etc.) 432 | 433 | For defining custom data structures, you can use any method you like (e.g. 434 | simple classes). We suggest `dataclasses 435 | `_ (stdlib), `attrs 436 | `_ or `pydantic 437 | `_. You can also use `namedtuple 438 | `_ 439 | for simple cases. 440 | 441 | For combining parsed data into these data structures, you can: 442 | 443 | 1. Use :meth:`Parser.map`, :meth:`Parser.combine` and :meth:`Parser.combine_dict`, 444 | often in conjunction with :func:`seq`. 445 | 446 | See the :doc:`SQL SELECT example 447 | ` for an example of this approach. 448 | 449 | 2. 
Use the ``@generate`` decorator as above, and manually call the data 450 | structure constructor with the pieces, as in ``full_date`` or 451 | ``full_or_partial_date`` above, but with your own data structure instead of a 452 | tuple or datetime in the final line. 453 | 454 | 455 | Learn more 456 | ========== 457 | 458 | For further topics, see the :doc:`table of contents ` for the rest of 459 | the documentation that should enable you to build parsers for your needs. 460 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-parsy/parsy/3b72c71bf9570d73ce50477cf503fd5544c1c4b1/examples/__init__.py -------------------------------------------------------------------------------- /examples/json.py: -------------------------------------------------------------------------------- 1 | from parsy import forward_declaration, regex, seq, string 2 | 3 | # Utilities 4 | whitespace = regex(r"\s*") 5 | lexeme = lambda p: p << whitespace 6 | 7 | # Punctuation 8 | lbrace = lexeme(string("{")) 9 | rbrace = lexeme(string("}")) 10 | lbrack = lexeme(string("[")) 11 | rbrack = lexeme(string("]")) 12 | colon = lexeme(string(":")) 13 | comma = lexeme(string(",")) 14 | 15 | # Primitives 16 | true = lexeme(string("true")).result(True) 17 | false = lexeme(string("false")).result(False) 18 | null = lexeme(string("null")).result(None) 19 | number = lexeme(regex(r"-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?")).map(float) 20 | string_part = regex(r'[^"\\]+') 21 | string_esc = string("\\") >> ( 22 | string("\\") 23 | | string("/") 24 | | string('"') 25 | | string("b").result("\b") 26 | | string("f").result("\f") 27 | | string("n").result("\n") 28 | | string("r").result("\r") 29 | | string("t").result("\t") 30 | | regex(r"u[0-9a-fA-F]{4}").map(lambda s: chr(int(s[1:], 16))) 31 | ) 32 | quoted = lexeme(string('"') >> 
(string_part | string_esc).many().concat() << string('"')) 33 | 34 | # Data structures 35 | json_value = forward_declaration() 36 | object_pair = seq(quoted << colon, json_value).map(tuple) 37 | json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace 38 | array = lbrack >> json_value.sep_by(comma) << rbrack 39 | 40 | # Everything 41 | json_value.become(quoted | number | json_object | array | true | false | null) 42 | json_doc = whitespace >> json_value 43 | 44 | 45 | def test(): 46 | assert ( 47 | json_doc.parse( 48 | r""" 49 | { 50 | "int": 1, 51 | "string": "hello", 52 | "a list": [1, 2, 3], 53 | "escapes": "\n \u24D2", 54 | "nested": {"x": "y"}, 55 | "other": [true, false, null] 56 | } 57 | """ 58 | ) 59 | == { 60 | "int": 1, 61 | "string": "hello", 62 | "a list": [1, 2, 3], 63 | "escapes": "\n ⓒ", 64 | "nested": {"x": "y"}, 65 | "other": [True, False, None], 66 | } 67 | ) 68 | 69 | 70 | if __name__ == "__main__": 71 | from sys import stdin 72 | 73 | print(repr(json_doc.parse(stdin.read()))) 74 | -------------------------------------------------------------------------------- /examples/simple_eval.py: -------------------------------------------------------------------------------- 1 | from parsy import digit, generate, match_item, regex, string, success, test_item 2 | 3 | 4 | def lexer(code): 5 | whitespace = regex(r"\s*") 6 | integer = digit.at_least(1).concat().map(int) 7 | float_ = (digit.many() + string(".").result(["."]) + digit.many()).concat().map(float) 8 | parser = whitespace >> ((float_ | integer | regex(r"[()*/+-]")) << whitespace).many() 9 | return parser.parse(code) 10 | 11 | 12 | def eval_tokens(tokens): 13 | # This function parses and evaluates at the same time. 
14 | 15 | lparen = match_item("(") 16 | rparen = match_item(")") 17 | 18 | @generate 19 | def additive(): 20 | res = yield multiplicative 21 | sign = match_item("+") | match_item("-") 22 | while True: 23 | operation = yield sign | success("") 24 | if not operation: 25 | break 26 | operand = yield multiplicative 27 | if operation == "+": 28 | res += operand 29 | elif operation == "-": 30 | res -= operand 31 | return res 32 | 33 | @generate 34 | def multiplicative(): 35 | res = yield simple 36 | op = match_item("*") | match_item("/") 37 | while True: 38 | operation = yield op | success("") 39 | if not operation: 40 | break 41 | operand = yield simple 42 | if operation == "*": 43 | res *= operand 44 | elif operation == "/": 45 | res /= operand 46 | return res 47 | 48 | @generate 49 | def number(): 50 | sign = yield match_item("+") | match_item("-") | success("+") 51 | value = yield test_item(lambda x: isinstance(x, (int, float)), "number") 52 | return value if sign == "+" else -value 53 | 54 | expr = additive 55 | simple = (lparen >> expr << rparen) | number 56 | 57 | return expr.parse(tokens) 58 | 59 | 60 | def simple_eval(expr): 61 | return eval_tokens(lexer(expr)) 62 | 63 | 64 | import pytest # noqa isort:skip 65 | 66 | test_item = pytest.mark.skip(test_item) # This is not a test 67 | 68 | 69 | if __name__ == "__main__": 70 | print(simple_eval(input())) 71 | -------------------------------------------------------------------------------- /examples/simple_logo_lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stripped down logo lexer, for tokenizing Turtle Logo programs like: 3 | 4 | fd 1 5 | bk 2 6 | rt 90 7 | 8 | etc. 
9 | """ 10 | 11 | from parsy import eof, regex, seq, string, string_from, whitespace 12 | 13 | command = string_from("fd", "bk", "rt", "lt") 14 | number = regex(r"[0-9]+").map(int) 15 | optional_whitespace = regex(r"\s*") 16 | eol = string("\n") 17 | line = seq( 18 | optional_whitespace >> command, 19 | whitespace >> number, 20 | (eof | eol | (whitespace >> eol)).result("\n"), 21 | ) 22 | flatten_list = lambda ls: sum(ls, []) 23 | lexer = line.many().map(flatten_list) 24 | -------------------------------------------------------------------------------- /examples/simple_logo_parser.py: -------------------------------------------------------------------------------- 1 | from parsy import generate, match_item, test_item 2 | 3 | 4 | class Command: 5 | def __init__(self, parameter): 6 | self.parameter = parameter 7 | 8 | def __repr__(self): 9 | return f"{self.__class__.__name__}({self.parameter})" 10 | 11 | 12 | class Forward(Command): 13 | pass 14 | 15 | 16 | class Backward(Command): 17 | pass 18 | 19 | 20 | class Right(Command): 21 | pass 22 | 23 | 24 | class Left(Command): 25 | pass 26 | 27 | 28 | commands = { 29 | "fd": Forward, 30 | "bk": Backward, 31 | "rt": Right, 32 | "lt": Left, 33 | } 34 | 35 | 36 | @generate 37 | def statement(): 38 | cmd_name = yield test_item(lambda i: i in commands.keys(), "command") 39 | parameter = yield test_item(lambda i: isinstance(i, int), "number") 40 | yield match_item("\n") 41 | return commands[cmd_name](int(parameter)) 42 | 43 | 44 | program = statement.many() 45 | 46 | 47 | import pytest # noqa isort:skip 48 | 49 | test_item = pytest.mark.skip(test_item) # This is not a test 50 | -------------------------------------------------------------------------------- /examples/sql_select.py: -------------------------------------------------------------------------------- 1 | # A very limited parser for SQL SELECT statements, 2 | # for demo purposes. Supports: 3 | # 1. A simple list of columns (or number/string literals) 4 | # 2. 
A simple table name 5 | # 3. An optional where condition, 6 | # which has the form of 'A op B' where A and B are columns, strings or number, 7 | # and op is a comparison operator 8 | # 9 | # We demonstrate the use of `map` to create AST nodes with a single arg, 10 | # and `seq` for AST nodes with more than one arg. 11 | 12 | import enum 13 | from dataclasses import dataclass 14 | from typing import List, Optional, Union 15 | 16 | from parsy import from_enum, regex, seq, string 17 | 18 | # -- AST nodes: 19 | 20 | 21 | class Operator(enum.Enum): 22 | EQ = "=" 23 | LT = "<" 24 | GT = ">" 25 | LTE = "<=" 26 | GTE = ">=" 27 | 28 | 29 | @dataclass 30 | class Number: 31 | value: int 32 | 33 | 34 | @dataclass 35 | class String: 36 | value: str 37 | 38 | 39 | @dataclass 40 | class Field: 41 | name: str 42 | 43 | 44 | @dataclass 45 | class Table: 46 | name: str 47 | 48 | 49 | ColumnExpression = Union[Field, String, Number] 50 | 51 | 52 | @dataclass 53 | class Comparison: 54 | left: ColumnExpression 55 | operator: Operator 56 | right: ColumnExpression 57 | 58 | 59 | @dataclass 60 | class Select: 61 | columns: List[ColumnExpression] 62 | table: Table 63 | where: Optional[Comparison] 64 | 65 | 66 | # -- Parsers: 67 | 68 | number_literal = regex(r"-?[0-9]+").map(int).map(Number) 69 | 70 | # We don't support ' in strings or escaping for simplicity 71 | string_literal = regex(r"'[^']*'").map(lambda s: String(s[1:-1])) 72 | 73 | identifier = regex("[a-zA-Z][a-zA-Z0-9_]*") 74 | 75 | field = identifier.map(Field) 76 | 77 | table = identifier.map(Table) 78 | 79 | space = regex(r"\s+") # non-optional whitespace 80 | padding = regex(r"\s*") # optional whitespace 81 | 82 | column_expr = field | string_literal | number_literal 83 | 84 | operator = from_enum(Operator) 85 | 86 | comparison = seq( 87 | left=column_expr << padding, 88 | operator=operator, 89 | right=padding >> column_expr, 90 | ).combine_dict(Comparison) 91 | 92 | SELECT = string("SELECT") 93 | FROM = string("FROM") 94 | 
WHERE = string("WHERE") 95 | 96 | # Here we demonstrate use of leading underscore to discard parts we don't want, 97 | # which is more readable and convenient than `<<` and `>>` sometimes. 98 | select = seq( 99 | _select=SELECT + space, 100 | columns=column_expr.sep_by(padding + string(",") + padding, min=1), 101 | _from=space + FROM + space, 102 | table=table, 103 | where=(space >> WHERE >> space >> comparison).optional(), 104 | _end=padding + string(";"), 105 | ).combine_dict(Select) 106 | 107 | 108 | # Run these tests with pytest: 109 | 110 | 111 | def test_select(): 112 | assert select.parse("SELECT thing, stuff, 123, 'hello' FROM my_table WHERE id = 1;") == Select( 113 | columns=[ 114 | Field("thing"), 115 | Field("stuff"), 116 | Number(123), 117 | String("hello"), 118 | ], 119 | table=Table("my_table"), 120 | where=Comparison( 121 | left=Field("id"), 122 | operator=Operator.EQ, 123 | right=Number(1), 124 | ), 125 | ) 126 | 127 | 128 | def test_optional_where(): 129 | assert select.parse("SELECT 1 FROM x;") == Select( 130 | columns=[Number(1)], 131 | table=Table("x"), 132 | where=None, 133 | ) 134 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "parsy" 3 | description = "Easy-to-use parser combinators, for parsing in pure Python" 4 | license = {text = "MIT"} 5 | authors = [ 6 | { name = "Jeanine Adkisson", email = "jneen@jneen.net" } 7 | ] 8 | maintainers = [ 9 | { name = "Luke Plant", email = "luke@lukeplant.me.uk" } 10 | ] 11 | classifiers = [ 12 | "Development Status :: 5 - Production/Stable", 13 | "Intended Audience :: Developers", 14 | "Topic :: Software Development :: Compilers", 15 | "Topic :: Software Development :: Interpreters", 16 | "Topic :: Text Processing", 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.9", 
20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Programming Language :: Python :: 3.13", 24 | ] 25 | keywords = ["parser", "parsers", "parsing", "monad", "combinators"] 26 | urls = {Homepage = "https://github.com/python-parsy/parsy"} 27 | 28 | requires-python = ">=3.9" 29 | dependencies = [] 30 | 31 | dynamic = ["version"] 32 | 33 | [project.readme] 34 | file = "README.rst" 35 | content-type = "text/x-rst" 36 | 37 | [tool.setuptools.dynamic] 38 | version = {attr = "parsy.__version__"} 39 | 40 | [build-system] 41 | requires = ["setuptools>=61.2"] 42 | build-backend = "setuptools.build_meta" 43 | 44 | [dependency-groups] 45 | dev = [ 46 | "pre-commit>=4.1.0", 47 | "pytest>=8.3.4", 48 | "tox-uv>=1.20.1", 49 | "tox>=4.24.1", 50 | "pytest-cov>=6.0.0", 51 | "pre-commit-uv>=4.1.4", 52 | ] 53 | 54 | [tool.setuptools] 55 | package-dir = {"" = "src"} 56 | include-package-data = false 57 | 58 | [tool.setuptools.packages.find] 59 | where = ["src"] 60 | namespaces = false 61 | 62 | [tool.black] 63 | line-length = 119 64 | target-version = ['py310'] 65 | 66 | [tool.isort] 67 | line_length = 119 68 | profile = "black" 69 | default_section = "THIRDPARTY" 70 | skip = [".tox", ".git", "docs", "dist", "build" , "todo", ".venv"] 71 | known_first_party = "parsy" 72 | 73 | [tool.flake8] 74 | exclude = [".tox", ".git", "docs", "dist", "build", "todo"] 75 | ignore = ["E731", "E221", "W503", "E741", "E203" ] 76 | max-line-length = 119 77 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | python_files = examples/*.py tests/*.py 3 | pythonpath = src/ 4 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pytest 
|| exit 1 4 | pre-commit run --all --all-files || exit 1 5 | 6 | umask 000 7 | rm -rf build dist 8 | git ls-tree --full-tree --name-only -r HEAD | xargs chmod ugo+r 9 | 10 | uv build --sdist --wheel || exit 1 11 | uv publish || exit 1 12 | 13 | VERSION=$(uv pip show parsy | grep 'Version: ' | cut -f 2 -d ' ' | tr -d '\n') || exit 1 14 | 15 | git tag v$VERSION || exit 1 16 | git push || exit 1 17 | git push --tags || exit 1 18 | -------------------------------------------------------------------------------- /src/parsy/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import enum 4 | import operator 5 | import re 6 | from dataclasses import dataclass 7 | from functools import wraps 8 | from typing import Any, Callable, FrozenSet 9 | 10 | __version__ = "2.1" 11 | 12 | noop = lambda x: x 13 | 14 | 15 | def line_info_at(stream, index): 16 | if index > len(stream): 17 | raise ValueError("invalid index") 18 | line = stream.count("\n", 0, index) 19 | last_nl = stream.rfind("\n", 0, index) 20 | col = index - (last_nl + 1) 21 | return (line, col) 22 | 23 | 24 | class ParseError(RuntimeError): 25 | def __init__(self, expected, stream, index): 26 | self.expected = expected 27 | self.stream = stream 28 | self.index = index 29 | 30 | def line_info(self): 31 | try: 32 | return "{}:{}".format(*line_info_at(self.stream, self.index)) 33 | except (TypeError, AttributeError): # not a str 34 | return str(self.index) 35 | 36 | def __str__(self): 37 | expected_list = sorted(repr(e) for e in self.expected) 38 | 39 | if len(expected_list) == 1: 40 | return f"expected {expected_list[0]} at {self.line_info()}" 41 | else: 42 | return f"expected one of {', '.join(expected_list)} at {self.line_info()}" 43 | 44 | 45 | @dataclass 46 | class Result: 47 | status: bool 48 | index: int 49 | value: Any 50 | furthest: int 51 | expected: FrozenSet[str] 52 | 53 | @staticmethod 54 | def success(index, value): 55 | 
return Result(True, index, value, -1, frozenset()) 56 | 57 | @staticmethod 58 | def failure(index, expected): 59 | return Result(False, -1, None, index, frozenset([expected])) 60 | 61 | # collect the furthest failure from self and other 62 | def aggregate(self, other): 63 | if not other: 64 | return self 65 | 66 | if self.furthest > other.furthest: 67 | return self 68 | elif self.furthest == other.furthest: 69 | # if we both have the same failure index, we combine the expected messages. 70 | return Result(self.status, self.index, self.value, self.furthest, self.expected | other.expected) 71 | else: 72 | return Result(self.status, self.index, self.value, other.furthest, other.expected) 73 | 74 | 75 | class Parser: 76 | """ 77 | A Parser is an object that wraps a function whose arguments are 78 | a string to be parsed and the index on which to begin parsing. 79 | The function should return either Result.success(next_index, value), 80 | where the next index is where to continue the parse and the value is 81 | the yielded value, or Result.failure(index, expected), where expected 82 | is a string indicating what was expected, and the index is the index 83 | of the failure. 84 | """ 85 | 86 | def __init__(self, wrapped_fn: Callable[[str | bytes | list, int], Result]): 87 | """ 88 | Creates a new Parser from a function that takes a stream 89 | and returns a Result. 90 | """ 91 | self.wrapped_fn = wrapped_fn 92 | 93 | def __call__(self, stream: str | bytes | list, index: int): 94 | return self.wrapped_fn(stream, index) 95 | 96 | def parse(self, stream: str | bytes | list) -> Any: 97 | """Parses a string or list of tokens and returns the result or raise a ParseError.""" 98 | (result, _) = (self << eof).parse_partial(stream) 99 | return result 100 | 101 | def parse_partial(self, stream: str | bytes | list) -> tuple[Any, str | bytes | list]: 102 | """ 103 | Parses the longest possible prefix of a given string. 
104 | Returns a tuple of the result and the unparsed remainder, 105 | or raises ParseError 106 | """ 107 | result = self(stream, 0) 108 | 109 | if result.status: 110 | return (result.value, stream[result.index :]) 111 | else: 112 | raise ParseError(result.expected, stream, result.furthest) 113 | 114 | def bind(self, bind_fn): 115 | @Parser 116 | def bound_parser(stream, index): 117 | result = self(stream, index) 118 | 119 | if result.status: 120 | next_parser = bind_fn(result.value) 121 | return next_parser(stream, result.index).aggregate(result) 122 | else: 123 | return result 124 | 125 | return bound_parser 126 | 127 | def map(self, map_function: Callable) -> Parser: 128 | """ 129 | Returns a parser that transforms the produced value of the initial parser with map_function. 130 | """ 131 | return self.bind(lambda res: success(map_function(res))) 132 | 133 | def combine(self, combine_fn: Callable) -> Parser: 134 | """ 135 | Returns a parser that transforms the produced values of the initial parser 136 | with ``combine_fn``, passing the arguments using ``*args`` syntax. 137 | 138 | The initial parser should return a list/sequence of parse results. 139 | """ 140 | return self.bind(lambda res: success(combine_fn(*res))) 141 | 142 | def combine_dict(self, combine_fn: Callable) -> Parser: 143 | """ 144 | Returns a parser that transforms the value produced by the initial parser 145 | using the supplied function/callable, passing the arguments using the 146 | ``**kwargs`` syntax. 147 | 148 | The value produced by the initial parser must be a mapping/dictionary from 149 | names to values, or a list of two-tuples, or something else that can be 150 | passed to the ``dict`` constructor. 151 | 152 | If ``None`` is present as a key in the dictionary it will be removed 153 | before passing to ``fn``, as will all keys starting with ``_``. 
154 | """ 155 | return self.bind( 156 | lambda res: success( 157 | combine_fn( 158 | **{ 159 | k: v 160 | for k, v in dict(res).items() 161 | if k is not None and not (isinstance(k, str) and k.startswith("_")) 162 | } 163 | ) 164 | ) 165 | ) 166 | 167 | def concat(self) -> Parser: 168 | """ 169 | Returns a parser that concatenates together (as a string) the previously 170 | produced values. 171 | """ 172 | return self.map("".join) 173 | 174 | def then(self, other: Parser) -> Parser: 175 | """ 176 | Returns a parser which, if the initial parser succeeds, will 177 | continue parsing with ``other``. This will produce the 178 | value produced by ``other``. 179 | 180 | """ 181 | return seq(self, other).combine(lambda left, right: right) 182 | 183 | def skip(self, other: Parser) -> Parser: 184 | """ 185 | Returns a parser which, if the initial parser succeeds, will 186 | continue parsing with ``other``. It will produce the 187 | value produced by the initial parser. 188 | """ 189 | return seq(self, other).combine(lambda left, right: left) 190 | 191 | def result(self, value: Any) -> Parser: 192 | """ 193 | Returns a parser that, if the initial parser succeeds, always produces 194 | the passed in ``value``. 195 | """ 196 | return self >> success(value) 197 | 198 | def many(self) -> Parser: 199 | """ 200 | Returns a parser that expects the initial parser 0 or more times, and 201 | produces a list of the results. 202 | """ 203 | return self.times(0, float("inf")) 204 | 205 | def times(self, min: int, max: int = None) -> Parser: 206 | """ 207 | Returns a parser that expects the initial parser at least ``min`` times, 208 | and at most ``max`` times, and produces a list of the results. If only one 209 | argument is given, the parser is expected exactly that number of times. 
210 | """ 211 | if max is None: 212 | max = min 213 | 214 | @Parser 215 | def times_parser(stream, index): 216 | values = [] 217 | times = 0 218 | result = None 219 | 220 | while times < max: 221 | result = self(stream, index).aggregate(result) 222 | if result.status: 223 | values.append(result.value) 224 | index = result.index 225 | times += 1 226 | elif times >= min: 227 | break 228 | else: 229 | return result 230 | 231 | return Result.success(index, values).aggregate(result) 232 | 233 | return times_parser 234 | 235 | def at_most(self, n: int) -> Parser: 236 | """ 237 | Returns a parser that expects the initial parser at most ``n`` times, and 238 | produces a list of the results. 239 | """ 240 | return self.times(0, n) 241 | 242 | def at_least(self, n: int) -> Parser: 243 | """ 244 | Returns a parser that expects the initial parser at least ``n`` times, and 245 | produces a list of the results. 246 | """ 247 | return self.times(n) + self.many() 248 | 249 | def optional(self, default: Any = None) -> Parser: 250 | """ 251 | Returns a parser that expects the initial parser zero or once, and maps 252 | the result to a given default value in the case of no match. If no default 253 | value is given, ``None`` is used. 254 | """ 255 | return self.times(0, 1).map(lambda v: v[0] if v else default) 256 | 257 | def until(self, other: Parser, min: int = 0, max: int = float("inf"), consume_other: bool = False) -> Parser: 258 | """ 259 | Returns a parser that expects the initial parser followed by ``other``. 260 | The initial parser is expected at least ``min`` times and at most ``max`` times. 261 | By default, it does not consume ``other`` and it produces a list of the 262 | results excluding ``other``. If ``consume_other`` is ``True`` then 263 | ``other`` is consumed and its result is included in the list of results. 
264 | """ 265 | 266 | @Parser 267 | def until_parser(stream, index): 268 | values = [] 269 | times = 0 270 | while True: 271 | 272 | # try parser first 273 | res = other(stream, index) 274 | if res.status and times >= min: 275 | if consume_other: 276 | # consume other 277 | values.append(res.value) 278 | index = res.index 279 | return Result.success(index, values) 280 | 281 | # exceeded max? 282 | if times >= max: 283 | # return failure, it matched parser more than max times 284 | return Result.failure(index, f"at most {max} items") 285 | 286 | # failed, try parser 287 | result = self(stream, index) 288 | if result.status: 289 | # consume 290 | values.append(result.value) 291 | index = result.index 292 | times += 1 293 | elif times >= min: 294 | # return failure, parser is not followed by other 295 | return Result.failure(index, "did not find other parser") 296 | else: 297 | # return failure, it did not match parser at least min times 298 | return Result.failure(index, f"at least {min} items; got {times} item(s)") 299 | 300 | return until_parser 301 | 302 | def sep_by(self, sep: Parser, *, min: int = 0, max: int = float("inf")) -> Parser: 303 | """ 304 | Returns a new parser that repeats the initial parser and 305 | collects the results in a list. Between each item, the ``sep`` parser 306 | is run (and its return value is discarded). By default it 307 | repeats with no limit, but minimum and maximum values can be supplied. 308 | """ 309 | zero_times = success([]) 310 | if max == 0: 311 | return zero_times 312 | res = self.times(1) + (sep >> self).times(min - 1, max - 1) 313 | if min == 0: 314 | res |= zero_times 315 | return res 316 | 317 | def desc(self, description: str) -> Parser: 318 | """ 319 | Returns a new parser with a description added, which is used in the error message 320 | if parsing fails. 
321 | """ 322 | 323 | @Parser 324 | def desc_parser(stream, index): 325 | result = self(stream, index) 326 | if result.status: 327 | return result 328 | else: 329 | return Result.failure(index, description) 330 | 331 | return desc_parser 332 | 333 | def mark(self) -> Parser: 334 | """ 335 | Returns a parser that wraps the initial parser's result in a value 336 | containing column and line information of the match, as well as the 337 | original value. The new value is a 3-tuple: 338 | 339 | ((start_row, start_column), 340 | original_value, 341 | (end_row, end_column)) 342 | """ 343 | 344 | @generate 345 | def marked(): 346 | start = yield line_info 347 | body = yield self 348 | end = yield line_info 349 | return (start, body, end) 350 | 351 | return marked 352 | 353 | def tag(self, name: str) -> Parser: 354 | """ 355 | Returns a parser that wraps the produced value of the initial parser in a 356 | 2 tuple containing ``(name, value)``. This provides a very simple way to 357 | label parsed components 358 | """ 359 | return self.map(lambda v: (name, v)) 360 | 361 | def should_fail(self, description: str) -> Parser: 362 | """ 363 | Returns a parser that fails when the initial parser succeeds, and succeeds 364 | when the initial parser fails (consuming no input). A description must 365 | be passed which is used in parse failure messages. 
366 | 367 | This is essentially a negative lookahead 368 | """ 369 | 370 | @Parser 371 | def fail_parser(stream, index): 372 | res = self(stream, index) 373 | if res.status: 374 | return Result.failure(index, description) 375 | return Result.success(index, res) 376 | 377 | return fail_parser 378 | 379 | def __add__(self, other: Parser) -> Parser: 380 | return seq(self, other).combine(operator.add) 381 | 382 | def __mul__(self, other: Parser) -> Parser: 383 | if isinstance(other, range): 384 | return self.times(other.start, other.stop - 1) 385 | return self.times(other) 386 | 387 | def __or__(self, other: Parser) -> Parser: 388 | return alt(self, other) 389 | 390 | # haskelley operators, for fun # 391 | 392 | # >> 393 | def __rshift__(self, other: Parser) -> Parser: 394 | return self.then(other) 395 | 396 | # << 397 | def __lshift__(self, other: Parser) -> Parser: 398 | return self.skip(other) 399 | 400 | 401 | def alt(*parsers: Parser) -> Parser: 402 | """ 403 | Creates a parser from the passed in argument list of alternative 404 | parsers, which are tried in order, moving to the next one if the 405 | current one fails. 406 | """ 407 | if not parsers: 408 | return fail("") 409 | 410 | @Parser 411 | def alt_parser(stream, index): 412 | result = None 413 | for parser in parsers: 414 | result = parser(stream, index).aggregate(result) 415 | if result.status: 416 | return result 417 | 418 | return result 419 | 420 | return alt_parser 421 | 422 | 423 | def seq(*parsers: Parser, **kw_parsers: Parser) -> Parser: 424 | """ 425 | Takes a list of parsers, runs them in order, 426 | and collects their individuals results in a list, 427 | or in a dictionary if you pass them as keyword arguments. 
428 | """ 429 | if not parsers and not kw_parsers: 430 | return success([]) 431 | 432 | if parsers and kw_parsers: 433 | raise ValueError("Use either positional arguments or keyword arguments with seq, not both") 434 | 435 | if parsers: 436 | 437 | @Parser 438 | def seq_parser(stream, index): 439 | result = None 440 | values = [] 441 | for parser in parsers: 442 | result = parser(stream, index).aggregate(result) 443 | if not result.status: 444 | return result 445 | index = result.index 446 | values.append(result.value) 447 | return Result.success(index, values).aggregate(result) 448 | 449 | return seq_parser 450 | else: 451 | 452 | @Parser 453 | def seq_kwarg_parser(stream, index): 454 | result = None 455 | values = {} 456 | for name, parser in kw_parsers.items(): 457 | result = parser(stream, index).aggregate(result) 458 | if not result.status: 459 | return result 460 | index = result.index 461 | values[name] = result.value 462 | return Result.success(index, values).aggregate(result) 463 | 464 | return seq_kwarg_parser 465 | 466 | 467 | def generate(fn) -> Parser: 468 | """ 469 | Creates a parser from a generator function 470 | """ 471 | if isinstance(fn, str): 472 | return lambda f: generate(f).desc(fn) 473 | 474 | @Parser 475 | @wraps(fn) 476 | def generated(stream, index): 477 | # start up the generator 478 | iterator = fn() 479 | 480 | result = None 481 | value = None 482 | try: 483 | while True: 484 | next_parser = iterator.send(value) 485 | result = next_parser(stream, index).aggregate(result) 486 | if not result.status: 487 | return result 488 | value = result.value 489 | index = result.index 490 | except StopIteration as stop: 491 | returnVal = stop.value 492 | if isinstance(returnVal, Parser): 493 | return returnVal(stream, index).aggregate(result) 494 | 495 | return Result.success(index, returnVal).aggregate(result) 496 | 497 | return generated 498 | 499 | 500 | index = Parser(lambda _, index: Result.success(index, index)) 501 | line_info = Parser(lambda 
stream, index: Result.success(index, line_info_at(stream, index))) 502 | 503 | 504 | def success(value: Any) -> Parser: 505 | """ 506 | Returns a parser that does not consume any of the stream, but 507 | produces ``value``. 508 | """ 509 | return Parser(lambda _, index: Result.success(index, value)) 510 | 511 | 512 | def fail(expected: str) -> Parser: 513 | """ 514 | Returns a parser that always fails with the provided error message. 515 | """ 516 | return Parser(lambda _, index: Result.failure(index, expected)) 517 | 518 | 519 | def string(expected_string: str, transform: Callable[[str], str] = noop) -> Parser: 520 | """ 521 | Returns a parser that expects the ``expected_string`` and produces 522 | that string value. 523 | 524 | Optionally, a transform function can be passed, which will be used on both 525 | the expected string and tested string. 526 | """ 527 | 528 | slen = len(expected_string) 529 | transformed_s = transform(expected_string) 530 | 531 | @Parser 532 | def string_parser(stream, index): 533 | if transform(stream[index : index + slen]) == transformed_s: 534 | return Result.success(index + slen, expected_string) 535 | else: 536 | return Result.failure(index, expected_string) 537 | 538 | return string_parser 539 | 540 | 541 | def regex(exp: str, flags=0, group: int | str | tuple = 0) -> Parser: 542 | """ 543 | Returns a parser that expects the given ``exp``, and produces the 544 | matched string. ``exp`` can be a compiled regular expression, or a 545 | string which will be compiled with the given ``flags``. 546 | 547 | Optionally, accepts ``group``, which is passed to re.Match.group 548 | https://docs.python.org/3/library/re.html#re.Match.group> to 549 | return the text from a capturing group in the regex instead of the 550 | entire match. 
551 | """ 552 | 553 | if isinstance(exp, (str, bytes)): 554 | exp = re.compile(exp, flags) 555 | if isinstance(group, (str, int)): 556 | group = (group,) 557 | 558 | @Parser 559 | def regex_parser(stream, index): 560 | match = exp.match(stream, index) 561 | if match: 562 | return Result.success(match.end(), match.group(*group)) 563 | else: 564 | return Result.failure(index, exp.pattern) 565 | 566 | return regex_parser 567 | 568 | 569 | def test_item(func: Callable[..., bool], description: str) -> Parser: 570 | """ 571 | Returns a parser that tests a single item from the list of items being 572 | consumed, using the callable ``func``. If ``func`` returns ``True``, the 573 | parse succeeds, otherwise the parse fails with the description 574 | ``description``. 575 | """ 576 | 577 | @Parser 578 | def test_item_parser(stream, index): 579 | if index < len(stream): 580 | if isinstance(stream, bytes): 581 | # Subscripting bytes with `[index]` instead of 582 | # `[index:index + 1]` returns an int 583 | item = stream[index : index + 1] 584 | else: 585 | item = stream[index] 586 | if func(item): 587 | return Result.success(index + 1, item) 588 | return Result.failure(index, description) 589 | 590 | return test_item_parser 591 | 592 | 593 | def test_char(func: Callable[..., bool], description: str) -> Parser: 594 | """ 595 | Returns a parser that tests a single character with the callable 596 | ``func``. If ``func`` returns ``True``, the parse succeeds, otherwise 597 | the parse fails with the description ``description``. 598 | """ 599 | # Implementation is identical to test_item 600 | return test_item(func, description) 601 | 602 | 603 | def match_item(item: Any, description: str = None) -> Parser: 604 | """ 605 | Returns a parser that tests the next item (or character) from the stream (or 606 | string) for equality against the provided item. Optionally a string 607 | description can be passed. 
608 | """ 609 | 610 | if description is None: 611 | description = str(item) 612 | return test_item(lambda i: item == i, description) 613 | 614 | 615 | def string_from(*strings: str, transform: Callable[[str], str] = noop): 616 | """ 617 | Accepts a sequence of strings as positional arguments, and returns a parser 618 | that matches and returns one string from the list. The list is first sorted 619 | in descending length order, so that overlapping strings are handled correctly 620 | by checking the longest one first. 621 | """ 622 | # Sort longest first, so that overlapping options work correctly 623 | return alt(*(string(s, transform) for s in sorted(strings, key=len, reverse=True))) 624 | 625 | 626 | def char_from(string: str | bytes): 627 | """ 628 | Accepts a string and returns a parser that matches and returns one character 629 | from the string. 630 | """ 631 | if isinstance(string, bytes): 632 | return test_char(lambda c: c in string, b"[" + string + b"]") 633 | else: 634 | return test_char(lambda c: c in string, "[" + string + "]") 635 | 636 | 637 | def peek(parser: Parser) -> Parser: 638 | """ 639 | Returns a lookahead parser that parses the input stream without consuming 640 | chars. 641 | """ 642 | 643 | @Parser 644 | def peek_parser(stream, index): 645 | result = parser(stream, index) 646 | if result.status: 647 | return Result.success(index, result.value) 648 | else: 649 | return result 650 | 651 | return peek_parser 652 | 653 | 654 | any_char = test_char(lambda c: True, "any character") 655 | 656 | whitespace = regex(r"\s+") 657 | 658 | letter = test_char(lambda c: c.isalpha(), "a letter") 659 | 660 | digit = test_char(lambda c: c.isdigit(), "a digit") 661 | 662 | decimal_digit = char_from("0123456789") 663 | 664 | 665 | @Parser 666 | def eof(stream, index): 667 | """ 668 | A parser that only succeeds if the end of the stream has been reached. 
669 | """ 670 | 671 | if index >= len(stream): 672 | return Result.success(index, None) 673 | else: 674 | return Result.failure(index, "EOF") 675 | 676 | 677 | def from_enum(enum_cls: type[enum.Enum], transform=noop) -> Parser: 678 | """ 679 | Given a class that is an enum.Enum class 680 | https://docs.python.org/3/library/enum.html , returns a parser that 681 | will parse the values (or the string representations of the values) 682 | and return the corresponding enum item. 683 | """ 684 | 685 | items = sorted( 686 | ((str(enum_item.value), enum_item) for enum_item in enum_cls), key=lambda t: len(t[0]), reverse=True 687 | ) 688 | return alt(*(string(value, transform=transform).result(enum_item) for value, enum_item in items)) 689 | 690 | 691 | class forward_declaration(Parser): 692 | """ 693 | An empty parser that can be used as a forward declaration, 694 | especially for parsers that need to be defined recursively. 695 | 696 | You must use `.become(parser)` before using. 697 | """ 698 | 699 | def __init__(self): 700 | pass 701 | 702 | def _raise_error(self, *args, **kwargs): 703 | raise ValueError("You must use 'become' before attempting to call `parse` or `parse_partial`") 704 | 705 | parse = _raise_error 706 | parse_partial = _raise_error 707 | 708 | def become(self, other: Parser): 709 | """ 710 | Take on the behavior of the given parser. 
711 | """ 712 | self.__dict__ = other.__dict__ 713 | self.__class__ = other.__class__ 714 | -------------------------------------------------------------------------------- /tests/requirements-linters.txt: -------------------------------------------------------------------------------- 1 | isort==5.4.2 2 | flake8==3.8.3 3 | -------------------------------------------------------------------------------- /tests/requirements-tests.txt: -------------------------------------------------------------------------------- 1 | pytest==7.1.1 2 | pytest-cov==4.0.0 3 | coverage==6.3.2 4 | -------------------------------------------------------------------------------- /tests/test_parsy.py: -------------------------------------------------------------------------------- 1 | # -*- code: utf8 -*- 2 | import enum 3 | import re 4 | import unittest 5 | from collections import namedtuple 6 | from datetime import date 7 | 8 | from parsy import ( 9 | ParseError, 10 | alt, 11 | any_char, 12 | char_from, 13 | decimal_digit, 14 | digit, 15 | forward_declaration, 16 | from_enum, 17 | generate, 18 | index, 19 | letter, 20 | line_info, 21 | line_info_at, 22 | match_item, 23 | peek, 24 | regex, 25 | seq, 26 | string, 27 | string_from, 28 | ) 29 | from parsy import test_char as parsy_test_char # to stop pytest thinking this function is a test 30 | from parsy import test_item as parsy_test_item # to stop pytest thinking this function is a test 31 | from parsy import whitespace 32 | 33 | 34 | class TestParser(unittest.TestCase): 35 | def test_string(self): 36 | parser = string("x") 37 | self.assertEqual(parser.parse("x"), "x") 38 | 39 | self.assertRaises(ParseError, parser.parse, "y") 40 | 41 | def test_string_transform(self): 42 | parser = string("x", transform=lambda s: s.lower()) 43 | self.assertEqual(parser.parse("x"), "x") 44 | self.assertEqual(parser.parse("X"), "x") 45 | 46 | self.assertRaises(ParseError, parser.parse, "y") 47 | 48 | def test_string_transform_2(self): 49 | parser = 
string("Cat", transform=lambda s: s.lower()) 50 | self.assertEqual(parser.parse("cat"), "Cat") 51 | self.assertEqual(parser.parse("CAT"), "Cat") 52 | self.assertEqual(parser.parse("CaT"), "Cat") 53 | 54 | self.assertRaises(ParseError, parser.parse, "dog") 55 | 56 | def test_regex_str(self): 57 | parser = regex(r"[0-9]") 58 | 59 | self.assertEqual(parser.parse("1"), "1") 60 | self.assertEqual(parser.parse("4"), "4") 61 | 62 | self.assertRaises(ParseError, parser.parse, "x") 63 | 64 | def test_regex_bytes(self): 65 | parser = regex(rb"[0-9]") 66 | 67 | self.assertEqual(parser.parse(b"1"), b"1") 68 | self.assertEqual(parser.parse(b"4"), b"4") 69 | 70 | self.assertRaises(ParseError, parser.parse, b"x") 71 | 72 | def test_regex_compiled(self): 73 | parser = regex(re.compile(r"[0-9]")) 74 | self.assertEqual(parser.parse("1"), "1") 75 | self.assertRaises(ParseError, parser.parse, "x") 76 | 77 | def test_regex_group_number(self): 78 | parser = regex(re.compile(r"a([0-9])b"), group=1) 79 | self.assertEqual(parser.parse("a1b"), "1") 80 | self.assertRaises(ParseError, parser.parse, "x") 81 | 82 | def test_regex_group_name(self): 83 | parser = regex(re.compile(r"a(?P[0-9])b"), group="name") 84 | self.assertEqual(parser.parse("a1b"), "1") 85 | self.assertRaises(ParseError, parser.parse, "x") 86 | 87 | def test_regex_group_tuple(self): 88 | parser = regex(re.compile(r"a([0-9])b([0-9])c"), group=(1, 2)) 89 | self.assertEqual(parser.parse("a1b2c"), ("1", "2")) 90 | self.assertRaises(ParseError, parser.parse, "x") 91 | 92 | def test_then(self): 93 | xy_parser = string("x") >> string("y") 94 | self.assertEqual(xy_parser.parse("xy"), "y") 95 | 96 | self.assertRaises(ParseError, xy_parser.parse, "y") 97 | self.assertRaises(ParseError, xy_parser.parse, "z") 98 | 99 | def test_bind(self): 100 | piped = None 101 | 102 | def binder(x): 103 | nonlocal piped 104 | piped = x 105 | return string("y") 106 | 107 | parser = string("x").bind(binder) 108 | 109 | 
self.assertEqual(parser.parse("xy"), "y") 110 | self.assertEqual(piped, "x") 111 | 112 | self.assertRaises(ParseError, parser.parse, "x") 113 | 114 | def test_map(self): 115 | parser = digit.map(int) 116 | self.assertEqual(parser.parse("7"), 7) 117 | 118 | def test_combine(self): 119 | parser = seq(digit, letter).combine(lambda d, l: (d, l)) 120 | self.assertEqual(parser.parse("1A"), ("1", "A")) 121 | 122 | def test_combine_dict(self): 123 | ddmmyyyy = ( 124 | seq( 125 | regex(r"[0-9]{2}").map(int).tag("day"), 126 | regex(r"[0-9]{2}").map(int).tag("month"), 127 | regex(r"[0-9]{4}").map(int).tag("year"), 128 | ) 129 | .map(dict) 130 | .combine_dict(date) 131 | ) 132 | self.assertEqual(ddmmyyyy.parse("05042003"), date(2003, 4, 5)) 133 | 134 | def test_combine_dict_list(self): 135 | Pair = namedtuple("Pair", ["word", "number"]) 136 | parser = seq( 137 | regex(r"[A-Z]+").tag("word"), 138 | regex(r"[0-9]+").map(int).tag("number"), 139 | ).combine_dict(Pair) 140 | self.assertEqual(parser.parse("ABC123"), Pair(word="ABC", number=123)) 141 | 142 | def test_combine_dict_skip_None(self): 143 | Pair = namedtuple("Pair", ["word", "number"]) 144 | parser = seq( 145 | regex(r"[A-Z]+").tag("word"), 146 | whitespace.tag(None), 147 | regex(r"[0-9]+").map(int).tag("number"), 148 | ).combine_dict(Pair) 149 | self.assertEqual(parser.parse("ABC 123"), Pair(word="ABC", number=123)) 150 | 151 | def test_combine_dict_skip_underscores(self): 152 | Pair = namedtuple("Pair", ["word", "number"]) 153 | parser = seq( 154 | regex(r"[A-Z]+").tag("word"), 155 | whitespace.tag("_whitespace"), 156 | regex(r"[0-9]+").map(int).tag("number"), 157 | ).combine_dict(Pair) 158 | self.assertEqual(parser.parse("ABC 123"), Pair(word="ABC", number=123)) 159 | 160 | def test_concat(self): 161 | parser = letter.many().concat() 162 | self.assertEqual(parser.parse(""), "") 163 | self.assertEqual(parser.parse("abc"), "abc") 164 | 165 | def test_concat_from_byte_stream(self): 166 | any_byte = parsy_test_item(lambda 
c: True, "any byte") 167 | parser = any_byte.map(lambda b: b.decode("ascii")).many().concat() 168 | self.assertEqual(parser.parse(b""), "") 169 | self.assertEqual(parser.parse(b"abc"), "abc") 170 | 171 | def test_generate(self): 172 | x = y = None 173 | 174 | @generate 175 | def xy(): 176 | nonlocal x 177 | nonlocal y 178 | x = yield string("x") 179 | y = yield string("y") 180 | return 3 181 | 182 | self.assertEqual(xy.parse("xy"), 3) 183 | self.assertEqual(x, "x") 184 | self.assertEqual(y, "y") 185 | 186 | def test_generate_return_parser(self): 187 | @generate 188 | def example(): 189 | yield string("x") 190 | return string("y") 191 | 192 | self.assertEqual(example.parse("xy"), "y") 193 | 194 | def test_mark(self): 195 | parser = (letter.many().mark() << string("\n")).many() 196 | 197 | lines = parser.parse("asdf\nqwer\n") 198 | 199 | self.assertEqual(len(lines), 2) 200 | 201 | (start, letters, end) = lines[0] 202 | self.assertEqual(start, (0, 0)) 203 | self.assertEqual(letters, ["a", "s", "d", "f"]) 204 | self.assertEqual(end, (0, 4)) 205 | 206 | (start, letters, end) = lines[1] 207 | self.assertEqual(start, (1, 0)) 208 | self.assertEqual(letters, ["q", "w", "e", "r"]) 209 | self.assertEqual(end, (1, 4)) 210 | 211 | def test_tag(self): 212 | parser = letter.many().concat().tag("word") 213 | self.assertEqual( 214 | parser.sep_by(string(",")).parse("this,is,a,list"), 215 | [("word", "this"), ("word", "is"), ("word", "a"), ("word", "list")], 216 | ) 217 | 218 | def test_tag_map_dict(self): 219 | parser = seq(letter.tag("first_letter"), letter.many().concat().tag("remainder")).map(dict) 220 | self.assertEqual(parser.parse("Hello"), {"first_letter": "H", "remainder": "ello"}) 221 | 222 | def test_generate_desc(self): 223 | @generate("a thing") 224 | def thing(): 225 | yield string("t") 226 | 227 | with self.assertRaises(ParseError) as err: 228 | thing.parse("x") 229 | 230 | ex = err.exception 231 | 232 | self.assertEqual(ex.expected, frozenset(["a thing"])) 233 | 
self.assertEqual(ex.stream, "x") 234 | self.assertEqual(ex.index, 0) 235 | 236 | def test_generate_default_desc(self): 237 | # We shouldn't give a default desc, the messages from the internal 238 | # parsers should bubble up. 239 | @generate 240 | def thing(): 241 | yield string("a") 242 | yield string("b") 243 | 244 | with self.assertRaises(ParseError) as err: 245 | thing.parse("ax") 246 | 247 | ex = err.exception 248 | 249 | self.assertEqual(ex.expected, frozenset(["b"])) 250 | self.assertEqual(ex.stream, "ax") 251 | self.assertEqual(ex.index, 1) 252 | 253 | self.assertIn("expected 'b' at 0:1", str(ex)) 254 | 255 | def test_multiple_failures(self): 256 | abc = string("a") | string("b") | string("c") 257 | 258 | with self.assertRaises(ParseError) as err: 259 | abc.parse("d") 260 | 261 | ex = err.exception 262 | self.assertEqual(ex.expected, frozenset(["a", "b", "c"])) 263 | self.assertEqual(str(ex), "expected one of 'a', 'b', 'c' at 0:0") 264 | 265 | def test_generate_backtracking(self): 266 | @generate 267 | def xy(): 268 | yield string("x") 269 | yield string("y") 270 | assert False 271 | 272 | parser = xy | string("z") 273 | # should not finish executing xy() 274 | self.assertEqual(parser.parse("z"), "z") 275 | 276 | def test_or(self): 277 | x_or_y = string("x") | string("y") 278 | 279 | self.assertEqual(x_or_y.parse("x"), "x") 280 | self.assertEqual(x_or_y.parse("y"), "y") 281 | 282 | def test_or_with_then(self): 283 | parser = (string("\\") >> string("y")) | string("z") 284 | self.assertEqual(parser.parse("\\y"), "y") 285 | self.assertEqual(parser.parse("z"), "z") 286 | 287 | self.assertRaises(ParseError, parser.parse, "\\z") 288 | 289 | def test_many(self): 290 | letters = letter.many() 291 | self.assertEqual(letters.parse("x"), ["x"]) 292 | self.assertEqual(letters.parse("xyz"), ["x", "y", "z"]) 293 | self.assertEqual(letters.parse(""), []) 294 | 295 | self.assertRaises(ParseError, letters.parse, "1") 296 | 297 | def test_many_with_then(self): 298 | parser 
= string("x").many() >> string("y") 299 | self.assertEqual(parser.parse("y"), "y") 300 | self.assertEqual(parser.parse("xy"), "y") 301 | self.assertEqual(parser.parse("xxxxxy"), "y") 302 | 303 | def test_times_zero(self): 304 | zero_letters = letter.times(0) 305 | self.assertEqual(zero_letters.parse(""), []) 306 | 307 | self.assertRaises(ParseError, zero_letters.parse, "x") 308 | 309 | def test_times(self): 310 | three_letters = letter.times(3) 311 | self.assertEqual(three_letters.parse("xyz"), ["x", "y", "z"]) 312 | 313 | self.assertRaises(ParseError, three_letters.parse, "xy") 314 | self.assertRaises(ParseError, three_letters.parse, "xyzw") 315 | 316 | def test_times_with_then(self): 317 | then_digit = letter.times(3) >> digit 318 | self.assertEqual(then_digit.parse("xyz1"), "1") 319 | 320 | self.assertRaises(ParseError, then_digit.parse, "xy1") 321 | self.assertRaises(ParseError, then_digit.parse, "xyz") 322 | self.assertRaises(ParseError, then_digit.parse, "xyzw") 323 | 324 | def test_times_with_min_and_max(self): 325 | some_letters = letter.times(2, 4) 326 | 327 | self.assertEqual(some_letters.parse("xy"), ["x", "y"]) 328 | self.assertEqual(some_letters.parse("xyz"), ["x", "y", "z"]) 329 | self.assertEqual(some_letters.parse("xyzw"), ["x", "y", "z", "w"]) 330 | 331 | self.assertRaises(ParseError, some_letters.parse, "x") 332 | self.assertRaises(ParseError, some_letters.parse, "xyzwv") 333 | 334 | def test_times_with_min_and_max_and_then(self): 335 | then_digit = letter.times(2, 4) >> digit 336 | 337 | self.assertEqual(then_digit.parse("xy1"), "1") 338 | self.assertEqual(then_digit.parse("xyz1"), "1") 339 | self.assertEqual(then_digit.parse("xyzw1"), "1") 340 | 341 | self.assertRaises(ParseError, then_digit.parse, "xy") 342 | self.assertRaises(ParseError, then_digit.parse, "xyzw") 343 | self.assertRaises(ParseError, then_digit.parse, "xyzwv1") 344 | self.assertRaises(ParseError, then_digit.parse, "x1") 345 | 346 | def test_at_most(self): 347 | ab = string("ab") 
348 | self.assertEqual(ab.at_most(2).parse(""), []) 349 | self.assertEqual(ab.at_most(2).parse("ab"), ["ab"]) 350 | self.assertEqual(ab.at_most(2).parse("abab"), ["ab", "ab"]) 351 | self.assertRaises(ParseError, ab.at_most(2).parse, "ababab") 352 | 353 | def test_until(self): 354 | 355 | until = string("s").until(string("x")) 356 | 357 | s = "ssssx" 358 | self.assertEqual(until.parse_partial(s), (4 * ["s"], "x")) 359 | self.assertEqual(seq(until, string("x")).parse(s), [4 * ["s"], "x"]) 360 | self.assertEqual(until.then(string("x")).parse(s), "x") 361 | 362 | s = "ssssxy" 363 | self.assertEqual(until.parse_partial(s), (4 * ["s"], "xy")) 364 | self.assertEqual(seq(until, string("x")).parse_partial(s), ([4 * ["s"], "x"], "y")) 365 | self.assertEqual(until.then(string("x")).parse_partial(s), ("x", "y")) 366 | 367 | self.assertRaises(ParseError, until.parse, "ssssy") 368 | self.assertRaises(ParseError, until.parse, "xssssxy") 369 | 370 | self.assertEqual(until.parse_partial("xxx"), ([], "xxx")) 371 | 372 | until = regex(".").until(string("x")) 373 | self.assertEqual(until.parse_partial("xxxx"), ([], "xxxx")) 374 | 375 | def test_until_with_consume_other(self): 376 | 377 | until = string("s").until(string("x"), consume_other=True) 378 | 379 | self.assertEqual(until.parse("ssssx"), 4 * ["s"] + ["x"]) 380 | self.assertEqual(until.parse_partial("ssssxy"), (4 * ["s"] + ["x"], "y")) 381 | 382 | self.assertEqual(until.parse_partial("xxx"), (["x"], "xx")) 383 | 384 | self.assertRaises(ParseError, until.parse, "ssssy") 385 | self.assertRaises(ParseError, until.parse, "xssssxy") 386 | 387 | def test_until_with_min(self): 388 | 389 | until = string("s").until(string("x"), min=3) 390 | 391 | self.assertEqual(until.parse_partial("sssx"), (3 * ["s"], "x")) 392 | self.assertEqual(until.parse_partial("sssssx"), (5 * ["s"], "x")) 393 | 394 | self.assertRaises(ParseError, until.parse_partial, "ssx") 395 | 396 | def test_until_with_max(self): 397 | 398 | # until with max 399 | until = 
string("s").until(string("x"), max=3) 400 | 401 | self.assertEqual(until.parse_partial("ssx"), (2 * ["s"], "x")) 402 | self.assertEqual(until.parse_partial("sssx"), (3 * ["s"], "x")) 403 | 404 | self.assertRaises(ParseError, until.parse_partial, "ssssx") 405 | 406 | def test_until_with_min_max(self): 407 | 408 | until = string("s").until(string("x"), min=3, max=5) 409 | 410 | self.assertEqual(until.parse_partial("sssx"), (3 * ["s"], "x")) 411 | self.assertEqual(until.parse_partial("sssssx"), (5 * ["s"], "x")) 412 | 413 | with self.assertRaises(ParseError) as cm: 414 | until.parse_partial("ssx") 415 | assert cm.exception.args[0] == frozenset({"at least 3 items; got 2 item(s)"}) 416 | with self.assertRaises(ParseError) as cm: 417 | until.parse_partial("ssssssx") 418 | assert cm.exception.args[0] == frozenset({"at most 5 items"}) 419 | 420 | def test_optional(self): 421 | p = string("a").optional() 422 | self.assertEqual(p.parse("a"), "a") 423 | self.assertEqual(p.parse(""), None) 424 | p = string("a").optional("b") 425 | self.assertEqual(p.parse("a"), "a") 426 | self.assertEqual(p.parse(""), "b") 427 | 428 | def test_sep_by(self): 429 | digit_list = digit.map(int).sep_by(string(",")) 430 | 431 | self.assertEqual(digit_list.parse("1,2,3,4"), [1, 2, 3, 4]) 432 | self.assertEqual(digit_list.parse("9,0,4,7"), [9, 0, 4, 7]) 433 | self.assertEqual(digit_list.parse("3,7"), [3, 7]) 434 | self.assertEqual(digit_list.parse("8"), [8]) 435 | self.assertEqual(digit_list.parse(""), []) 436 | 437 | self.assertRaises(ParseError, digit_list.parse, "8,") 438 | self.assertRaises(ParseError, digit_list.parse, ",9") 439 | self.assertRaises(ParseError, digit_list.parse, "82") 440 | self.assertRaises(ParseError, digit_list.parse, "7.6") 441 | 442 | def test_sep_by_with_min_and_max(self): 443 | digit_list = digit.map(int).sep_by(string(","), min=2, max=4) 444 | 445 | self.assertEqual(digit_list.parse("1,2,3,4"), [1, 2, 3, 4]) 446 | self.assertEqual(digit_list.parse("9,0,4,7"), [9, 0, 4, 7]) 
447 | self.assertEqual(digit_list.parse("3,7"), [3, 7]) 448 | 449 | self.assertRaises(ParseError, digit_list.parse, "8") 450 | self.assertRaises(ParseError, digit_list.parse, "") 451 | self.assertRaises(ParseError, digit_list.parse, "8,") 452 | self.assertRaises(ParseError, digit_list.parse, ",9") 453 | self.assertRaises(ParseError, digit_list.parse, "82") 454 | self.assertRaises(ParseError, digit_list.parse, "7.6") 455 | self.assertEqual(digit.sep_by(string(","), max=0).parse(""), []) 456 | 457 | def test_add(self): 458 | self.assertEqual((letter + digit).parse("a1"), "a1") 459 | 460 | def test_multiply(self): 461 | self.assertEqual((letter * 3).parse("abc"), ["a", "b", "c"]) 462 | 463 | def test_multiply_range(self): 464 | self.assertEqual((letter * range(1, 2)).parse("a"), ["a"]) 465 | self.assertRaises(ParseError, (letter * range(1, 2)).parse, "aa") 466 | 467 | # Primitives 468 | def test_alt(self): 469 | self.assertRaises(ParseError, alt().parse, "") 470 | self.assertEqual(alt(letter, digit).parse("a"), "a") 471 | self.assertEqual(alt(letter, digit).parse("1"), "1") 472 | self.assertRaises(ParseError, alt(letter, digit).parse, ".") 473 | 474 | def test_seq(self): 475 | self.assertEqual(seq().parse(""), []) 476 | self.assertEqual(seq(letter).parse("a"), ["a"]) 477 | self.assertEqual(seq(letter, digit).parse("a1"), ["a", "1"]) 478 | self.assertRaises(ParseError, seq(letter, digit).parse, "1a") 479 | 480 | def test_seq_kwargs(self): 481 | self.assertEqual( 482 | seq(first_name=regex(r"\S+") << whitespace, last_name=regex(r"\S+")).parse("Jane Smith"), 483 | {"first_name": "Jane", "last_name": "Smith"}, 484 | ) 485 | 486 | def test_seq_kwargs_fail(self): 487 | self.assertRaises(ParseError, seq(a=string("a")).parse, "b") 488 | 489 | def test_seq_kwargs_error(self): 490 | self.assertRaises(ValueError, lambda: seq(string("a"), b=string("b"))) 491 | 492 | def test_test_char(self): 493 | ascii = parsy_test_char(lambda c: ord(c) < 128, "ascii character") 494 | 
self.assertEqual(ascii.parse("a"), "a") 495 | with self.assertRaises(ParseError) as err: 496 | ascii.parse("☺") 497 | ex = err.exception 498 | self.assertEqual(str(ex), """expected 'ascii character' at 0:0""") 499 | 500 | with self.assertRaises(ParseError) as err: 501 | ascii.parse("") 502 | ex = err.exception 503 | self.assertEqual(str(ex), """expected 'ascii character' at 0:0""") 504 | 505 | def test_char_from_str(self): 506 | ab = char_from("ab") 507 | self.assertEqual(ab.parse("a"), "a") 508 | self.assertEqual(ab.parse("b"), "b") 509 | 510 | with self.assertRaises(ParseError) as err: 511 | ab.parse("x") 512 | 513 | ex = err.exception 514 | self.assertEqual(str(ex), """expected '[ab]' at 0:0""") 515 | 516 | def test_char_from_bytes(self): 517 | ab = char_from(b"ab") 518 | self.assertEqual(ab.parse(b"a"), b"a") 519 | self.assertEqual(ab.parse(b"b"), b"b") 520 | 521 | with self.assertRaises(ParseError) as err: 522 | ab.parse(b"x") 523 | 524 | ex = err.exception 525 | self.assertEqual(str(ex), """expected b'[ab]' at 0""") 526 | 527 | def test_string_from(self): 528 | titles = string_from("Mr", "Mr.", "Mrs", "Mrs.") 529 | self.assertEqual(titles.parse("Mr"), "Mr") 530 | self.assertEqual(titles.parse("Mr."), "Mr.") 531 | self.assertEqual((titles + string(" Hyde")).parse("Mr. Hyde"), "Mr. Hyde") 532 | with self.assertRaises(ParseError) as err: 533 | titles.parse("foo") 534 | 535 | ex = err.exception 536 | self.assertEqual(str(ex), """expected one of 'Mr', 'Mr.', 'Mrs', 'Mrs.' 
at 0:0""") 537 | 538 | def test_string_from_transform(self): 539 | titles = string_from("Mr", "Mr.", "Mrs", "Mrs.", transform=lambda s: s.lower()) 540 | self.assertEqual(titles.parse("mr"), "Mr") 541 | self.assertEqual(titles.parse("mr."), "Mr.") 542 | self.assertEqual(titles.parse("MR"), "Mr") 543 | self.assertEqual(titles.parse("MR."), "Mr.") 544 | 545 | def test_peek(self): 546 | self.assertEqual(peek(any_char).parse_partial("abc"), ("a", "abc")) 547 | with self.assertRaises(ParseError) as err: 548 | peek(digit).parse("a") 549 | self.assertEqual(str(err.exception), "expected 'a digit' at 0:0") 550 | 551 | def test_any_char(self): 552 | self.assertEqual(any_char.parse("x"), "x") 553 | self.assertEqual(any_char.parse("\n"), "\n") 554 | self.assertRaises(ParseError, any_char.parse, "") 555 | 556 | def test_whitespace(self): 557 | self.assertEqual(whitespace.parse("\n"), "\n") 558 | self.assertEqual(whitespace.parse(" "), " ") 559 | self.assertRaises(ParseError, whitespace.parse, "x") 560 | 561 | def test_letter(self): 562 | self.assertEqual(letter.parse("a"), "a") 563 | self.assertRaises(ParseError, letter.parse, "1") 564 | 565 | def test_digit(self): 566 | self.assertEqual(digit.parse("¹"), "¹") 567 | self.assertEqual(digit.parse("2"), "2") 568 | self.assertRaises(ParseError, digit.parse, "x") 569 | 570 | def test_decimal_digit(self): 571 | self.assertEqual(decimal_digit.at_least(1).concat().parse("9876543210"), "9876543210") 572 | self.assertRaises(ParseError, decimal_digit.parse, "¹") 573 | 574 | def test_line_info(self): 575 | @generate 576 | def foo(): 577 | i = yield line_info 578 | l = yield any_char 579 | return (l, i) 580 | 581 | self.assertEqual( 582 | foo.many().parse("AB\nCD"), 583 | [ 584 | ("A", (0, 0)), 585 | ("B", (0, 1)), 586 | ("\n", (0, 2)), 587 | ("C", (1, 0)), 588 | ("D", (1, 1)), 589 | ], 590 | ) 591 | 592 | def test_should_fail(self): 593 | not_a_digit = digit.should_fail("not a digit") >> regex(r".*") 594 | 595 | 
self.assertEqual(not_a_digit.parse("a"), "a") 596 | self.assertEqual(not_a_digit.parse("abc"), "abc") 597 | self.assertEqual(not_a_digit.parse("a10"), "a10") 598 | self.assertEqual(not_a_digit.parse(""), "") 599 | 600 | with self.assertRaises(ParseError) as err: 601 | not_a_digit.parse("8") 602 | self.assertEqual(str(err.exception), "expected 'not a digit' at 0:0") 603 | 604 | self.assertRaises(ParseError, not_a_digit.parse, "8ab") 605 | 606 | def test_from_enum_string(self): 607 | class Pet(enum.Enum): 608 | CAT = "cat" 609 | DOG = "dog" 610 | 611 | pet = from_enum(Pet) 612 | self.assertEqual(pet.parse("cat"), Pet.CAT) 613 | self.assertEqual(pet.parse("dog"), Pet.DOG) 614 | self.assertRaises(ParseError, pet.parse, "foo") 615 | 616 | def test_from_enum_int(self): 617 | class Position(enum.Enum): 618 | FIRST = 1 619 | SECOND = 2 620 | 621 | position = from_enum(Position) 622 | self.assertEqual(position.parse("1"), Position.FIRST) 623 | self.assertEqual(position.parse("2"), Position.SECOND) 624 | self.assertRaises(ParseError, position.parse, "foo") 625 | 626 | def test_from_enum_transform(self): 627 | class Pet(enum.Enum): 628 | CAT = "cat" 629 | DOG = "dog" 630 | 631 | pet = from_enum(Pet, transform=lambda s: s.lower()) 632 | self.assertEqual(pet.parse("cat"), Pet.CAT) 633 | self.assertEqual(pet.parse("CAT"), Pet.CAT) 634 | 635 | 636 | class TestParserTokens(unittest.TestCase): 637 | """ 638 | Tests that ensure that `.parse` can handle an arbitrary list of tokens, 639 | rather than a string. 
640 | """ 641 | 642 | # Some opaque objects we will use in our stream: 643 | START = object() 644 | STOP = object() 645 | 646 | def test_test_item(self): 647 | start_stop = parsy_test_item(lambda i: i in [self.START, self.STOP], "START/STOP") 648 | self.assertEqual(start_stop.parse([self.START]), self.START) 649 | self.assertEqual(start_stop.parse([self.STOP]), self.STOP) 650 | with self.assertRaises(ParseError) as err: 651 | start_stop.many().parse([self.START, "hello"]) 652 | 653 | ex = err.exception 654 | self.assertEqual(str(ex), "expected one of 'EOF', 'START/STOP' at 1") 655 | self.assertEqual(ex.expected, {"EOF", "START/STOP"}) 656 | self.assertEqual(ex.index, 1) 657 | 658 | def test_match_item(self): 659 | self.assertEqual(match_item(self.START).parse([self.START]), self.START) 660 | with self.assertRaises(ParseError) as err: 661 | match_item(self.START, "START").parse([]) 662 | 663 | ex = err.exception 664 | self.assertEqual(str(ex), "expected 'START' at 0") 665 | 666 | def test_parse_tokens(self): 667 | other_vals = parsy_test_item(lambda i: i not in [self.START, self.STOP], "not START/STOP") 668 | 669 | bracketed = match_item(self.START) >> other_vals.many() << match_item(self.STOP) 670 | stream = [self.START, "hello", 1, 2, "goodbye", self.STOP] 671 | result = bracketed.parse(stream) 672 | self.assertEqual(result, ["hello", 1, 2, "goodbye"]) 673 | 674 | def test_index(self): 675 | @generate 676 | def foo(): 677 | i = yield index 678 | l = yield letter 679 | return (l, i) 680 | 681 | self.assertEqual(foo.many().parse(["A", "B"]), [("A", 0), ("B", 1)]) 682 | 683 | 684 | class TestUtils(unittest.TestCase): 685 | def test_line_info_at(self): 686 | text = "abc\ndef" 687 | self.assertEqual(line_info_at(text, 0), (0, 0)) 688 | self.assertEqual(line_info_at(text, 2), (0, 2)) 689 | self.assertEqual(line_info_at(text, 3), (0, 3)) 690 | self.assertEqual(line_info_at(text, 4), (1, 0)) 691 | self.assertEqual(line_info_at(text, 7), (1, 3)) 692 | 
self.assertRaises(ValueError, lambda: line_info_at(text, 8)) 693 | 694 | 695 | class TestForwardDeclaration(unittest.TestCase): 696 | def test_forward_declaration_1(self): 697 | # This is the example from the docs 698 | expr = forward_declaration() 699 | with self.assertRaises(ValueError): 700 | expr.parse("()") 701 | 702 | with self.assertRaises(ValueError): 703 | expr.parse_partial("()") 704 | 705 | simple = regex("[0-9]+").map(int) 706 | group = string("(") >> expr.sep_by(string(" ")) << string(")") 707 | expr.become(simple | group) 708 | 709 | self.assertEqual(expr.parse("(0 1 (2 3))"), [0, 1, [2, 3]]) 710 | 711 | def test_forward_declaration_2(self): 712 | # Simplest example I could think of 713 | expr = forward_declaration() 714 | expr.become(string("A") + expr | string("Z")) 715 | 716 | self.assertEqual(expr.parse("Z"), "Z") 717 | self.assertEqual(expr.parse("AZ"), "AZ") 718 | self.assertEqual(expr.parse("AAAAAZ"), "AAAAAZ") 719 | 720 | with self.assertRaises(ParseError): 721 | expr.parse("A") 722 | 723 | with self.assertRaises(ParseError): 724 | expr.parse("B") 725 | 726 | self.assertEqual(expr.parse_partial("AAZXX"), ("AAZ", "XX")) 727 | 728 | def test_forward_declaration_cant_become_twice(self): 729 | dec = forward_declaration() 730 | other = string("X") 731 | dec.become(other) 732 | 733 | with self.assertRaises((AttributeError, TypeError)): 734 | dec.become(other) 735 | 736 | 737 | if __name__ == "__main__": 738 | unittest.main() 739 | -------------------------------------------------------------------------------- /tests/test_sexpr.py: -------------------------------------------------------------------------------- 1 | import re 2 | import unittest 3 | 4 | from parsy import generate, regex, string 5 | 6 | whitespace = regex(r"\s+", re.MULTILINE) 7 | comment = regex(r";.*") 8 | ignore = (whitespace | comment).many() 9 | 10 | lexeme = lambda p: p << ignore 11 | 12 | lparen = lexeme(string("(")) 13 | rparen = lexeme(string(")")) 14 | number = 
lexeme(regex(r"\d+")).map(int) 15 | symbol = lexeme(regex(r"[\d\w_-]+")) 16 | true = lexeme(string("#t")).result(True) 17 | false = lexeme(string("#f")).result(False) 18 | 19 | atom = true | false | number | symbol 20 | 21 | 22 | @generate("a form") 23 | def form(): 24 | yield lparen 25 | els = yield expr.many() 26 | yield rparen 27 | return els 28 | 29 | 30 | @generate 31 | def quote(): 32 | yield string("'") 33 | e = yield expr 34 | return ["quote", e] 35 | 36 | 37 | expr = form | quote | atom 38 | program = ignore >> expr.many() 39 | 40 | 41 | class TestSexpr(unittest.TestCase): 42 | def test_form(self): 43 | result = program.parse("(1 2 3)") 44 | self.assertEqual(result, [[1, 2, 3]]) 45 | 46 | def test_quote(self): 47 | result = program.parse("'foo '(bar baz)") 48 | self.assertEqual(result, [["quote", "foo"], ["quote", ["bar", "baz"]]]) 49 | 50 | def test_double_quote(self): 51 | result = program.parse("''foo") 52 | self.assertEqual(result, [["quote", ["quote", "foo"]]]) 53 | 54 | def test_boolean(self): 55 | result = program.parse("#t #f") 56 | self.assertEqual(result, [True, False]) 57 | 58 | def test_comments(self): 59 | result = program.parse( 60 | """ 61 | ; a program with a comment 62 | ( foo ; that's a foo 63 | bar ) 64 | ; some comments at the end 65 | """ 66 | ) 67 | 68 | self.assertEqual(result, [["foo", "bar"]]) 69 | 70 | 71 | if __name__ == "__main__": 72 | unittest.main() 73 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py39,py310,py311,py312,py313,pypy39,isort-check,flake8-check 3 | 4 | [testenv] 5 | commands = pytest 6 | allowlist_externals = ["pytest"] 7 | deps = -e . 
8 | pytest 9 | 10 | [testenv:isort-check] 11 | # isort configurations are located in pyproject.toml 12 | basepython = python3.9 13 | deps = -r tests/requirements-linters.txt 14 | commands = isort -c {toxinidir} 15 | 16 | [testenv:flake8-check] 17 | basepython = python3.9 18 | deps = -r tests/requirements-linters.txt 19 | commands = flake8 20 | --------------------------------------------------------------------------------