├── src └── parsy │ ├── version.py │ └── __init__.py ├── .gitignore ├── docs ├── installation.rst ├── howto │ ├── index.rst │ ├── other_examples.rst │ └── lexing.rst ├── ref │ ├── index.rst │ ├── parser_instances.rst │ ├── generating.rst │ ├── primitives.rst │ └── methods_and_combinators.rst ├── index.rst ├── Makefile ├── make.bat ├── history.rst ├── contributing.rst ├── overview.rst ├── conf.py └── tutorial.rst ├── .editorconfig ├── MANIFEST.in ├── setup.cfg ├── travis_tests.sh ├── .travis.yml ├── tox.ini ├── examples ├── simple_logo_lexer.py ├── simple_logo_parser.py ├── json.py └── simple_eval.py ├── RELEASE.rst ├── LICENSE ├── README.rst ├── setup.py └── test ├── test_sexpr.py └── test_parsy.py /src/parsy/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.1-dev1' 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /dist 3 | .tox 4 | src/parsy.egg-info 5 | docs/_build 6 | .cache 7 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | parsy can be installed with pip:: 6 | 7 | pip install parsy 8 | 9 | 10 | Python 3.3 or greater is required. 11 | 12 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # See http://editorconfig.org/ 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | insert_final_newline = true 7 | charset = utf-8 8 | indent_style = space 9 | 10 | [*.py] 11 | indent_size = 4 12 | -------------------------------------------------------------------------------- /docs/howto/index.rst: -------------------------------------------------------------------------------- 1 | ================================= 2 | Howto's, cookbooks and examples 3 | ================================= 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | :caption: Contents: 8 | 9 | lexing 10 | other_examples 11 | -------------------------------------------------------------------------------- /docs/ref/index.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | API reference 3 | =============== 4 | 5 | .. 
toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | primitives 10 | methods_and_combinators 11 | generating 12 | parser_instances 13 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include *.rst 3 | include *.sh 4 | include LICENSE 5 | include tox.ini 6 | include .editorconfig 7 | recursive-include docs *.bat 8 | recursive-include docs *.py 9 | recursive-include docs *.rst 10 | recursive-include docs Makefile 11 | recursive-include examples *.py 12 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [wheel] 2 | 3 | [isort] 4 | multi_line_output = 5 5 | line_length = 119 6 | default_section = THIRDPARTY 7 | skip = .tox,.git,docs,dist,build 8 | known_first_party = parsy 9 | 10 | [flake8] 11 | exclude = .tox,.git,docs,dist,build 12 | ignore = E731,E221,W503 13 | max-line-length = 119 14 | -------------------------------------------------------------------------------- /travis_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | coverage run --branch --source=parsy `which py.test` || exit 1 4 | 5 | # Coveralls is flaky sometimes, especially for concurrent uploads. 6 | # https://github.com/lemurheavy/coveralls-public/issues/487 7 | # So try again if it fails first time. 8 | coveralls || { sleep $((RANDOM / 4000 + 1)); coveralls; } 9 | -------------------------------------------------------------------------------- /docs/howto/other_examples.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Other examples 3 | ============== 4 | 5 | This section has some further example parsers that you can study. There are also 6 | examples in the :doc:`/tutorial` and in :doc:`/ref/generating`. 7 | 8 | JSON parser 9 | =========== 10 | 11 | .. literalinclude:: ../../examples/json.py 12 | :language: python 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.3" 4 | - "3.4" 5 | - "3.5" 6 | - "3.6" 7 | env: SCRIPT=./travis_tests.sh 8 | matrix: 9 | include: 10 | - python: "3.5" 11 | env: SCRIPT=flake8 12 | - python: "3.5" 13 | env: SCRIPT="isort -c" 14 | - python: "3.5" 15 | env: SCRIPT=check-manifest 16 | install: 17 | - pip install pytest flake8 check-manifest isort coverage coveralls 18 | - ./setup.py develop 19 | script: 20 | - $SCRIPT 21 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to parsy's documentation! 2 | ================================= 3 | 4 | These are the docs for parsy |release|. Check the :doc:`/history` for 5 | significant changes. 6 | 7 | .. 
toctree:: 8 | :maxdepth: 4 9 | :caption: Contents: 10 | 11 | installation 12 | overview 13 | tutorial 14 | ref/index 15 | howto/index 16 | history 17 | contributing 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py33,py34,py35,py36,checkmanifest,isort-check,flake8-check 3 | 4 | [testenv] 5 | deps = pytest 6 | commands = pytest 7 | 8 | 9 | [testenv:checkmanifest] 10 | basepython = python3.3 11 | deps = check-manifest 12 | commands = check-manifest 13 | 14 | [testenv:isort-check] 15 | # isort configurations are located in setup.cfg 16 | basepython = python3.3 17 | deps = isort==4.2.15 18 | commands = isort -rc -c {toxinidir} 19 | 20 | [testenv:flake8-check] 21 | basepython = python3.3 22 | deps = flake8==3.4.1 23 | commands = flake8 24 | -------------------------------------------------------------------------------- /examples/simple_logo_lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stripped down logo lexer, for tokenizing Turtle Logo programs like: 3 | 4 | fd 1 5 | bk 2 6 | rt 90 7 | 8 | etc. 9 | """ 10 | 11 | from parsy import eof, regex, seq, string, string_from, whitespace 12 | 13 | command = string_from("fd", "bk", "rt", "lt") 14 | number = regex(r'[0-9]+').map(int) 15 | optional_whitespace = regex(r'\s*') 16 | eol = string("\n") 17 | line = seq(optional_whitespace >> command, 18 | whitespace >> number, 19 | (eof | eol | (whitespace >> eol)).result("\n")) 20 | flatten_list = lambda ls: sum(ls, []) 21 | lexer = line.many().map(flatten_list) 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = parsy 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /RELEASE.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | How to do releases 3 | ================== 4 | 5 | * Check test suite passes on all supported versions:: 6 | 7 | tox 8 | 9 | * Change docs/history.rst to remove " - unreleased" 10 | 11 | * Update the version number (removing the ``-dev1`` part): 12 | 13 | * src/parsy/version.py 14 | * docs/conf.py 15 | 16 | * Commit with "Version bump" 17 | 18 | * Release to PyPI:: 19 | 20 | ./setup.py sdist bdist_wheel upload 21 | 22 | * Tag and push:: 23 | 24 | 25 | git tag v$VERSION 26 | git push 27 | git push --tags 28 | 29 | 30 | Post release 31 | ------------ 32 | 33 | * Bump version numbers to next version, and add ``-dev1`` suffix, for example 34 | ``0.9.0-dev1`` 35 | 36 | * Add new section to docs/history.rst, with " - unreleased". 37 | 38 | * Commit and push 39 | -------------------------------------------------------------------------------- /examples/simple_logo_parser.py: -------------------------------------------------------------------------------- 1 | from parsy import generate, match_item, test_item 2 | 3 | 4 | class Command: 5 | def __init__(self, parameter): 6 | self.parameter = parameter 7 | 8 | def __repr__(self): 9 | return "{0}({1})".format(self.__class__.__name__, self.parameter) 10 | 11 | 12 | class Forward(Command): 13 | pass 14 | 15 | 16 | class Backward(Command): 17 | pass 18 | 19 | 20 | class Right(Command): 21 | pass 22 | 23 | 24 | class Left(Command): 25 | pass 26 | 27 | 28 | commands = { 29 | 'fd': Forward, 30 | 'bk': Backward, 31 | 'rt': Right, 32 | 'lt': Left, 33 | } 34 | 35 | 36 | @generate 37 | def statement(): 38 | cmd_name = yield test_item(lambda i: i in commands.keys(), "command") 39 | parameter = yield test_item(lambda i: isinstance(i, int), "number") 40 | yield match_item('\n') 41 | return commands[cmd_name](int(parameter)) 42 | 43 | 44 | program = statement.many() 45 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=parsy 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # MIT license. 
See http://www.opensource.org/licenses/mit-license.php 2 | 3 | Copyright (c) 2013 Jeanine Adkisson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | History and release notes 3 | ========================= 4 | 5 | .. currentmodule:: parsy 6 | 7 | 1.0.1 - unreleased 8 | ------------------ 9 | 10 | 11 | 1.0.0 - 2017-10-10 12 | ------------------ 13 | 14 | * Improved parse failure messages of ``@generate`` parsers. Previously 15 | the parser was given a default description of the function name, 16 | which hides all useful internal info there might be. 17 | * Added :meth:`Parser.sep_by` 18 | * Added :func:`test_char` 19 | * Added :func:`char_from` 20 | * Added :func:`string_from` 21 | * Added :data:`any_char` 22 | * Added :data:`decimal_digit` 23 | * Added :meth:`Parser.concat` 24 | * Fixed parsy so that it can again work with tokens as well as strings, allowing it to 25 | be used as both a :doc:`lexer or parser or both `, with docs and tests. 26 | * Added :func:`test_item` 27 | * Added :func:`match_item` 28 | * Added :meth:`Parser.should_fail` 29 | 30 | 0.9.0 - 2017-09-28 31 | ------------------ 32 | 33 | * Better error reporting of failed parses. 34 | * Documentation overhaul and expansion. 35 | * Added :meth:`Parser.combine`. 36 | 37 | 0.0.4 - 2014-12-28 38 | ------------------ 39 | 40 | * See git logs for changes before this point. 41 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | parsy 2 | ===== 3 | 4 | |Documentation Status| |Build Status| |Coveralls| 5 | 6 | Parsy is an easy way to combine simple, small parsers into complex, larger 7 | parsers. If it means anything to you, it's a monadic parser combinator library 8 | for LL(infinity) grammars in the spirit of `Parsec 9 | `_, `Parsnip 10 | `_, and `Parsimmon 11 | `_. 12 | 13 | Parsy requires Python 3.3 or greater. 14 | 15 | Links: 16 | 17 | - `Documentation `_ 18 | - `History and changelog `_ 19 | - `PyPI `_ 20 | 21 | To contribute, please create a fork and submit a pull request on GitHub, 22 | after checking the "contributing" section of the docs. Thanks! 
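If you just want a quick taste of what parsy code looks like before diving into the docs, here is an illustrative snippet (not taken from the test suite) showing small parsers being combined with operators:

.. code-block:: python

    >>> from parsy import regex, string
    >>> greeting = string('Hello, ') >> regex(r'[A-Z][a-z]*') << string('!')
    >>> greeting.parse('Hello, World!')
    'World'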
23 | 24 | Parsy was originally written by `Jeanine Adkisson `_, 25 | with contributions by other people as can be found in the git commit history. 26 | 27 | .. |Documentation Status| image:: https://readthedocs.org/projects/parsy/badge/?version=latest 28 | :target: http://parsy.readthedocs.io/en/latest/?badge=latest 29 | .. |Build Status| image:: https://travis-ci.org/python-parsy/parsy.svg?branch=master 30 | :target: https://travis-ci.org/python-parsy/parsy 31 | .. |Coveralls| image:: https://coveralls.io/repos/github/python-parsy/parsy/badge.svg?branch=master 32 | :target: https://coveralls.io/github/python-parsy/parsy?branch=master 33 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os.path 4 | 5 | from setuptools import find_packages, setup 6 | 7 | # Evaluate version module without importing parsy, which could have undesirable 8 | # effects. 9 | version_file = os.path.join(os.path.dirname(__file__), 10 | "src", "parsy", "version.py") 11 | namespace = {} 12 | exec(compile(open(version_file, "rb").read(), version_file, 'exec'), 13 | globals(), namespace) 14 | version = namespace['__version__'] 15 | 16 | readme = open('README.rst').read() 17 | 18 | setup( 19 | name="parsy", 20 | version=version, 21 | description="easy-to-use parser combinators, for parsing in pure Python", 22 | long_description=readme, 23 | author="Jeanine Adkisson", 24 | author_email="jneen at jneen dot net (humans only, please)", 25 | maintainer="Luke Plant", 26 | maintainer_email="L.Plant.98@cantab.net", 27 | url="https://github.com/python-parsy/parsy", 28 | license="MIT", 29 | classifiers=[ 30 | "Development Status :: 5 - Production/Stable", 31 | "Intended Audience :: Developers", 32 | "Topic :: Software Development :: Compilers", 33 | "Topic :: Software Development :: Interpreters", 34 | "Topic :: Text Processing", 35 | "License :: OSI Approved :: MIT License", 36 | "Programming Language :: Python :: 3", 37 | "Programming Language :: Python :: 3.3", 38 | "Programming Language :: Python :: 3.4", 39 | "Programming Language :: Python :: 3.5", 40 | "Programming Language :: Python :: 3.6", 41 | ], 42 | keywords="parser parsers parsing monad combinators", 43 | packages=find_packages('src'), 44 | package_dir={'': 'src'}, 45 | ) 46 | -------------------------------------------------------------------------------- /examples/json.py: -------------------------------------------------------------------------------- 1 | from sys import stdin 2 | 3 | from parsy import generate, regex, string 4 | 5 | whitespace = regex(r'\s*') 6 | lexeme = lambda p: p << whitespace 7 | lbrace = lexeme(string('{')) 8 | rbrace = lexeme(string('}')) 9 | lbrack = lexeme(string('[')) 10 | rbrack = lexeme(string(']')) 11 | colon = lexeme(string(':')) 12 | comma = lexeme(string(',')) 13 | true = lexeme(string('true')).result(True) 14 | false = lexeme(string('false')).result(False) 15 | null = lexeme(string('null')).result(None) 16 | number = lexeme( 17 | regex(r'-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?') 18 | ).map(float) 19 | string_part = regex(r'[^"\\]+') 20 | string_esc = string('\\') >> ( 21 | string('\\') 22 | | string('/') 23 | | string('"') 24 | | string('b').result('\b') 25 | | string('f').result('\f') 26 | | string('n').result('\n') 27 | | string('r').result('\r') 28 | | string('t').result('\t') 29 | | regex(r'u[0-9a-fA-F]{4}').map(lambda s: chr(int(s[1:], 16))) 30 | ) 31 | 
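# A JSON string literal: string_part/string_esc above yield the literal chunks
# and the decoded escape characters, and concat() joins them back into one str.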
quoted = lexeme(string('"') >> (string_part | string_esc).many().concat() << string('"')) 32 | 33 | 34 | # Circular dependency between array and value means we use `generate` form here 35 | @generate 36 | def array(): 37 | yield lbrack 38 | elements = yield value.sep_by(comma) 39 | yield rbrack 40 | return elements 41 | 42 | 43 | @generate 44 | def object_pair(): 45 | key = yield quoted 46 | yield colon 47 | val = yield value 48 | return (key, val) 49 | 50 | 51 | json_object = lbrace >> object_pair.sep_by(comma).map(dict) << rbrace 52 | value = quoted | number | json_object | array | true | false | null 53 | json = whitespace >> value 54 | 55 | if __name__ == '__main__': 56 | print(repr(json.parse(stdin.read()))) 57 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing to parsy 2 | ===================== 3 | 4 | Contributions to parsy, whether code or docs, are very welcome. Please 5 | contribute by making a fork, and submitting a PR on `GitHub 6 | `_. 7 | 8 | We have a high standard in terms of quality. All contributions will need to be 9 | fully covered by unit tests and documentation. Code should be formatted 10 | according to PEP 8, and the formatting defined by the ``../.editorconfig`` file 11 | (see `EditorConfig `_). 12 | 13 | To run the test suite:: 14 | 15 | pip install pytest 16 | pytest 17 | 18 | To run the test suite on all supported Python versions, and code quality checks, 19 | first install the various Python versions, then:: 20 | 21 | pip install tox 22 | tox 23 | 24 | To build the docs, do:: 25 | 26 | pip install sphinx 27 | cd docs 28 | make html 29 | 30 | We also require that `flake8 `_, `isort 31 | `_ and check-manifest report zero 32 | errors (these are run by tox). 33 | 34 | When writing documentation, please keep in mind Daniele Procida's `great article 35 | on documentation `_. To summarise, 36 | there are 4 types of docs: 37 | 38 | * Tutorials (focus: learning, analogy: teaching a child to cook) 39 | * How-to guides (focus: goals, analogy: a recipe in a cook book) 40 | * Discussions (focus: understanding, analogy: an article on culinary history) 41 | * Reference (focus: information, analogy: encyclopedia article) 42 | 43 | We do not (yet) have documentation that fits into the "Discussions" category, 44 | but we do have the others, and when adding new features, documentation of the 45 | right sort(s) should be added. With parsy, where code is often very succinct, 46 | this often takes several times longer than writing the code.
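As a concrete (and purely hypothetical) sketch, a pull request adding a new combinator would normally include reference documentation for it plus a test written in the unittest style of the existing test suite, for example:

.. code-block:: python

    import unittest

    from parsy import ParseError, string


    class TestMyNewFeature(unittest.TestCase):
        # Hypothetical test names - cover both the success and the failure
        # path of whatever behaviour the contribution adds.
        def test_success(self):
            self.assertEqual(string('x').parse('x'), 'x')

        def test_failure(self):
            with self.assertRaises(ParseError):
                string('x').parse('y')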
47 | -------------------------------------------------------------------------------- /test/test_sexpr.py: -------------------------------------------------------------------------------- 1 | import re 2 | import unittest 3 | 4 | from parsy import generate, regex, string 5 | 6 | whitespace = regex(r'\s+', re.MULTILINE) 7 | comment = regex(r';.*') 8 | ignore = (whitespace | comment).many() 9 | 10 | lexeme = lambda p: p << ignore 11 | 12 | lparen = lexeme(string('(')) 13 | rparen = lexeme(string(')')) 14 | number = lexeme(regex(r'\d+')).map(int) 15 | symbol = lexeme(regex(r'[\d\w_-]+')) 16 | true = lexeme(string('#t')).result(True) 17 | false = lexeme(string('#f')).result(False) 18 | 19 | atom = true | false | number | symbol 20 | 21 | 22 | @generate('a form') 23 | def form(): 24 | yield lparen 25 | els = yield expr.many() 26 | yield rparen 27 | return els 28 | 29 | 30 | @generate 31 | def quote(): 32 | yield string("'") 33 | e = yield expr 34 | return ['quote', e] 35 | 36 | 37 | expr = form | quote | atom 38 | program = ignore >> expr.many() 39 | 40 | 41 | class TestSexpr(unittest.TestCase): 42 | def test_form(self): 43 | result = program.parse('(1 2 3)') 44 | self.assertEqual(result, [[1, 2, 3]]) 45 | 46 | def test_quote(self): 47 | result = program.parse("'foo '(bar baz)") 48 | self.assertEqual(result, 49 | [['quote', 'foo'], ['quote', ['bar', 'baz']]]) 50 | 51 | def test_double_quote(self): 52 | result = program.parse("''foo") 53 | self.assertEqual(result, [['quote', ['quote', 'foo']]]) 54 | 55 | def test_boolean(self): 56 | result = program.parse('#t #f') 57 | self.assertEqual(result, [True, False]) 58 | 59 | def test_comments(self): 60 | result = program.parse( 61 | """ 62 | ; a program with a comment 63 | ( foo ; that's a foo 64 | bar ) 65 | ; some comments at the end 66 | """ 67 | ) 68 | 69 | self.assertEqual(result, [['foo', 'bar']]) 70 | 71 | 72 | if __name__ == '__main__': 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /examples/simple_eval.py: -------------------------------------------------------------------------------- 1 | from parsy import digit, generate, match_item, regex, string, success, test_item 2 | 3 | 4 | def lexer(code): 5 | whitespace = regex(r'\s*') 6 | integer = digit.at_least(1).concat().map(int) 7 | float_ = ( 8 | digit.many() + string('.').result(['.']) + digit.many() 9 | ).concat().map(float) 10 | parser = whitespace >> (( 11 | float_ | integer | regex(r'[()*/+-]') 12 | ) << whitespace).many() 13 | return parser.parse(code) 14 | 15 | 16 | def eval_tokens(tokens): 17 | # This function parses and evaluates at the same time. 
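    # Operator precedence is encoded in the layering of the parsers below:
    # `additive` parses `multiplicative` terms separated by + and -,
    # `multiplicative` parses `simple` factors separated by * and /, and
    # `simple` is either a parenthesised sub-expression or a signed number,
    # so * and / bind more tightly than + and -.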
18 | 19 | lparen = match_item('(') 20 | rparen = match_item(')') 21 | 22 | @generate 23 | def additive(): 24 | res = yield multiplicative 25 | sign = match_item('+') | match_item('-') 26 | while True: 27 | operation = yield sign | success('') 28 | if not operation: 29 | break 30 | operand = yield multiplicative 31 | if operation == '+': 32 | res += operand 33 | elif operation == '-': 34 | res -= operand 35 | return res 36 | 37 | @generate 38 | def multiplicative(): 39 | res = yield simple 40 | op = match_item('*') | match_item('/') 41 | while True: 42 | operation = yield op | success('') 43 | if not operation: 44 | break 45 | operand = yield simple 46 | if operation == '*': 47 | res *= operand 48 | elif operation == '/': 49 | res /= operand 50 | return res 51 | 52 | @generate 53 | def number(): 54 | sign = yield match_item('+') | match_item('-') | success('+') 55 | value = yield test_item( 56 | lambda x: isinstance(x, (int, float)), 'number') 57 | return value if sign == '+' else -value 58 | 59 | expr = additive 60 | simple = (lparen >> expr << rparen) | number 61 | 62 | return expr.parse(tokens) 63 | 64 | 65 | def simple_eval(expr): 66 | return eval_tokens(lexer(expr)) 67 | 68 | 69 | if __name__ == '__main__': 70 | print(simple_eval(input())) 71 | -------------------------------------------------------------------------------- /docs/ref/parser_instances.rst: -------------------------------------------------------------------------------- 1 | ============================= 2 | Creating new Parser instances 3 | ============================= 4 | 5 | .. currentmodule:: parsy 6 | 7 | Normally you will create Parser instances using the provided :doc:`primitives 8 | ` and :doc:`combinators `. 9 | 10 | However it is also possible to create them manually, as below. 11 | 12 | The :class:`Parser` constructor should be passed a function that takes the 13 | string/list to be parsed, and returns a and returns a :class:`Result` object. 14 | The ``Result`` object will be created either using :meth:`Result.success` or 15 | :meth:`Result.failure` to indicate success or failure respectively. 16 | :meth:`Result.success` should be passed the next index to continue parsing with, 17 | and the value that is returned from the parsing. :meth:`Result.failure` should 18 | return the index at which failure occurred i.e. the index passed in, and a 19 | string indicating what the parser expected to find. 20 | 21 | The ``Parser`` constructor will usually be called using decorator syntax. In 22 | order to pass parameters to the ``Parser`` instance, it is typically created 23 | using a closure. In the example below, we create a parser that matches any 24 | string/list of tokens of a given length. This could also be written as something 25 | like ``any_char.times(n).concat()`` but the following will be more efficient: 26 | 27 | 28 | .. code-block:: python 29 | 30 | def consume(n): 31 | 32 | @Parser 33 | def consumer(stream, index): 34 | items = stream[index:index + n] 35 | if len(items) == n: 36 | return Result.success(index + n, items) 37 | else: 38 | return Result.failure(index, "{0} items".format(n)) 39 | 40 | return consumer 41 | 42 | 43 | .. code-block:: python 44 | 45 | >>> consume(3).many().parse('abc123def') 46 | ['abc', '123', 'def'] 47 | 48 | 49 | Result objects 50 | ============== 51 | 52 | .. class:: Result 53 | 54 | .. staticmethod:: success(next_index, value) 55 | 56 | Creates a ``Result`` object indicating parsing succeeded. 
The index to 57 | continue parsing at, and the value retrieved from the parsing, should be 58 | passed. 59 | 60 | .. staticmethod:: failure(index, expected) 61 | 62 | Creates a ``Result`` object indicating parsing failed. The index to 63 | continue parsing at, and a string representing what the parser expected to 64 | find, should be passed. 65 | -------------------------------------------------------------------------------- /docs/howto/lexing.rst: -------------------------------------------------------------------------------- 1 | ===================================== 2 | Separate lexing/tokenization phases 3 | ===================================== 4 | 5 | .. currentmodule:: parsy 6 | 7 | Most of the documentation in parsy assumes that when you call 8 | :meth:`Parser.parse` you will pass a string, and will get back your final 9 | parsed, constructed object (of whatever type you desire). 10 | 11 | A more classical approach to parsing is that you first have a 12 | lexing/tokenization phase, the result of which is a simple list of tokens. These 13 | tokens could be strings, or other objects. 14 | 15 | You then have a separate parsing phase that consumes this list of tokens, and 16 | produces your final object, which is very often a tree-like structure or other 17 | complex object. 18 | 19 | Parsy can actually work with either approach. Further, for the split 20 | lexing/parsing approach, parsy can be used either to implement the lexer, or the 21 | parser, or both! The following examples use parsy to do both lexing and parsing. 22 | 23 | Turtle Logo 24 | =========== 25 | 26 | For our first example, we'll do a very stripped down Turtle Logo parser. First, 27 | the lexer: 28 | 29 | .. literalinclude:: ../../examples/simple_logo_lexer.py 30 | :language: python 31 | 32 | 33 | We are not interested in whitespace, so our lexer removes it all, apart from 34 | newlines. We can now parse a program into the tokens we are interested in: 35 | 36 | .. code-block:: python 37 | 38 | >>> l = lexer.parse("fd 1\nbk 2") 39 | >>> l 40 | ['fd', 1, '\n', 'bk', 2, '\n'] 41 | 42 | The ``line`` parser produces a list, so after applying ``many`` which also 43 | produces a list, we applied a level of flattening so that we end up with a 44 | simple list of tokens. We also chose to convert the parameters to integers while 45 | we were at it, so in this case our list of tokens is not a list of strings, but 46 | heterogeneous. 47 | 48 | The next step is the parser. We create some classes to represent different 49 | commands, and then use parsy again to create a parser which is very simple 50 | because this is a very limited language: 51 | 52 | .. literalinclude:: ../../examples/simple_logo_parser.py 53 | :language: python 54 | 55 | To use it, we pass the the list of tokens generated above into 56 | ``program.parse``: 57 | 58 | .. code-block:: python 59 | 60 | >>> program.parse(l) 61 | [Forward(1), Backward(2)] 62 | 63 | In a real implementation, we could then have ``execute`` methods on the 64 | ``Command`` sub-classes if we wanted to implement an interpreter, for example. 65 | 66 | Calculator 67 | ========== 68 | 69 | Our second example illustrates lexing and then parsing a sequence of 70 | mathematical operations, e.g "1 + 2 * (3 - 4.5)", with precedence. 71 | 72 | In this case, while doing the parsing stage, instead of building up an AST of 73 | objects representing the operations, the parser actually evaluates the 74 | expression. 75 | 76 | .. 
literalinclude:: ../../examples/simple_eval.py 77 | :language: python 78 | -------------------------------------------------------------------------------- /docs/overview.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Overview 3 | ======== 4 | 5 | Parsy is an easy way to combine simple, small parsers into complex, larger 6 | parsers. 7 | 8 | If it means anything to you, it's a monadic parser combinator library for 9 | LL(infinity) grammars in the spirit of `Parsec 10 | `_, `Parsnip 11 | `_, and `Parsimmon 12 | `_. 13 | 14 | If that means nothing, rest assured that parsy is a very straightforward and 15 | Pythonic solution for parsing text that doesn't require knowing anything about 16 | monads. 17 | 18 | Parsy differentiates itself from other solutions with the following: 19 | 20 | * it is not a parser generator, but a combinator based parsing library. 21 | * a very clean implementation, only a few hundred lines, that borrows 22 | from the best of recent combinator libraries. 23 | * free, good quality documentation, all in one place. (Please raise an issue on 24 | GitHub if you have any problems, or find the documentation lacking in any 25 | way). 26 | * it avoids mutability, and therefore a ton of related bugs. 27 | * it has monadic binding with a :doc:`nice syntax `. In plain 28 | English: 29 | 30 | * we can easily handle cases where later parsing depends on the value of 31 | something parsed earlier e.g. Hollerith constants. 32 | * it's easy to build up complex result objects, rather than having lists of 33 | lists etc. 34 | * there is no need for things like `pyparsing's Forward class 35 | `_ . 36 | 37 | * it has a minimalist philosophy. It doesn't include built-in helpers for any 38 | specific grammars or languages, but provides building blocks for making these. 39 | 40 | Basic usage looks like this: 41 | 42 | Example 1 - parsing a set of alternatives: 43 | 44 | .. code-block:: python 45 | 46 | >>> from parsy import string 47 | >>> parser = (string('Dr.') | string('Mr.') | string('Mrs.')).desc("title") 48 | >>> parser.parse('Mrs.') 49 | 'Mrs.' 50 | >>> parser.parse('Mr.') 51 | 'Mr.' 52 | 53 | >>> parser.parse('Joe') 54 | ParseError: expected title at 0:0 55 | 56 | >>> parser.parse_partial('Dr. Who') 57 | ('Dr.', ' Who') 58 | 59 | Example 2 - Parsing a dd-mm-yy date: 60 | 61 | .. code-block:: python 62 | 63 | >>> from parsy import string, regex 64 | >>> from datetime import date 65 | >>> ddmmyy = regex(r'[0-9]{2}').map(int).sep_by(string("-"), min=3, max=3).combine( 66 | ... lambda d, m, y: date(2000 + y, m, d)) 67 | >>> ddmmyy.parse('06-05-14') 68 | datetime.date(2014, 5, 6) 69 | 70 | 71 | To learn how to use parsy, you should continue with: 72 | 73 | * the :doc:`tutorial `, especially if you are not familiar with this 74 | type of parser library. 75 | * the :doc:`parser generator decorator ` 76 | * the :doc:`builtin parser primitives ` 77 | * the :doc:`method and combinator reference ` 78 | 79 | Other Python projects 80 | ===================== 81 | 82 | * `pyparsing `_. Also a combinator approach, 83 | but in general much less cleanly implemented, and rather scattered 84 | documentation. 85 | 86 | * `funcparserlib `_ - the most 87 | similar to parsy. It differs from parsy mainly in normally using a separate 88 | tokenization phase, lacking the convenience of the :func:`generate` method for 89 | creating parsers, and documentation that relies on understanding Haskell type 90 | annotations. 91 | 92 | * `Lark `_. 
With Lark you write a grammar 93 | definition in a separate mini-language as a string, and have a parser 94 | generated for you, rather than writing the grammar in Python. It has the 95 | advantage of speed and being able to use different parsing algorithms. 96 | -------------------------------------------------------------------------------- /docs/ref/generating.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Generating a parser 3 | =================== 4 | 5 | .. currentmodule:: parsy 6 | .. function:: generate 7 | 8 | ``generate`` converts a generator function (one that uses the ``yield`` keyword) 9 | into a parser. The generator function must yield parsers. These parsers are 10 | applied successively and their results are sent back to the generator using the 11 | ``.send()`` protocol. The generator function should return the final result of 12 | the parsing. Alternatively it can return another parser, which is equivalent to 13 | applying it and returning its result. 14 | 15 | Motivation and examples 16 | ======================= 17 | 18 | Constructing parsers by using combinators and :class:`Parser` methods to make 19 | larger parsers works well for many simpler cases. However, for more complex 20 | cases the ``generate`` function decorator is both more readable and more 21 | powerful. 22 | 23 | Alternative syntax to combinators 24 | --------------------------------- 25 | 26 | The first example just shows a different way of building a parser that could 27 | have easily been using combinators: 28 | 29 | .. code:: python 30 | 31 | from parsy import generate 32 | 33 | @generate("form") 34 | def form(): 35 | """ 36 | Parse an s-expression form, like (a b c). 37 | An equivalent to lparen >> expr.many() << rparen 38 | """ 39 | yield lparen 40 | exprs = yield expr.many() 41 | yield rparen 42 | return exprs 43 | 44 | In the example above, the parser was given a string name ``"form"``, which does 45 | the same as :meth:`Parser.desc`. This is not required, as per the examples below. 46 | 47 | Note that there is no guarantee that the entire function is executed: if any of 48 | the yielded parsers fails, the function will not complete, and parsy will try to 49 | backtrack to an alternative parser if there is one. 50 | 51 | Building complex objects 52 | ------------------------ 53 | 54 | The second example shows how you can use multiple parse results to build up a 55 | complex object: 56 | 57 | .. code:: python 58 | 59 | from datetime import date 60 | 61 | from parsy import generate, regex, string 62 | 63 | @generate 64 | def date(): 65 | """ 66 | Parse a date in the format YYYY-MM-DD 67 | """ 68 | year = yield regex("[0-9]{4}").map(int) 69 | yield string("-") 70 | month = yield regex("[0-9]{2}").map(int) 71 | yield string("-") 72 | day = yield regex("[0-9]{2}").map(int) 73 | 74 | return date(year, month, day) 75 | 76 | This could also have been achieved using :func:`seq` and :meth:`Parser.combine`. 77 | 78 | Using values already parsed 79 | --------------------------- 80 | 81 | The third example shows how we can use an earlier parsed value to influence the 82 | subsequent parsing. This example parses Hollerith constants. Hollerith constants 83 | are a way of specifying an arbitrary set of characters by first writing the 84 | integer that specifies the length, followed by the character H, followed by the 85 | set of characters. For example, ``pancakes`` would be written ``8Hpancakes``. 86 | 87 | .. 
code:: python 88 | 89 | from parsy import generate, regex, string, any_char 90 | 91 | @generate 92 | def hollerith(): 93 | num = yield regex(r'[0-9]+').map(int) 94 | yield string('H') 95 | return any_char.times(num).concat() 96 | 97 | (You may want to compare this with an `implementation of Hollerith constants 98 | `_ that 99 | uses `pyparsing `_, originally by John 100 | Shipman from his `pyparsing docs 101 | `_.) 102 | 103 | There are also more complex examples in the :ref:`tutorial 104 | ` of using the ``generate`` decorator to create parsers 105 | where there is logic that is conditional upon earlier parsed values. 106 | 107 | Implementing recursive definitions 108 | ---------------------------------- 109 | 110 | A fourth examples shows how you can use this syntax for grammars that you would 111 | like to define recursively (or mutually recursively). 112 | 113 | Say we want to be able to pass an s-expression like syntax which uses 114 | parenthesis for grouping items into a tree structure, like the following:: 115 | 116 | (0 1 (2 3) (4 5 6) 7 8) 117 | 118 | A naive approach would be: 119 | 120 | .. code-block:: python 121 | 122 | simple = regex('[0-9]+').map(int) 123 | group = string('(') >> expr.sep_by(string(' ')) << string(')') 124 | expr = simple | group 125 | 126 | The problem is that the second line will get a ``NameError`` because ``expr`` is 127 | not defined yet. 128 | 129 | Using the ``@generate`` syntax will introduce a level of laziness in resolving 130 | ``expr`` that allows things to work: 131 | 132 | .. code-block:: python 133 | 134 | simple = regex('[0-9]+').map(int) 135 | 136 | @generate 137 | def group(): 138 | return (yield string('(') >> expr.sep_by(string(' ')) << string(')')) 139 | 140 | expr = simple | group 141 | 142 | .. code-block:: python 143 | 144 | >>> expr.parse("(0 1 (2 3) (4 5 6) 7 8)") 145 | [0, 1, [2, 3], [4, 5, 6], 7, 8] 146 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # parsy documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Sep 25 22:24:17 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | sys.path.insert(0, os.path.abspath('../src')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.viewcode'] 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix(es) of source filenames. 
40 | # You can specify multiple suffix as a list of string: 41 | # 42 | # source_suffix = ['.rst', '.md'] 43 | source_suffix = '.rst' 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # General information about the project. 49 | project = 'parsy' 50 | copyright = '2017, Jeanine Adkisson, Luke Plant' 51 | author = 'Jeanine Adkisson' 52 | 53 | # The version info for the project you're documenting, acts as replacement for 54 | # |version| and |release|, also used in various other places throughout the 55 | # built documents. 56 | # 57 | # The short X.Y version. 58 | version = '1.0.1' 59 | # The full version, including alpha/beta/rc tags. 60 | release = '1.0.1-dev1' 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 64 | # 65 | # This is also used if you do content translation via gettext catalogs. 66 | # Usually you set "language" from the command line for these cases. 67 | language = None 68 | 69 | # List of patterns, relative to source directory, that match files and 70 | # directories to ignore when looking for source files. 71 | # This patterns also effect to html_static_path and html_extra_path 72 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 73 | 74 | # The name of the Pygments (syntax highlighting) style to use. 75 | pygments_style = 'sphinx' 76 | 77 | # If true, `todo` and `todoList` produce output, else they produce nothing. 78 | todo_include_todos = False 79 | 80 | 81 | # -- Options for HTML output ---------------------------------------------- 82 | 83 | # The theme to use for HTML and HTML Help pages. See the documentation for 84 | # a list of builtin themes. 85 | # 86 | html_theme = 'default' 87 | 88 | # Theme options are theme-specific and customize the look and feel of a theme 89 | # further. For a list of options available for each theme, see the 90 | # documentation. 91 | # 92 | # html_theme_options = {} 93 | 94 | # Add any paths that contain custom static files (such as style sheets) here, 95 | # relative to this directory. They are copied after the builtin static files, 96 | # so a file named "default.css" will overwrite the builtin "default.css". 97 | html_static_path = ['_static'] 98 | 99 | 100 | # -- Options for HTMLHelp output ------------------------------------------ 101 | 102 | # Output file base name for HTML help builder. 103 | htmlhelp_basename = 'parsydoc' 104 | 105 | 106 | # -- Options for LaTeX output --------------------------------------------- 107 | 108 | latex_elements = { 109 | # The paper size ('letterpaper' or 'a4paper'). 110 | # 111 | # 'papersize': 'letterpaper', 112 | 113 | # The font size ('10pt', '11pt' or '12pt'). 114 | # 115 | # 'pointsize': '10pt', 116 | 117 | # Additional stuff for the LaTeX preamble. 118 | # 119 | # 'preamble': '', 120 | 121 | # Latex figure (float) alignment 122 | # 123 | # 'figure_align': 'htbp', 124 | } 125 | 126 | # Grouping the document tree into LaTeX files. List of tuples 127 | # (source start file, target name, title, 128 | # author, documentclass [howto, manual, or own class]). 129 | latex_documents = [ 130 | (master_doc, 'parsy.tex', 'parsy Documentation', 131 | 'Jeanine Adkisson', 'manual'), 132 | ] 133 | 134 | 135 | # -- Options for manual page output --------------------------------------- 136 | 137 | # One entry per manual page. List of tuples 138 | # (source start file, name, description, authors, manual section). 
139 | man_pages = [ 140 | (master_doc, 'parsy', 'parsy Documentation', 141 | [author], 1) 142 | ] 143 | 144 | 145 | # -- Options for Texinfo output ------------------------------------------- 146 | 147 | # Grouping the document tree into Texinfo files. List of tuples 148 | # (source start file, target name, title, author, 149 | # dir menu entry, description, category) 150 | texinfo_documents = [ 151 | (master_doc, 'parsy', 'parsy Documentation', 152 | author, 'parsy', 'One line description of project.', 153 | 'Miscellaneous'), 154 | ] 155 | -------------------------------------------------------------------------------- /docs/ref/primitives.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | Parsing primitives 3 | ================== 4 | 5 | These are the lowest level building blocks for creating parsers. 6 | 7 | .. module:: parsy 8 | 9 | .. function:: string(expected_string) 10 | 11 | Returns a parser that expects the ``expected_string`` and produces 12 | that string value. 13 | 14 | .. function:: regex(exp, flags=0) 15 | 16 | Returns a parser that expects the given ``exp``, and produces the 17 | matched string. ``exp`` can be a compiled regular expression, or a 18 | string which will be compiled with the given ``flags``. 19 | 20 | Using a regex parser for small building blocks, instead of building up 21 | parsers from primitives like :func:`string`, :func:`test_char` and 22 | :meth:`Parser.times` combinators etc., can have several advantages, 23 | including: 24 | 25 | * It can be more succinct e.g. compare: 26 | 27 | .. code-block:: python 28 | 29 | >>> (string('a') | string('b')).times(1, 4) 30 | >>> regex(r'[ab]{1,4}') 31 | 32 | * It will return the entire matched string as a single item, 33 | so you don't need to use :meth:`Parser.concat`. 34 | * It can be much faster. 35 | 36 | .. function:: test_char(func, description) 37 | 38 | Returns a parser that tests a single character with the callable 39 | ``func``. If ``func`` returns ``True``, the parse succeeds, otherwise 40 | the parse fails with the description ``description``. 41 | 42 | .. code-block:: python 43 | 44 | >>> ascii = test_char(lambda c: ord(c) < 128, 45 | ... 'ascii character') 46 | >>> ascii.parse('A') 47 | 'A' 48 | 49 | .. function:: test_item(func, description) 50 | 51 | Returns a parser that tests a single item from the list of items being 52 | consumed, using the callable ``func``. If ``func`` returns ``True``, the 53 | parse succeeds, otherwise the parse fails with the description 54 | ``description``. 55 | 56 | If you are parsing a string, i.e. a list of characters, you can use 57 | :func:`test_char` instead. (In fact the implementations are identical, these 58 | functions are aliases for the sake of clear code). 59 | 60 | .. code-block:: python 61 | 62 | >>> numeric = test_item(str.isnumeric, 'numeric') 63 | >>> numeric.many().parse(['123', '456']) 64 | ['123', '456'] 65 | 66 | .. function:: char_from(characters) 67 | 68 | Accepts a string and returns a parser that matches and returns one character 69 | from the string. 70 | 71 | .. code-block:: python 72 | 73 | >>> char_from('abc').parse('a') 74 | 'a' 75 | 76 | .. function:: string_from(*strings) 77 | 78 | Accepts a sequence of strings as positional arguments, and returns a parser 79 | that matches and returns one string from the list. The list is first sorted 80 | in descending length order, so that overlapping strings are handled correctly 81 | by checking the longest one first. 82 | 83 | .. 
code-block:: python 84 | 85 | >>> string_from('y', 'yes').parse('yes') 86 | 'yes' 87 | 88 | 89 | .. function:: match_item(item, description=None) 90 | 91 | Returns a parser that tests the next item (or character) from the stream (or 92 | string) for equality against the provided item. Optionally a string 93 | description can be passed. 94 | 95 | Parsing a string: 96 | 97 | >>> letter_A = match_item('A') 98 | >>> letter_A.parse_partial('ABC') 99 | ('A', 'BC') 100 | 101 | Parsing a list of tokens: 102 | 103 | >>> hello = match_item('hello') 104 | >>> hello.parse_partial(['hello', 'how', 'are', 'you']) 105 | ('hello', ['how', 'are', 'you']) 106 | 107 | .. function:: success(val) 108 | 109 | Returns a parser that does not consume any of the stream, but 110 | produces ``val``. 111 | 112 | .. function:: fail(expected) 113 | 114 | Returns a parser that always fails with the provided error message. 115 | 116 | Pre-built parsers 117 | ================= 118 | 119 | Some common, pre-built parsers (all of these are :class:`Parser` objects created 120 | using the primitives above): 121 | 122 | 123 | .. data:: any_char 124 | 125 | A parser that matches any single character. 126 | 127 | .. data:: whitespace 128 | 129 | A parser that matches and returns one or more whitespace characters. 130 | 131 | .. data:: letter 132 | 133 | A parser that matches and returns a single letter, as defined by 134 | `str.isalpha `_. 135 | 136 | .. data:: digit 137 | 138 | A parser that matches and returns a single digit, as defined by `str.isdigit 139 | `_. Note that 140 | this includes various unicode characters outside of the normal 0-9 range, 141 | such as ¹²³. 142 | 143 | .. data:: decimal_digit 144 | 145 | A parser that matches and returns a single decimal digit, one of 146 | "0123456789". 147 | 148 | .. data:: line_info 149 | 150 | A parser that consumes no input and always just returns the current line 151 | information, a tuple of (line, column), zero-indexed, where lines are 152 | terminated by ``\n``. This is normally useful when wanting to build more 153 | debugging information into parse failure error messages. 154 | 155 | .. data:: index 156 | 157 | A parser that consumes no input and always just returns the current stream 158 | index. This is normally useful when wanting to build more debugging 159 | information into parse failure error messages. 
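For example, here is an illustrative sketch (not taken from the parsy source) of a parser that records the span of the text it matched, by sampling ``index`` before and after:

.. code-block:: python

    >>> from parsy import generate, index, regex

    >>> @generate
    ... def spanned_word():
    ...     start = yield index
    ...     word = yield regex(r'[a-z]+')
    ...     end = yield index
    ...     return (start, end, word)

    >>> spanned_word.parse('hello')
    (0, 5, 'hello')

:meth:`Parser.mark` is built in essentially this way, using ``line_info`` instead of ``index``.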
160 | -------------------------------------------------------------------------------- /src/parsy/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- # 2 | 3 | import operator 4 | import re 5 | from .version import __version__ # noqa: F401 6 | from functools import wraps 7 | from collections import namedtuple 8 | 9 | 10 | def line_info_at(stream, index): 11 | if index > len(stream): 12 | raise ValueError("invalid index") 13 | line = stream.count("\n", 0, index) 14 | last_nl = stream.rfind("\n", 0, index) 15 | col = index - (last_nl + 1) 16 | return (line, col) 17 | 18 | 19 | class ParseError(RuntimeError): 20 | def __init__(self, expected, stream, index): 21 | self.expected = expected 22 | self.stream = stream 23 | self.index = index 24 | 25 | def line_info(self): 26 | try: 27 | return '{}:{}'.format(*line_info_at(self.stream, self.index)) 28 | except (TypeError, AttributeError): # not a str 29 | return str(self.index) 30 | 31 | def __str__(self): 32 | expected_list = sorted(repr(e) for e in self.expected) 33 | 34 | if len(expected_list) == 1: 35 | return 'expected {} at {}'.format(expected_list[0], self.line_info()) 36 | else: 37 | return 'expected one of {} at {}'.format(', '.join(expected_list), self.line_info()) 38 | 39 | 40 | class Result(namedtuple('Result', 'status index value furthest expected')): 41 | @staticmethod 42 | def success(index, value): 43 | return Result(True, index, value, -1, frozenset()) 44 | 45 | @staticmethod 46 | def failure(index, expected): 47 | return Result(False, -1, None, index, frozenset([expected])) 48 | 49 | # collect the furthest failure from self and other 50 | def aggregate(self, other): 51 | if not other: 52 | return self 53 | 54 | if self.furthest > other.furthest: 55 | return self 56 | elif self.furthest == other.furthest: 57 | # if we both have the same failure index, we combine the expected messages. 58 | return Result(self.status, self.index, self.value, self.furthest, self.expected | other.expected) 59 | else: 60 | return Result(self.status, self.index, self.value, other.furthest, other.expected) 61 | 62 | 63 | class Parser(object): 64 | """ 65 | A Parser is an object that wraps a function whose arguments are 66 | a string to be parsed and the index on which to begin parsing. 67 | The function should return either Result.success(next_index, value), 68 | where the next index is where to continue the parse and the value is 69 | the yielded value, or Result.failure(index, expected), where expected 70 | is a string indicating what was expected, and the index is the index 71 | of the failure. 72 | """ 73 | 74 | def __init__(self, wrapped_fn): 75 | self.wrapped_fn = wrapped_fn 76 | 77 | def __call__(self, stream, index): 78 | return self.wrapped_fn(stream, index) 79 | 80 | def parse(self, stream): 81 | """Parse a string or list of tokens and return the result or raise a ParseError.""" 82 | (result, _) = (self << eof).parse_partial(stream) 83 | return result 84 | 85 | def parse_partial(self, stream): 86 | """ 87 | Parse the longest possible prefix of a given string. 88 | Return a tuple of the result and the rest of the string, 89 | or raise a ParseError. 
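        For example, string('ab').parse_partial('abcd') gives ('ab', 'cd').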
90 | """ 91 | result = self(stream, 0) 92 | 93 | if result.status: 94 | return (result.value, stream[result.index:]) 95 | else: 96 | raise ParseError(result.expected, stream, result.furthest) 97 | 98 | def bind(self, bind_fn): 99 | @Parser 100 | def bound_parser(stream, index): 101 | result = self(stream, index) 102 | 103 | if result.status: 104 | next_parser = bind_fn(result.value) 105 | return next_parser(stream, result.index).aggregate(result) 106 | else: 107 | return result 108 | 109 | return bound_parser 110 | 111 | def map(self, map_fn): 112 | return self.bind(lambda res: success(map_fn(res))) 113 | 114 | def combine(self, combine_fn): 115 | return self.bind(lambda res: success(combine_fn(*res))) 116 | 117 | def concat(self): 118 | return self.map(''.join) 119 | 120 | def then(self, other): 121 | return seq(self, other).combine(lambda left, right: right) 122 | 123 | def skip(self, other): 124 | return seq(self, other).combine(lambda left, right: left) 125 | 126 | def result(self, res): 127 | return self >> success(res) 128 | 129 | def many(self): 130 | return self.times(0, float('inf')) 131 | 132 | def times(self, min, max=None): 133 | # max=None means exactly min 134 | # min=max=None means from 0 to infinity 135 | if max is None: 136 | max = min 137 | 138 | @Parser 139 | def times_parser(stream, index): 140 | values = [] 141 | times = 0 142 | result = None 143 | 144 | while times < max: 145 | result = self(stream, index).aggregate(result) 146 | if result.status: 147 | values.append(result.value) 148 | index = result.index 149 | times += 1 150 | elif times >= min: 151 | break 152 | else: 153 | return result 154 | 155 | return Result.success(index, values).aggregate(result) 156 | 157 | return times_parser 158 | 159 | def at_most(self, n): 160 | return self.times(0, n) 161 | 162 | def at_least(self, n): 163 | return self.times(n) + self.many() 164 | 165 | def sep_by(self, sep, *, min=0, max=float('inf')): 166 | zero_times = success([]) 167 | if max == 0: 168 | return zero_times 169 | res = self.times(1) + (sep >> self).times(min - 1, max - 1) 170 | if min == 0: 171 | res |= zero_times 172 | return res 173 | 174 | def desc(self, description): 175 | @Parser 176 | def desc_parser(stream, index): 177 | result = self(stream, index) 178 | if result.status: 179 | return result 180 | else: 181 | return Result.failure(index, description) 182 | 183 | return desc_parser 184 | 185 | def mark(self): 186 | @generate 187 | def marked(): 188 | start = yield line_info 189 | body = yield self 190 | end = yield line_info 191 | return (start, body, end) 192 | 193 | return marked 194 | 195 | def should_fail(self, description): 196 | @Parser 197 | def fail_parser(stream, index): 198 | res = self(stream, index) 199 | if res.status: 200 | return Result.failure(index, description) 201 | return Result.success(index, res) 202 | 203 | return fail_parser 204 | 205 | def __add__(self, other): 206 | return seq(self, other).combine(operator.add) 207 | 208 | def __mul__(self, other): 209 | if isinstance(other, range): 210 | return self.times(other.start, other.stop - 1) 211 | return self.times(other) 212 | 213 | def __or__(self, other): 214 | return alt(self, other) 215 | 216 | # haskelley operators, for fun # 217 | 218 | # >> 219 | def __rshift__(self, other): 220 | return self.then(other) 221 | 222 | # << 223 | def __lshift__(self, other): 224 | return self.skip(other) 225 | 226 | 227 | def alt(*parsers): 228 | if not parsers: 229 | return fail('') 230 | 231 | @Parser 232 | def alt_parser(stream, index): 233 | 
result = None 234 | for parser in parsers: 235 | result = parser(stream, index).aggregate(result) 236 | if result.status: 237 | return result 238 | 239 | return result 240 | 241 | return alt_parser 242 | 243 | 244 | def seq(*parsers): 245 | """ 246 | Takes a list of list of parsers, runs them in order, 247 | and collects their individuals results in a list 248 | """ 249 | if not parsers: 250 | return success([]) 251 | 252 | @Parser 253 | def seq_parser(stream, index): 254 | result = None 255 | values = [] 256 | for parser in parsers: 257 | result = parser(stream, index).aggregate(result) 258 | if not result.status: 259 | return result 260 | index = result.index 261 | values.append(result.value) 262 | 263 | return Result.success(index, values).aggregate(result) 264 | 265 | return seq_parser 266 | 267 | 268 | # combinator syntax 269 | def generate(fn): 270 | if isinstance(fn, str): 271 | return lambda f: generate(f).desc(fn) 272 | 273 | @Parser 274 | @wraps(fn) 275 | def generated(stream, index): 276 | # start up the generator 277 | iterator = fn() 278 | 279 | result = None 280 | value = None 281 | try: 282 | while True: 283 | next_parser = iterator.send(value) 284 | result = next_parser(stream, index).aggregate(result) 285 | if not result.status: 286 | return result 287 | value = result.value 288 | index = result.index 289 | except StopIteration as stop: 290 | returnVal = stop.value 291 | if isinstance(returnVal, Parser): 292 | return returnVal(stream, index).aggregate(result) 293 | 294 | return Result.success(index, returnVal).aggregate(result) 295 | 296 | return generated 297 | 298 | 299 | index = Parser(lambda _, index: Result.success(index, index)) 300 | line_info = Parser(lambda stream, index: Result.success(index, line_info_at(stream, index))) 301 | 302 | 303 | def success(val): 304 | return Parser(lambda _, index: Result.success(index, val)) 305 | 306 | 307 | def fail(expected): 308 | return Parser(lambda _, index: Result.failure(index, expected)) 309 | 310 | 311 | def string(s): 312 | slen = len(s) 313 | 314 | @Parser 315 | def string_parser(stream, index): 316 | if stream[index:index + slen] == s: 317 | return Result.success(index + slen, s) 318 | else: 319 | return Result.failure(index, s) 320 | 321 | return string_parser 322 | 323 | 324 | def regex(exp, flags=0): 325 | if isinstance(exp, str): 326 | exp = re.compile(exp, flags) 327 | 328 | @Parser 329 | def regex_parser(stream, index): 330 | match = exp.match(stream, index) 331 | if match: 332 | return Result.success(match.end(), match.group(0)) 333 | else: 334 | return Result.failure(index, exp.pattern) 335 | 336 | return regex_parser 337 | 338 | 339 | def test_item(func, description): 340 | @Parser 341 | def test_item_parser(stream, index): 342 | if index < len(stream): 343 | item = stream[index] 344 | if func(item): 345 | return Result.success(index + 1, item) 346 | return Result.failure(index, description) 347 | 348 | return test_item_parser 349 | 350 | 351 | def test_char(func, description): 352 | # Implementation is identical to test_item 353 | return test_item(func, description) 354 | 355 | 356 | def match_item(item, description=None): 357 | if description is None: 358 | description = str(item) 359 | return test_item(lambda i: item == i, description) 360 | 361 | 362 | def string_from(*strings): 363 | # Sort longest first, so that overlapping options work correctly 364 | return alt(*map(string, sorted(strings, key=len, reverse=True))) 365 | 366 | 367 | def char_from(string): 368 | return test_char(lambda c: c in string, "[" 
+ string + "]") 369 | 370 | 371 | any_char = test_char(lambda c: True, "any character") 372 | 373 | whitespace = regex(r'\s+') 374 | 375 | letter = test_char(lambda c: c.isalpha(), 'a letter') 376 | 377 | digit = test_char(lambda c: c.isdigit(), 'a digit') 378 | 379 | decimal_digit = char_from("0123456789") 380 | 381 | 382 | @Parser 383 | def eof(stream, index): 384 | if index >= len(stream): 385 | return Result.success(index, None) 386 | else: 387 | return Result.failure(index, 'EOF') 388 | -------------------------------------------------------------------------------- /docs/ref/methods_and_combinators.rst: -------------------------------------------------------------------------------- 1 | ========================================= 2 | Parser methods, operators and combinators 3 | ========================================= 4 | 5 | Parser methods 6 | ============== 7 | 8 | Parser objects are returned by any of the built-in parser :doc:`primitives`. They 9 | can be used and manipulated as below. 10 | 11 | .. currentmodule:: parsy 12 | 13 | .. class:: Parser 14 | 15 | .. method:: __init__(wrapped_fn) 16 | 17 | This is a low level function to create new parsers that is used internally 18 | but is rarely needed by users of the parsy library. It should be passed a 19 | parsing function, which takes two arguments - a string/list to be parsed 20 | and the current index into the list - and returns a :class:`Result` object, 21 | as described in :doc:`/ref/parser_instances`. 22 | 23 | The following methods are for actually **using** the parsers that you have 24 | created: 25 | 26 | .. method:: parse(string_or_list) 27 | 28 | Attempts to parse the given string (or list). If the parse is successful 29 | and consumes the entire string, the result is returned - otherwise, a 30 | ``ParseError`` is raised. 31 | 32 | Instead of passing a string, you can in fact pass a list of tokens. Almost 33 | all the examples assume strings for simplicity. Some of the primitives are 34 | also clearly string specific, and a few of the combinators (such as 35 | :meth:`Parser.concat`) are string specific, but most of the rest of the 36 | library will work with tokens just as well. See :doc:`/howto/lexing` for 37 | more information. 38 | 39 | .. method:: parse_partial(string_or_list) 40 | 41 | Similar to ``parse``, except that it does not require the entire 42 | string (or list) to be consumed. Returns a tuple of 43 | ``(result, remainder)``, where ``remainder`` is the part of 44 | the string (or list) that was left over. 45 | 46 | The following methods are essentially **combinators** that produce new 47 | parsers from the existing one. They are provided as methods on ``Parser`` for 48 | convenience. More combinators are documented below. 49 | 50 | .. method:: desc(string) 51 | 52 | Adds a desciption to the parser, which is used in the error message 53 | if parsing fails. 54 | 55 | >>> year = regex(r'[0-9]{4}').desc('4 digit year') 56 | >>> year.parse('123') 57 | ParseError: expected 4 digit year at 0:0 58 | 59 | .. method:: then(other_parser) 60 | 61 | Returns a parser which, if the initial parser succeeds, will continue parsing 62 | with ``other_parser``. This will produce the value produced by 63 | ``other_parser``. 64 | 65 | .. code:: python 66 | 67 | >>> string('x').then(string('y')).parse('xy') 68 | 'y' 69 | 70 | See also :ref:`parser-rshift`. 71 | 72 | .. method:: skip(other_parser) 73 | 74 | Similar to :meth:`Parser.then`, except the resulting parser will use 75 | the value produced by the first parser. 76 | 77 | .. 
code:: python 78 | 79 | >>> string('x').skip(string('y')).parse('xy') 80 | 'x' 81 | 82 | See also :ref:`parser-lshift`. 83 | 84 | .. method:: many() 85 | 86 | Returns a parser that expects the initial parser 0 or more times, and 87 | produces a list of the results. Note that this parser does not fail if 88 | nothing matches, but instead consumes nothing and produces an empty list. 89 | 90 | .. code:: python 91 | 92 | >>> parser = regex(r'[a-z]').many() 93 | >>> parser.parse('') 94 | [] 95 | >>> parser.parse('abc') 96 | ['a', 'b', 'c'] 97 | 98 | .. method:: times(min [, max=min]) 99 | 100 | Returns a parser that expects the initial parser at least ``min`` times, 101 | and at most ``max`` times, and produces a list of the results. If only one 102 | argument is given, the parser is expected exactly that number of times. 103 | 104 | .. method:: at_most(n) 105 | 106 | Returns a parser that expects the initial parser at most ``n`` times, and 107 | produces a list of the results. 108 | 109 | .. method:: at_least(n) 110 | 111 | Returns a parser that expects the initial parser at least ``n`` times, and 112 | produces a list of the results. 113 | 114 | .. method:: map(fn) 115 | 116 | Returns a parser that transforms the produced value of the initial parser 117 | with ``fn``. 118 | 119 | .. code:: python 120 | 121 | >>> regex(r'[0-9]+').map(int).parse('1234') 122 | 1234 123 | 124 | This is the simplest way to convert parsed strings into the data types 125 | that you need. 126 | 127 | .. method:: combine(fn) 128 | 129 | Returns a parser that transforms the produced values of the initial parser 130 | with ``fn``, passing the arguments using ``*args`` syntax. 131 | 132 | Where the current parser produces an iterable of values, this can be a 133 | more convenient way to combine them than :meth:`~Parser.map`. 134 | 135 | Example 1 - the argument order of our callable already matches: 136 | 137 | .. code:: python 138 | 139 | >>> from datetime import date 140 | >>> yyyymmdd = seq(regex(r'[0-9]{4}').map(int), 141 | ... regex(r'[0-9]{2}').map(int), 142 | ... regex(r'[0-9]{2}').map(int)).combine(date) 143 | >>> yyyymmdd.parse('20140506') 144 | datetime.date(2014, 5, 6) 145 | 146 | Example 2 - the argument order of our callable doesn't match, and 147 | we need to adjust a parameter, so we can fix it using a lambda. 148 | 149 | .. code:: python 150 | 151 | >>> ddmmyy = regex(r'[0-9]{2}').map(int).times(3).combine( 152 | ... lambda d, m, y: date(2000 + y, m, d)) 153 | >>> ddmmyy.parse('060514') 154 | datetime.date(2014, 5, 6) 155 | 156 | The equivalent ``lambda`` to use with ``map`` would be ``lambda res: 157 | date(2000 + res[2], res[1], res[0])``, which is less readable. The version 158 | with ``combine`` also ensures that exactly 3 items are generated by the 159 | previous parser, otherwise you get a ``TypeError``. 160 | 161 | .. method:: concat() 162 | 163 | Returns a parser that concatenates together (as a string) the previously 164 | produced values. Usually used after :meth:`~Parser.many` and similar 165 | methods that produce multiple values. 166 | 167 | .. code:: python 168 | 169 | >>> letter.at_least(1).parse("hello") 170 | ['h', 'e', 'l', 'l', 'o'] 171 | >>> letter.at_least(1).concat().parse("hello") 172 | 'hello' 173 | 174 | .. method:: result(val) 175 | 176 | Returns a parser that, if the initial parser succeeds, always produces 177 | ``val``. 178 | 179 | .. code:: python 180 | 181 | >>> string('foo').result(42).parse('foo') 182 | 42 183 | 184 | .. 
method:: should_fail(description) 185 | 186 | Returns a parser that fails when the initial parser succeeds, and succeeds 187 | when the initial parser fails (consuming no input). A description must 188 | be passed which is used in parse failure messages. 189 | 190 | This is essentially a negative lookahead: 191 | 192 | .. code:: python 193 | 194 | >>> p = letter << string(" ").should_fail("not space") 195 | >>> p.parse('A') 196 | 'A' 197 | >>> p.parse('A ') 198 | ParseError: expected 'not space' at 0:1 199 | 200 | It is also useful for implementing things like parsing repeatedly until a 201 | marker: 202 | 203 | .. code:: python 204 | 205 | >>> (string(";").should_fail("not ;") >> letter).many().concat().parse_partial('ABC;') 206 | ('ABC', ';') 207 | 208 | .. method:: bind(fn) 209 | 210 | Returns a parser which, if the initial parser is successful, passes the 211 | result to ``fn``, and continues with the parser returned from ``fn``. 212 | This is the monadic binding operation. 213 | 214 | .. method:: sep_by(sep, min=0, max=inf) 215 | 216 | Like :meth:`Parser.times`, this returns a new parser that repeats 217 | the initial parser and collects the results in a list, but in this case separated 218 | by the parser ``sep`` (whose return value is discarded). By default it 219 | repeats with no limit, but minimum and maximum values can be supplied. 220 | 221 | .. code:: python 222 | 223 | >>> csv = letter.at_least(1).concat().sep_by(string(",")) 224 | >>> csv.parse("abc,def") 225 | ['abc', 'def'] 226 | 227 | .. _operators: 228 | 229 | Parser operators 230 | ================ 231 | 232 | This section describes operators that you can use on :class:`Parser` objects to 233 | build new parsers. 234 | 235 | 236 | .. _parser-or: 237 | 238 | ``|`` operator 239 | -------------- 240 | 241 | ``parser | other_parser`` 242 | 243 | Returns a parser that tries ``parser`` and, if it fails, backtracks 244 | and tries ``other_parser``. These can be chained together. 245 | 246 | The resulting parser will produce the value produced by the first 247 | successful parser. 248 | 249 | .. code:: python 250 | 251 | >>> parser = string('x') | string('y') | string('z') 252 | >>> parser.parse('x') 253 | 'x' 254 | >>> parser.parse('y') 255 | 'y' 256 | >>> parser.parse('z') 257 | 'z' 258 | 259 | >>> (string('x') >> string('y')).parse('xy') 260 | 'y' 261 | 262 | .. _parser-lshift: 263 | 264 | ``<<`` operator 265 | --------------- 266 | 267 | ``parser << other_parser`` 268 | 269 | The same as ``parser.skip(other_parser)`` - see :meth:`Parser.skip`. 270 | 271 | (Hint - the arrows point at the important parser!) 272 | 273 | .. code:: python 274 | 275 | >>> (string('x') << string('y')).parse('xy') 276 | 'x' 277 | 278 | .. _parser-rshift: 279 | 280 | ``>>`` operator 281 | --------------- 282 | 283 | ``parser >> other_parser`` 284 | 285 | The same as ``parser.then(other_parser)`` - see :meth:`Parser.then`. 286 | 287 | (Hint - the arrows point at the important parser!) 288 | 289 | .. code-block:: python 290 | 291 | >>> (string('x') >> string('y')).parse('xy') 292 | 'y' 293 | 294 | 295 | .. _parser-plus: 296 | 297 | ``+`` operator 298 | -------------- 299 | 300 | ``parser1 + parser2`` 301 | 302 | Requires both parsers to match in order, and adds the two results together using 303 | the + operator. This will only work if the results support the plus operator 304 | (e.g. strings and lists): 305 | 306 | 307 | .. 
code-block:: python 308 | 309 | >>> (string("x") + regex("[0-9]")).parse("x1") 310 | "x1" 311 | 312 | >>> (string("x").many() + regex("[0-9]").map(int).many()).parse("xx123") 313 | ['x', 'x', 1, 2, 3] 314 | 315 | The plus operator is a convenient shortcut for: 316 | 317 | >>> seq(parser1, parser2).combine(lambda a, b: a + b) 318 | 319 | .. _parser-times: 320 | 321 | ``*`` operator 322 | -------------- 323 | 324 | ``parser1 * number`` 325 | 326 | This is a shortcut for doing :meth:`Parser.times`: 327 | 328 | .. code-block:: python 329 | 330 | >>> (string("x") * 3).parse("xxx") 331 | ["x", "x", "x"] 332 | 333 | You can also set both upper and lower bounds by multiplying by a range: 334 | 335 | .. code-block:: python 336 | 337 | >>> (string("x") * range(0, 3)).parse("xxx") 338 | ParseError: expected EOF at 0:2 339 | 340 | (Note the normal semantics of ``range`` are respected - the second number is an 341 | *exclusive* upper bound, not inclusive). 342 | 343 | Parser combinators 344 | ================== 345 | 346 | .. function:: alt(*parsers) 347 | 348 | Creates a parser from the passed in argument list of alternative parsers, 349 | which are tried in order, moving to the next one if the current one fails, as 350 | per the :ref:`parser-or` - in other words, it matches any one of the 351 | alternative parsers. 352 | 353 | Example using `*arg` syntax to pass a list of parsers that have been 354 | generated by mapping :func:`string` over a list of characters: 355 | 356 | .. code-block:: python 357 | 358 | >>> hexdigit = alt(*map(string, "0123456789abcdef")) 359 | 360 | (In this case you would be better off using :func:`char_from`) 361 | 362 | .. function:: seq(*parsers) 363 | 364 | Creates a parser that runs a sequence of parsers in order and combines 365 | their results in a list. 366 | 367 | 368 | .. code-block:: python 369 | 370 | >>> x_bottles_of_y_on_the_z = \ 371 | ... seq(regex(r"[0-9]+").map(int) << string(" bottles of "), 372 | ... regex(r"\S+") << string(" on the "), 373 | ... regex(r"\S+") 374 | ... ) 375 | >>> x_bottles_of_y_on_the_z.parse("99 bottles of beer on the wall") 376 | [99, 'beer', 'wall'] 377 | 378 | Other combinators 379 | ================= 380 | 381 | Parsy does not try to include every possible combinator - there is no reason why 382 | you cannot create your own for your needs using the built-in combinators and 383 | primitives. If you find something that is very generic and would be very useful 384 | to have as a built-in, please :doc:`submit `: as a PR! 385 | -------------------------------------------------------------------------------- /docs/tutorial.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Tutorial 3 | ======== 4 | 5 | .. currentmodule:: parsy 6 | 7 | First :doc:`install parsy `, and check that the documentation you 8 | are reading matches the version you just installed. 9 | 10 | Building an ISO 8601 parser 11 | =========================== 12 | 13 | In this tutorial, we are going to gradually build a parser for a subset of an 14 | ISO 8601 date. Specifically, we want to handle dates that look like this: 15 | ``2017-09-25``. 16 | 17 | A problem of this size could admittedly be solved fairly easily with regexes. 18 | But very quickly regexes don't scale, especially when it comes to getting the 19 | parsed data out, and for this tutorial we need to start with a simple example. 20 | 21 | With parsy, you start by breaking the problem down into the smallest components. 
22 | So we need first to match the 4 digit year at the beginning. 23 | 24 | There are various ways we can do this, but a regex works nicely, and 25 | :func:`regex` is a built-in primitive of the parsy library: 26 | 27 | .. code-block:: python 28 | 29 | >>> from parsy import regex 30 | >>> year = regex(r'[0-9]{4}') 31 | 32 | This has produced a :class:`Parser` object which has various methods. We can 33 | immediately check that it works using the :meth:`Parser.parse` method: 34 | 35 | .. code-block:: python 36 | 37 | >>> year.parse('2017') 38 | '2017' 39 | >>> year.parse('abc') 40 | ParseError: expected '[0-9]{4}' at 0:0 41 | 42 | Notice first of all that a parser consumes input (the value we pass to 43 | ``parse``), and it produces an output. In the case of ``regex``, the produced 44 | output is the string that was matched, but this doesn't have to be the case for 45 | all parsers. 46 | 47 | If there is no match, it raises a ``ParseError``. 48 | 49 | Notice as well that the parser expects to consume all the input, so if there are 50 | extra characters at the end, even if it is just whitespace, parsing will fail 51 | with a message saying it expected EOF (End Of File/Data): 52 | 53 | .. code-block:: python 54 | 55 | >>> year.parse('2017 ') 56 | ParseError: expected 'EOF' at 0:4 57 | 58 | To parse the data, we need to parse months, days, and the dash symbol, so we'll 59 | add those: 60 | 61 | .. code-block:: python 62 | 63 | >>> from parsy import string 64 | >>> month = regex('[0-9]{2}') 65 | >>> day = regex('[0-9]{2}') 66 | >>> dash = string('-') 67 | 68 | We've added use of the :func:`string` primitive here, that matches just the 69 | string passed in, and returns that string. 70 | 71 | Next we need to combine these parsers into something that will parse the whole 72 | date. The simplest way is to use the :meth:`Parser.then` method: 73 | 74 | .. code-block:: python 75 | 76 | >>> fulldate = year.then(dash).then(month).then(dash).then(day) 77 | 78 | The ``then`` method returns a new parser that requires the first parser to 79 | succeed, followed by the second parser (the argument to the method). 80 | 81 | We could also write this using the :ref:`parser-rshift` which 82 | does the same thing as :meth:`Parser.then`: 83 | 84 | .. code-block:: python 85 | 86 | >>> fulldate = year >> dash >> month >> dash >> day 87 | 88 | This parser has some problems which we need to address, but it is already useful 89 | as a basic validator: 90 | 91 | .. code-block:: python 92 | 93 | >>> fulldate.parse('2017-xx') 94 | ParseError: expected '[0-9]{2}' at 0:5 95 | >>> fulldate.parse('2017-01') 96 | ParseError: expected '-' at 0:7 97 | >>> fulldate.parse('2017-02-01') 98 | '01' 99 | 100 | If the parse doesn't succeed, we'll get ``ParseError``, otherwise it is valid 101 | (at least as far as the basic syntax checks we've added). 102 | 103 | The first problem with this parser is that it doesn't return a very useful 104 | value. Due to the way that :meth:`Parser.then` works, when it combines two 105 | parsers to produce a larger one, the value from the first parser is discarded, 106 | and the value returned by the second parser is the overall return value. So, we 107 | end up getting only the 'day' component as the result of our parse. We really 108 | want the year, month and day packaged up nicely, and converted to integers. 109 | 110 | A second problem is that our error messages are not very friendly. 111 | 112 | Our first attempt at fixing these might be to use the :ref:`parser-plus` instead 113 | of ``then``. 
This operator is defined to combine the results of the two parsers 114 | using the normal plus operator, which will work fine on strings: 115 | 116 | >>> fulldate = year + dash + month + dash + day 117 | >>> fulldate.parse('2017-02-01') 118 | '2017-02-01' 119 | 120 | However, it won't help us if we want to split our data up into a set of 121 | integers. 122 | 123 | Our first step should actually be to work on the year, month and day components 124 | using :meth:`Parser.map`, which allows us to convert the strings to other 125 | objects - in our case we want integers. 126 | 127 | We can also use the :meth:`Parser.desc` method to give nicer error messages, so 128 | our components now look like this: 129 | 130 | .. code-block:: python 131 | 132 | >>> year = regex('[0-9]{4}').map(int).desc('4 digit year') 133 | >>> month = regex('[0-9]{2}').map(int).desc('2 digit month') 134 | >>> day = regex('[0-9]{2}').map(int).desc('2 digit day') 135 | 136 | We get better error messages now: 137 | 138 | .. code-block:: python 139 | 140 | >>> year.then(dash).then(month).parse('2017-xx') 141 | ParseError: expected '2 digit month' at 0:5 142 | 143 | 144 | Notice that the ``map`` and ``desc`` methods, like all similar methods on 145 | ``Parser``, return new parser objects - they do not modify the existing one. 146 | This allows us to build up parsers with a 'fluent' interface, and avoid problems 147 | caused by mutating objects. 148 | 149 | However, we still need a way to package up the year, month and day as separate 150 | values. 151 | 152 | The :func:`seq` combinator provides one easy way to do that. It takes the 153 | parsers that are passed in as arguments, and combines their results into a 154 | list: 155 | 156 | .. code-block:: python 157 | 158 | >>> fulldate = seq(year, dash, month, dash, day) 159 | >>> fulldate.parse('2017-01-02') 160 | [2017, '-', 1, '-', 2] 161 | 162 | Now, we don't need those dashes, so we can eliminate them using the :ref:`parser-rshift` or :ref:`parser-lshift`: 163 | 164 | .. code-block:: python 165 | 166 | >>> fulldate = seq(year, dash >> month, dash >> day) 167 | >>> fulldate.parse('2017-01-02') 168 | [2017, 1, 2] 169 | 170 | At this point, we could also convert this to a date object if we wanted using 171 | :meth:`Parser.combine`: 172 | 173 | .. code-block:: python 174 | 175 | >>> from datetime import date 176 | >>> fulldate = seq(year, dash >> month, dash >> day).combine(date) 177 | 178 | We could have used :meth:`Parser.map` here, but :meth:`Parser.combine` is a bit 179 | nicer. It's especially succinct because the argument order to ``date`` matches 180 | the order of the values parsed (year, month, day), otherwise we could pass a 181 | ``lambda`` to ``combine``. 182 | 183 | .. _using-previous-values: 184 | 185 | Using previously parsed values 186 | ============================== 187 | 188 | Now, sometimes we might want to do more complex logic with the values that are 189 | collected as parse results, and do so while we are still parsing. 190 | 191 | To continue our example, the above parser has the problem that it will raise an 192 | exception if the day and month values are not valid. We'd like to be able to 193 | check this, and produce a parse error instead, which will make our parser play 194 | better with others if we want to use it to build something bigger. 195 | 196 | Also, in ISO 8601, strictly speaking you can just write the year, or the year and 197 | the month, and leave off the other parts. 
We'd like to handle that by returning 198 | a tuple for the result, and ``None`` for the missing data. 199 | 200 | To do this, we need to allow the parse to continue if the later components (with 201 | their leading dashes) are missing - that is, we need to express optional 202 | components, and we need a way to be able to test earlier values while in the 203 | middle of parsing, to see if we should continue looking for another component. 204 | 205 | The :meth:`Parser.bind` method provides one way to do it (yay monads!). You pass 206 | it a function that takes the output value from one parser as its input, and 207 | returns another parser as its output. (An example will help!) By appropriate use 208 | of closures, plus the :func:`success` primitive to return our values as a tuple, 209 | we can put together a parser. 210 | 211 | For our first attempt, we'll make a parser that is similar to the previous ones 212 | and requires the full date to be present. It might look like this: 213 | 214 | .. code-block:: python 215 | 216 | fulldate = \ 217 | year.skip(dash).bind(lambda y: 218 | month.skip(dash).bind(lambda m: 219 | day.bind(lambda d: 220 | success((y, m, d))))) 221 | 222 | That is not a pretty sight, and it will get even worse if we want to use 223 | statements that are not allowed inside a lambda, and therefore need to define 224 | the callables using ``def``. Can we do better? 225 | 226 | In Haskell, there is ``do`` notation that eliminates the lambdas. We don't have 227 | that in Python, but instead we can use generators and the ``yield`` keyword to 228 | great effect. 229 | 230 | Instead of wrangling lambdas or having to create many little functions, we 231 | use a generator function and convert it into a parser by using the 232 | :func:`generate` decorator. The idea is that you ``yield`` every parser that you 233 | want to run, and receive the result of that parser as the value of the yield 234 | expression. You can then put parsers together using any logic you like, and 235 | finally return the value. 236 | 237 | An equivalent parser to the one above can be written like this: 238 | 239 | .. code-block:: python 240 | 241 | @generate 242 | def full_date(): 243 | y = yield year 244 | yield dash # implicit skip, since we do nothing with the value 245 | m = yield month 246 | yield dash 247 | d = yield day 248 | return (y, m, d) 249 | 250 | This is much better, and provides a good starting point for our next set of 251 | requirements. 252 | 253 | First of all, we need to express optional components - that is we need to be 254 | able to handle missing dashes, and return what we've got so far rather than 255 | failing the whole parse. 256 | 257 | :class:`Parser` has a set of methods that convert parsers into ones that allow 258 | multiples of the parser - including :meth:`Parser.times`, :meth:`Parser.at_most` 259 | and :meth:`Parser.at_least`. 260 | 261 | The :meth:`Parser.at_most` method will take the initial parser and return one 262 | that succeeds if there are between zero and n repetitions of matching input. It 263 | returns a (possibly empty) list of produced values. With ``n=1`` we can get an 264 | optional dash, and then check the length of what was produced to see if 265 | a dash was present. 266 | 267 | We also need to do checking on the month and the day. We'll take a shortcut and 268 | use the built-in ``datetime.date`` class to do the validation for us. However, 269 | rather than allow exceptions to be raised, we convert the exception into a 270 | parsing failure. 
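Before the full example, here is a minimal, self-contained sketch of that technique (the parser name and the limit used here are invented purely for illustration): returning the result of ``fail()`` from a ``@generate`` function makes parsy run that always-failing parser at the current position, so a semantic check surfaces as an ordinary ``ParseError``.

.. code-block:: python

    from parsy import fail, generate, regex

    @generate
    def small_number():
        # Parse one or more digits and convert to an int.
        n = yield regex(r'[0-9]+').map(int)
        if n > 100:
            # Turn a semantic check into a parse failure.
            return fail('an integer no greater than 100')
        return n

    # small_number.parse('42')   returns 42
    # small_number.parse('123')  raises ParseError: expected 'an integer no greater than 100' at 0:3

The full date parser, which applies the same idea to validating the month and day, follows: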
271 | 272 | 273 | .. code-block:: python 274 | 275 | optional_dash = dash.at_most(1) 276 | 277 | @generate 278 | def full_or_partial_date(): 279 | d = None 280 | m = None 281 | y = yield year 282 | dash1 = yield optional_dash 283 | if len(dash1) > 0: 284 | m = yield month 285 | dash2 = yield optional_dash 286 | if len(dash2) > 0: 287 | d = yield day 288 | if m is not None: 289 | if m < 1 or m > 12: 290 | return fail("month must be in 1..12") 291 | if d is not None: 292 | try: 293 | datetime.date(y, m, d) 294 | except ValueError as e: 295 | return fail(e.args[0]) 296 | 297 | return (y, m, d) 298 | 299 | 300 | This now works as expected: 301 | 302 | .. code-block:: python 303 | 304 | >>> full_or_partial_date.parse('2017-02') 305 | (2017, 2, None) 306 | >>> full_or_partial_date.parse('2017-02-29') 307 | ParseError: expected 'day is out of range for month' at 0:10 308 | 309 | We could of course use a custom object in the final line to return a more 310 | convenient data type, if wanted. 311 | 312 | Alternatives and backtracking 313 | ============================= 314 | 315 | Suppose we are using our date parser to scrape dates off articles on a web site. 316 | We then discover that for recently published articles, instead of printing a 317 | timestamp, they write "X days ago". 318 | 319 | We want to parse this, and we'll use a timedelta object to represent the value 320 | (to easily distinguish it from other values and consume it later). We can write 321 | a parser for this easily: 322 | 323 | .. code-block:: python 324 | 325 | >>> days_ago = regex("[0-9]+").map(lambda d: timedelta(days=-int(d))) << string(" days ago") 326 | >>> days_ago.parse("5 days ago") 327 | datetime.timedelta(-5) 328 | 329 | Now we need to combine it with our date parser, and allow either to succeed. 330 | This is done using the :ref:`parser-or`, as follows: 331 | 332 | 333 | .. code-block:: python 334 | 335 | >>> flexi_date = full_or_partial_date | days_ago 336 | >>> flexi_date.parse('2012-01-05') 337 | (2012, 1, 5) 338 | >>> flexi_date.parse("2 days ago") 339 | datetime.timedelta(-2) 340 | 341 | Notice that you still get good error messages from the appropriate parser, 342 | depending on which parser got furthest before returning a failure: 343 | 344 | .. code-block:: python 345 | 346 | >>> flexi_date.parse('2012-') 347 | ParseError: expected '2 digit month' at 0:5 348 | >>> flexi_date.parse('2 years ago') 349 | ParseError: expected ' days ago' at 0:1 350 | 351 | When using backtracking, you need to understand that backtracking to the other 352 | option only occurs if the first parser fails. So, for example: 353 | 354 | .. code-block:: python 355 | 356 | >>> a = string("a") 357 | >>> ab = string("ab") 358 | >>> c = string("c") 359 | >>> a_or_ab_and_c = ((a | ab) + c) 360 | >>> a_or_ab_and_c.parse('ac') 361 | 'ac' 362 | >>> a_or_ab_and_c.parse('abc') 363 | ParseError: expected 'c' at 0:1 364 | 365 | The parse fails because the ``a`` parser succeeds, and so the ``ab`` parser is 366 | never tried. This is different from most regular expression engines, where 367 | backtracking is done over the whole regex by default. 368 | 369 | In this case we can get the parse to succeed by switching the order: 370 | 371 | .. code-block:: python 372 | 373 | >>> ((ab | a) + c).parse('abc') 374 | 'abc' 375 | 376 | >>> ((ab | a) + c).parse('ac') 377 | 'ac' 378 | 379 | We could also fix it like this: 380 | 381 | .. 
code-block:: python 382 | 383 | >>> ((a + c) | (ab + c)).parse('abc') 384 | 'abc' 385 | 386 | Learn more 387 | ========== 388 | 389 | For further topics, see the :doc:`table of contents ` for the rest of 390 | the documentation that should enable you to build parsers for your needs. 391 | -------------------------------------------------------------------------------- /test/test_parsy.py: -------------------------------------------------------------------------------- 1 | # -*- code: utf8 -*- 2 | import re 3 | import unittest 4 | 5 | from parsy import test_char as parsy_test_char # to stop pytest thinking this function is a test 6 | from parsy import test_item as parsy_test_item # to stop pytest thinking this function is a test 7 | from parsy import ( 8 | ParseError, alt, any_char, char_from, decimal_digit, digit, generate, index, letter, line_info, line_info_at, 9 | match_item, regex, seq, string, string_from, whitespace 10 | ) 11 | 12 | 13 | class TestParser(unittest.TestCase): 14 | 15 | def test_string(self): 16 | parser = string('x') 17 | self.assertEqual(parser.parse('x'), 'x') 18 | 19 | self.assertRaises(ParseError, parser.parse, 'y') 20 | 21 | def test_regex(self): 22 | parser = regex(r'[0-9]') 23 | 24 | self.assertEqual(parser.parse('1'), '1') 25 | self.assertEqual(parser.parse('4'), '4') 26 | 27 | self.assertRaises(ParseError, parser.parse, 'x') 28 | 29 | def test_regex_compiled(self): 30 | parser = regex(re.compile(r'[0-9]')) 31 | self.assertEqual(parser.parse('1'), '1') 32 | self.assertRaises(ParseError, parser.parse, 'x') 33 | 34 | def test_then(self): 35 | xy_parser = string('x') >> string('y') 36 | self.assertEqual(xy_parser.parse('xy'), 'y') 37 | 38 | self.assertRaises(ParseError, xy_parser.parse, 'y') 39 | self.assertRaises(ParseError, xy_parser.parse, 'z') 40 | 41 | def test_bind(self): 42 | piped = None 43 | 44 | def binder(x): 45 | nonlocal piped 46 | piped = x 47 | return string('y') 48 | 49 | parser = string('x').bind(binder) 50 | 51 | self.assertEqual(parser.parse('xy'), 'y') 52 | self.assertEqual(piped, 'x') 53 | 54 | self.assertRaises(ParseError, parser.parse, 'x') 55 | 56 | def test_map(self): 57 | parser = digit.map(int) 58 | self.assertEqual(parser.parse('7'), 59 | 7) 60 | 61 | def test_combine(self): 62 | parser = (seq(digit, letter) 63 | .combine(lambda d, l: (d, l))) 64 | self.assertEqual(parser.parse('1A'), 65 | ('1', 'A')) 66 | 67 | def test_concat(self): 68 | parser = letter.many().concat() 69 | self.assertEqual(parser.parse(''), '') 70 | self.assertEqual(parser.parse('abc'), 'abc') 71 | 72 | def test_generate(self): 73 | x = y = None 74 | 75 | @generate 76 | def xy(): 77 | nonlocal x 78 | nonlocal y 79 | x = yield string('x') 80 | y = yield string('y') 81 | return 3 82 | 83 | self.assertEqual(xy.parse('xy'), 3) 84 | self.assertEqual(x, 'x') 85 | self.assertEqual(y, 'y') 86 | 87 | def test_generate_return_parser(self): 88 | @generate 89 | def example(): 90 | yield string('x') 91 | return string('y') 92 | self.assertEqual(example.parse("xy"), "y") 93 | 94 | def test_mark(self): 95 | parser = (letter.many().mark() << string("\n")).many() 96 | 97 | lines = parser.parse("asdf\nqwer\n") 98 | 99 | self.assertEqual(len(lines), 2) 100 | 101 | (start, letters, end) = lines[0] 102 | self.assertEqual(start, (0, 0)) 103 | self.assertEqual(letters, ['a', 's', 'd', 'f']) 104 | self.assertEqual(end, (0, 4)) 105 | 106 | (start, letters, end) = lines[1] 107 | self.assertEqual(start, (1, 0)) 108 | self.assertEqual(letters, ['q', 'w', 'e', 'r']) 109 | self.assertEqual(end, 
(1, 4)) 110 | 111 | def test_generate_desc(self): 112 | @generate('a thing') 113 | def thing(): 114 | yield string('t') 115 | 116 | with self.assertRaises(ParseError) as err: 117 | thing.parse('x') 118 | 119 | ex = err.exception 120 | 121 | self.assertEqual(ex.expected, frozenset(['a thing'])) 122 | self.assertEqual(ex.stream, 'x') 123 | self.assertEqual(ex.index, 0) 124 | 125 | def test_generate_default_desc(self): 126 | # We shouldn't give a default desc, the messages from the internal 127 | # parsers should bubble up. 128 | @generate 129 | def thing(): 130 | yield string('a') 131 | yield string('b') 132 | 133 | with self.assertRaises(ParseError) as err: 134 | thing.parse('ax') 135 | 136 | ex = err.exception 137 | 138 | self.assertEqual(ex.expected, frozenset(['b'])) 139 | self.assertEqual(ex.stream, 'ax') 140 | self.assertEqual(ex.index, 1) 141 | 142 | self.assertIn("expected 'b' at 0:1", 143 | str(ex)) 144 | 145 | def test_multiple_failures(self): 146 | abc = string('a') | string('b') | string('c') 147 | 148 | with self.assertRaises(ParseError) as err: 149 | abc.parse('d') 150 | 151 | ex = err.exception 152 | self.assertEqual(ex.expected, frozenset(['a', 'b', 'c'])) 153 | self.assertEqual(str(ex), "expected one of 'a', 'b', 'c' at 0:0") 154 | 155 | def test_generate_backtracking(self): 156 | @generate 157 | def xy(): 158 | yield string('x') 159 | yield string('y') 160 | assert False 161 | 162 | parser = xy | string('z') 163 | # should not finish executing xy() 164 | self.assertEqual(parser.parse('z'), 'z') 165 | 166 | def test_or(self): 167 | x_or_y = string('x') | string('y') 168 | 169 | self.assertEqual(x_or_y.parse('x'), 'x') 170 | self.assertEqual(x_or_y.parse('y'), 'y') 171 | 172 | def test_or_with_then(self): 173 | parser = (string('\\') >> string('y')) | string('z') 174 | self.assertEqual(parser.parse('\\y'), 'y') 175 | self.assertEqual(parser.parse('z'), 'z') 176 | 177 | self.assertRaises(ParseError, parser.parse, '\\z') 178 | 179 | def test_many(self): 180 | letters = letter.many() 181 | self.assertEqual(letters.parse('x'), ['x']) 182 | self.assertEqual(letters.parse('xyz'), ['x', 'y', 'z']) 183 | self.assertEqual(letters.parse(''), []) 184 | 185 | self.assertRaises(ParseError, letters.parse, '1') 186 | 187 | def test_many_with_then(self): 188 | parser = string('x').many() >> string('y') 189 | self.assertEqual(parser.parse('y'), 'y') 190 | self.assertEqual(parser.parse('xy'), 'y') 191 | self.assertEqual(parser.parse('xxxxxy'), 'y') 192 | 193 | def test_times_zero(self): 194 | zero_letters = letter.times(0) 195 | self.assertEqual(zero_letters.parse(''), []) 196 | 197 | self.assertRaises(ParseError, zero_letters.parse, 'x') 198 | 199 | def test_times(self): 200 | three_letters = letter.times(3) 201 | self.assertEqual(three_letters.parse('xyz'), ['x', 'y', 'z']) 202 | 203 | self.assertRaises(ParseError, three_letters.parse, 'xy') 204 | self.assertRaises(ParseError, three_letters.parse, 'xyzw') 205 | 206 | def test_times_with_then(self): 207 | then_digit = letter.times(3) >> digit 208 | self.assertEqual(then_digit.parse('xyz1'), '1') 209 | 210 | self.assertRaises(ParseError, then_digit.parse, 'xy1') 211 | self.assertRaises(ParseError, then_digit.parse, 'xyz') 212 | self.assertRaises(ParseError, then_digit.parse, 'xyzw') 213 | 214 | def test_times_with_min_and_max(self): 215 | some_letters = letter.times(2, 4) 216 | 217 | self.assertEqual(some_letters.parse('xy'), ['x', 'y']) 218 | self.assertEqual(some_letters.parse('xyz'), ['x', 'y', 'z']) 219 | 
self.assertEqual(some_letters.parse('xyzw'), ['x', 'y', 'z', 'w']) 220 | 221 | self.assertRaises(ParseError, some_letters.parse, 'x') 222 | self.assertRaises(ParseError, some_letters.parse, 'xyzwv') 223 | 224 | def test_times_with_min_and_max_and_then(self): 225 | then_digit = letter.times(2, 4) >> digit 226 | 227 | self.assertEqual(then_digit.parse('xy1'), '1') 228 | self.assertEqual(then_digit.parse('xyz1'), '1') 229 | self.assertEqual(then_digit.parse('xyzw1'), '1') 230 | 231 | self.assertRaises(ParseError, then_digit.parse, 'xy') 232 | self.assertRaises(ParseError, then_digit.parse, 'xyzw') 233 | self.assertRaises(ParseError, then_digit.parse, 'xyzwv1') 234 | self.assertRaises(ParseError, then_digit.parse, 'x1') 235 | 236 | def test_at_most(self): 237 | ab = string("ab") 238 | self.assertEqual(ab.at_most(2).parse(""), 239 | []) 240 | self.assertEqual(ab.at_most(2).parse("ab"), 241 | ["ab"]) 242 | self.assertEqual(ab.at_most(2).parse("abab"), 243 | ["ab", "ab"]) 244 | self.assertRaises(ParseError, 245 | ab.at_most(2).parse, "ababab") 246 | 247 | def test_sep_by(self): 248 | digit_list = digit.map(int).sep_by(string(',')) 249 | 250 | self.assertEqual(digit_list.parse('1,2,3,4'), [1, 2, 3, 4]) 251 | self.assertEqual(digit_list.parse('9,0,4,7'), [9, 0, 4, 7]) 252 | self.assertEqual(digit_list.parse('3,7'), [3, 7]) 253 | self.assertEqual(digit_list.parse('8'), [8]) 254 | self.assertEqual(digit_list.parse(''), []) 255 | 256 | self.assertRaises(ParseError, digit_list.parse, '8,') 257 | self.assertRaises(ParseError, digit_list.parse, ',9') 258 | self.assertRaises(ParseError, digit_list.parse, '82') 259 | self.assertRaises(ParseError, digit_list.parse, '7.6') 260 | 261 | def test_sep_by_with_min_and_max(self): 262 | digit_list = digit.map(int).sep_by(string(','), min=2, max=4) 263 | 264 | self.assertEqual(digit_list.parse('1,2,3,4'), [1, 2, 3, 4]) 265 | self.assertEqual(digit_list.parse('9,0,4,7'), [9, 0, 4, 7]) 266 | self.assertEqual(digit_list.parse('3,7'), [3, 7]) 267 | 268 | self.assertRaises(ParseError, digit_list.parse, '8') 269 | self.assertRaises(ParseError, digit_list.parse, '') 270 | self.assertRaises(ParseError, digit_list.parse, '8,') 271 | self.assertRaises(ParseError, digit_list.parse, ',9') 272 | self.assertRaises(ParseError, digit_list.parse, '82') 273 | self.assertRaises(ParseError, digit_list.parse, '7.6') 274 | self.assertEqual(digit.sep_by(string(","), max=0).parse(''), 275 | []) 276 | 277 | def test_add(self): 278 | self.assertEqual((letter + digit).parse("a1"), 279 | "a1") 280 | 281 | def test_multiply(self): 282 | self.assertEqual((letter * 3).parse("abc"), 283 | ['a', 'b', 'c']) 284 | 285 | def test_multiply_range(self): 286 | self.assertEqual((letter * range(1, 2)).parse("a"), 287 | ["a"]) 288 | self.assertRaises(ParseError, (letter * range(1, 2)).parse, "aa") 289 | 290 | # Primitives 291 | def test_alt(self): 292 | self.assertRaises(ParseError, alt().parse, '') 293 | self.assertEqual(alt(letter, digit).parse('a'), 294 | 'a') 295 | self.assertEqual(alt(letter, digit).parse('1'), 296 | '1') 297 | self.assertRaises(ParseError, alt(letter, digit).parse, '.') 298 | 299 | def test_seq(self): 300 | self.assertEqual(seq().parse(''), 301 | []) 302 | self.assertEqual(seq(letter).parse('a'), 303 | ['a']) 304 | self.assertEqual(seq(letter, digit).parse('a1'), 305 | ['a', '1']) 306 | self.assertRaises(ParseError, seq(letter, digit).parse, '1a') 307 | 308 | def test_test_char(self): 309 | ascii = parsy_test_char(lambda c: ord(c) < 128, 310 | "ascii character") 311 | 
self.assertEqual(ascii.parse("a"), "a") 312 | with self.assertRaises(ParseError) as err: 313 | ascii.parse('☺') 314 | ex = err.exception 315 | self.assertEqual(str(ex), """expected 'ascii character' at 0:0""") 316 | 317 | with self.assertRaises(ParseError) as err: 318 | ascii.parse('') 319 | ex = err.exception 320 | self.assertEqual(str(ex), """expected 'ascii character' at 0:0""") 321 | 322 | def test_char_from(self): 323 | ab = char_from("ab") 324 | self.assertEqual(ab.parse("a"), "a") 325 | self.assertEqual(ab.parse("b"), "b") 326 | 327 | with self.assertRaises(ParseError) as err: 328 | ab.parse('x') 329 | 330 | ex = err.exception 331 | self.assertEqual(str(ex), """expected '[ab]' at 0:0""") 332 | 333 | def test_string_from(self): 334 | titles = string_from("Mr", "Mr.", "Mrs", "Mrs.") 335 | self.assertEqual(titles.parse("Mr"), "Mr") 336 | self.assertEqual(titles.parse("Mr."), "Mr.") 337 | self.assertEqual((titles + string(" Hyde")).parse("Mr. Hyde"), 338 | "Mr. Hyde") 339 | with self.assertRaises(ParseError) as err: 340 | titles.parse('foo') 341 | 342 | ex = err.exception 343 | self.assertEqual(str(ex), """expected one of 'Mr', 'Mr.', 'Mrs', 'Mrs.' at 0:0""") 344 | 345 | def test_any_char(self): 346 | self.assertEqual(any_char.parse("x"), "x") 347 | self.assertEqual(any_char.parse("\n"), "\n") 348 | self.assertRaises(ParseError, any_char.parse, "") 349 | 350 | def test_whitespace(self): 351 | self.assertEqual(whitespace.parse("\n"), "\n") 352 | self.assertEqual(whitespace.parse(" "), " ") 353 | self.assertRaises(ParseError, whitespace.parse, "x") 354 | 355 | def test_letter(self): 356 | self.assertEqual(letter.parse("a"), "a") 357 | self.assertRaises(ParseError, letter.parse, "1") 358 | 359 | def test_digit(self): 360 | self.assertEqual(digit.parse("¹"), "¹") 361 | self.assertEqual(digit.parse("2"), "2") 362 | self.assertRaises(ParseError, digit.parse, "x") 363 | 364 | def test_decimal_digit(self): 365 | self.assertEqual(decimal_digit.at_least(1).concat().parse("9876543210"), 366 | "9876543210") 367 | self.assertRaises(ParseError, decimal_digit.parse, "¹") 368 | 369 | def test_line_info(self): 370 | @generate 371 | def foo(): 372 | i = yield line_info 373 | l = yield any_char 374 | return (l, i) 375 | 376 | self.assertEqual(foo.many().parse("AB\nCD"), 377 | [("A", (0, 0)), ("B", (0, 1)), 378 | ("\n", (0, 2)), 379 | ("C", (1, 0)), ("D", (1, 1)), 380 | ]) 381 | 382 | def test_should_fail(self): 383 | not_a_digit = digit.should_fail('not a digit') >> regex(r'.*') 384 | 385 | self.assertEqual(not_a_digit.parse('a'), 'a') 386 | self.assertEqual(not_a_digit.parse('abc'), 'abc') 387 | self.assertEqual(not_a_digit.parse('a10'), 'a10') 388 | self.assertEqual(not_a_digit.parse(''), '') 389 | 390 | with self.assertRaises(ParseError) as err: 391 | not_a_digit.parse('8') 392 | self.assertEqual(str(err.exception), "expected 'not a digit' at 0:0") 393 | 394 | self.assertRaises(ParseError, not_a_digit.parse, '8ab') 395 | 396 | 397 | class TestParserTokens(unittest.TestCase): 398 | """ 399 | Tests that ensure that `.parse` can handle an arbitrary list of tokens, 400 | rather than a string. 
401 | """ 402 | # Some opaque objects we will use in our stream: 403 | START = object() 404 | STOP = object() 405 | 406 | def test_test_item(self): 407 | start_stop = parsy_test_item(lambda i: i in [self.START, self.STOP], "START/STOP") 408 | self.assertEqual(start_stop.parse([self.START]), 409 | self.START) 410 | self.assertEqual(start_stop.parse([self.STOP]), 411 | self.STOP) 412 | with self.assertRaises(ParseError) as err: 413 | start_stop.many().parse([self.START, "hello"]) 414 | 415 | ex = err.exception 416 | self.assertEqual(str(ex), 417 | "expected one of 'EOF', 'START/STOP' at 1") 418 | self.assertEqual(ex.expected, 419 | {'EOF', 'START/STOP'}) 420 | self.assertEqual(ex.index, 421 | 1) 422 | 423 | def test_match_item(self): 424 | self.assertEqual(match_item(self.START).parse([self.START]), 425 | self.START) 426 | with self.assertRaises(ParseError) as err: 427 | match_item(self.START, "START").parse([]) 428 | 429 | ex = err.exception 430 | self.assertEqual(str(ex), 431 | "expected 'START' at 0") 432 | 433 | def test_parse_tokens(self): 434 | other_vals = parsy_test_item(lambda i: i not in [self.START, self.STOP], 435 | "not START/STOP") 436 | 437 | bracketed = match_item(self.START) >> other_vals.many() << match_item(self.STOP) 438 | stream = [self.START, "hello", 1, 2, "goodbye", self.STOP] 439 | result = bracketed.parse(stream) 440 | self.assertEqual(result, ["hello", 1, 2, "goodbye"]) 441 | 442 | def test_index(self): 443 | @generate 444 | def foo(): 445 | i = yield index 446 | l = yield letter 447 | return (l, i) 448 | 449 | self.assertEqual(foo.many().parse(["A", "B"]), 450 | [("A", 0), ("B", 1)]) 451 | 452 | 453 | class TestUtils(unittest.TestCase): 454 | def test_line_info_at(self): 455 | text = "abc\ndef" 456 | self.assertEqual(line_info_at(text, 0), 457 | (0, 0)) 458 | self.assertEqual(line_info_at(text, 2), 459 | (0, 2)) 460 | self.assertEqual(line_info_at(text, 3), 461 | (0, 3)) 462 | self.assertEqual(line_info_at(text, 4), 463 | (1, 0)) 464 | self.assertEqual(line_info_at(text, 7), 465 | (1, 3)) 466 | self.assertRaises(ValueError, lambda: line_info_at(text, 8)) 467 | 468 | 469 | if __name__ == '__main__': 470 | unittest.main() 471 | --------------------------------------------------------------------------------