├── docs
├── requirements.txt
├── _static
│ └── dmrs-tikz-pdf.png
├── api
│ ├── delphin.tokens.rst
│ ├── delphin.web.rst
│ ├── delphin.exceptions.rst
│ ├── delphin.edm.rst
│ ├── delphin.sembase.rst
│ ├── delphin.codecs.ace.rst
│ ├── delphin.highlight.rst
│ ├── delphin.vpm.rst
│ ├── delphin.commands.rst
│ ├── delphin.tfs.rst
│ ├── delphin.hierarchy.rst
│ ├── delphin.eds.rst
│ ├── delphin.cli.rst
│ ├── delphin.scope.rst
│ ├── delphin.mrs.rst
│ ├── delphin.repp.rst
│ ├── delphin.dmrs.rst
│ ├── delphin.codecs.mrsprolog.rst
│ ├── delphin.predicate.rst
│ ├── delphin.codecs.eds.rst
│ ├── delphin.codecs.simpledmrs.rst
│ ├── delphin.itsdb.rst
│ ├── delphin.codecs.simplemrs.rst
│ ├── delphin.interface.rst
│ ├── delphin.codecs.indexedmrs.rst
│ ├── delphin.lnk.rst
│ ├── delphin.semi.rst
│ ├── delphin.codecs.dmrstikz.rst
│ ├── delphin.codecs.edspenman.rst
│ ├── delphin.codecs.dmrspenman.rst
│ ├── delphin.web.server.rst
│ ├── delphin.web.client.rst
│ ├── delphin.variable.rst
│ ├── delphin.codecs.edsjson.rst
│ ├── delphin.tsql.rst
│ ├── delphin.tdl.rst
│ ├── delphin.codecs.dmrsjson.rst
│ ├── delphin.codecs.dmrx.rst
│ └── delphin.tsdb.rst
├── Makefile
├── .readthedocs.yaml
├── _extensions
│ └── wiki.py
├── README.md
├── index.rst
└── guides
│ └── edm.rst
├── delphin
├── main.py
├── web
│ └── __init__.py
├── mrs
│ ├── _exceptions.py
│ └── __init__.py
├── eds
│ ├── _exceptions.py
│ └── __init__.py
├── dmrs
│ ├── _exceptions.py
│ └── __init__.py
├── __about__.py
├── cli
│ ├── select.py
│ ├── compare.py
│ ├── repp.py
│ ├── mkprof.py
│ ├── process.py
│ ├── convert.py
│ └── edm.py
├── exceptions.py
├── codecs
│ ├── ace.py
│ ├── mrsprolog.py
│ └── edsjson.py
├── __main__.py
└── variable.py
├── tests
├── codecs
│ ├── mrx_test.py
│ ├── conftest.py
│ ├── dmrx_test.py
│ ├── edsnative_test.py
│ ├── simplemrs_test.py
│ └── simpledmrs_test.py
├── scope_test.py
├── ace_test.py
├── variable_test.py
├── edm_test.py
├── eds_test.py
├── predicate_test.py
├── lnk_test.py
└── tfs_test.py
├── LICENSE
├── .github
└── workflows
│ ├── checks.yml
│ └── publish.yml
├── .gitignore
├── pyproject.toml
├── CODE_OF_CONDUCT.md
└── CONTRIBUTING.md
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx-copybutton
3 | furo
4 | httpx
5 | falcon
6 |
--------------------------------------------------------------------------------
/docs/_static/dmrs-tikz-pdf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/delph-in/pydelphin/HEAD/docs/_static/dmrs-tikz-pdf.png
--------------------------------------------------------------------------------
/docs/api/delphin.tokens.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.tokens
3 | ==============
4 |
5 | .. automodule:: delphin.tokens
6 | :members:
7 |
--------------------------------------------------------------------------------
/delphin/main.py:
--------------------------------------------------------------------------------
1 | # For backward compatibility
2 |
3 | from delphin.__main__ import main
4 |
5 | if __name__ == '__main__':
6 | main()
7 |
--------------------------------------------------------------------------------
/docs/api/delphin.web.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.web
3 | ===========
4 |
5 | .. automodule:: delphin.web
6 |
7 | .. toctree::
8 | :maxdepth: 1
9 |
10 | delphin.web.client
11 | delphin.web.server
12 |
13 |
--------------------------------------------------------------------------------
/delphin/web/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Client interfaces and a server for the DELPH-IN Web API.
4 | """
5 |
6 | # Default modules need to import the PyDelphin version
7 | from delphin.__about__ import __version__ # noqa: F401
8 |
--------------------------------------------------------------------------------
/docs/api/delphin.exceptions.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.exceptions
3 | ==================
4 |
5 | .. automodule:: delphin.exceptions
6 |
7 | .. autoexception:: PyDelphinException
8 | .. autoexception:: PyDelphinSyntaxError
9 | .. autoexception:: PyDelphinWarning
10 |
--------------------------------------------------------------------------------
/delphin/mrs/_exceptions.py:
--------------------------------------------------------------------------------
1 |
2 | from delphin.exceptions import PyDelphinException, PyDelphinSyntaxError
3 |
4 |
class MRSError(PyDelphinException):
    """Raised on invalid MRS operations."""


class MRSSyntaxError(PyDelphinSyntaxError):
    """Raised when an invalid MRS serialization is encountered."""
11 |
--------------------------------------------------------------------------------
/docs/api/delphin.edm.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.edm
3 | ===========
4 |
5 | .. automodule:: delphin.edm
6 |
7 | This module provides the implementation of Elementary Dependency
8 | Matching used by the :command:`edm` subcommand, which is the
9 | recommended interface (see the :doc:`../guides/edm` guide for more
10 | information). Only the :func:`compute` function is made available at
11 | this time.
12 |
13 | .. autofunction:: compute
14 |
--------------------------------------------------------------------------------
/docs/api/delphin.sembase.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.sembase
3 | ===============
4 |
5 | .. automodule:: delphin.sembase
6 |
7 | Module Functions
8 | ----------------
9 |
10 | .. autofunction:: role_priority
11 | .. autofunction:: property_priority
12 |
13 | Classes
14 | -------
15 |
16 | .. autoclass:: Predication
17 | :show-inheritance:
18 | :members:
19 |
20 | .. autoclass:: SemanticStructure
21 | :show-inheritance:
22 | :members:
23 |
--------------------------------------------------------------------------------
/delphin/eds/_exceptions.py:
--------------------------------------------------------------------------------
1 |
2 | from delphin.exceptions import (
3 | PyDelphinException,
4 | PyDelphinSyntaxError,
5 | PyDelphinWarning,
6 | )
7 |
8 |
class EDSError(PyDelphinException):
    """Raised on invalid EDS operations."""


class EDSSyntaxError(PyDelphinSyntaxError):
    """Raised when an invalid EDS string is encountered."""


class EDSWarning(PyDelphinWarning):
    """Issued when an EDS may be incorrect or incomplete."""
19 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.ace.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.ace
3 | ==================
4 |
5 | .. automodule:: delphin.codecs.ace
6 |
7 | Deserialization Functions
8 | -------------------------
9 |
10 | .. function:: load(source)
11 |
12 | See the :func:`load` codec API documentation.
13 |
14 | .. function:: loads(s)
15 |
16 | See the :func:`loads` codec API documentation.
17 |
18 | .. function:: decode(s)
19 |
20 | See the :func:`decode` codec API documentation.
21 |
--------------------------------------------------------------------------------
/delphin/dmrs/_exceptions.py:
--------------------------------------------------------------------------------
1 |
2 | from delphin.exceptions import (
3 | PyDelphinException,
4 | PyDelphinSyntaxError,
5 | PyDelphinWarning,
6 | )
7 |
8 |
class DMRSError(PyDelphinException):
    """Raised on invalid DMRS operations."""


class DMRSSyntaxError(PyDelphinSyntaxError):
    """Raised when an invalid DMRS serialization is encountered."""


class DMRSWarning(PyDelphinWarning):
    """Issued when a DMRS may be incorrect or incomplete."""
19 |
--------------------------------------------------------------------------------
/docs/api/delphin.highlight.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.highlight
3 | =================
4 |
5 | .. automodule:: delphin.highlight
6 |
7 | This module contains `Pygments <https://pygments.org/>`_ lexers for
8 | highlighting SimpleMRS and TDL, as well as a style for coloring MRS
9 | outputs. This module is primarily intended for use by PyDelphin at
10 | the command line and in its documentation, but the lexers and style
11 | can be used directly with Pygments for other purposes.
12 |
13 | Classes
14 | -------
15 |
16 | .. autoclass:: TDLLexer
17 | .. autoclass:: SimpleMRSLexer
18 | .. autoclass:: MRSStyle
19 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = PyDelphin
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/delphin/__about__.py:
--------------------------------------------------------------------------------
1 |
2 | # This __about__.py file for storing project metadata is inspired by
3 | # the warehouse project:
4 | # https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
5 |
6 | __version__ = '1.10.0'
7 | __version_info__ = __version__.replace('.', ' ').replace('-', ' ').split()
8 |
9 | __title__ = 'PyDelphin'
10 | __summary__ = 'Libraries and scripts for DELPH-IN data'
11 | __uri__ = 'https://github.com/delph-in/pydelphin'
12 |
13 | __author__ = 'Michael Wayne Goodman'
14 | __email__ = 'goodman.m.w@gmail.com'
15 |
16 | __maintainer__ = 'Michael Wayne Goodman'
17 |
18 | __license__ = 'MIT'
19 | __copyright__ = '2013--2020 %s <%s> and contributors' % (__author__, __email__)
20 |
--------------------------------------------------------------------------------
/docs/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-22.04
11 | tools:
12 | python: "3.11"
13 |
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 | configuration: docs/conf.py
17 |
18 | # We recommend specifying your dependencies to enable reproducible builds:
19 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
20 | python:
21 | install:
22 | - requirements: docs/requirements.txt
23 |
24 | formats:
25 | - pdf
26 | - epub
27 |
--------------------------------------------------------------------------------
/docs/api/delphin.vpm.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.vpm
3 | ===========
4 |
5 | .. automodule:: delphin.vpm
6 |
7 | Variable property mappings (VPMs) convert grammar-internal
8 | variables (e.g. `event5`) to the grammar-external form (e.g. `e5`),
9 | and also map variable properties (e.g. `PNG: 1pl` might map to
10 | `PERS: 1` and `NUM: pl`).
11 |
12 | .. seealso::
13 | - Wiki about VPM: https://github.com/delph-in/docs/wiki/RmrsVpm
14 |
15 |
16 | Module functions
17 | ----------------
18 |
19 | .. autofunction:: load
20 |
21 | Classes
22 | -------
23 |
24 | .. autoclass:: VPM
25 | :members:
26 |
27 | Exceptions
28 | ----------
29 |
30 | .. autoexception:: VPMSyntaxError
31 | :show-inheritance:
32 |
--------------------------------------------------------------------------------
/docs/_extensions/wiki.py:
--------------------------------------------------------------------------------
1 | import re
2 | from urllib.parse import urljoin
3 |
4 | from docutils import nodes
5 |
6 |
def setup(app):
    """Sphinx extension entry point.

    Registers the ``wiki`` role and the ``wiki_url`` configuration
    value on *app*.
    """
    # The ``wiki`` role is implemented by wikilink().
    app.add_role('wiki', wikilink)
    # Base URL that wiki slugs are joined onto; the default is None.
    app.add_config_value('wiki_url', None, 'env')
10 |
11 |
def wikilink(name, rawtext, text, lineno, inliner,
             options=None, content=None):
    """Role function that creates a link to a wiki page.

    The role text is either a bare page slug or a link title followed
    by the slug in angle brackets (e.g., ``Title <Slug>``). The slug is
    joined onto the ``wiki_url`` configuration value to form the link
    target.

    Args:
        name: the role name used in the document
        rawtext: the full markup of the role, passed on to the
            reference node
        text: the role's content; a slug or ``Title <Slug>``
        lineno: line number of the role in the input (unused)
        inliner: the inliner object; only used to look up ``wiki_url``
        options: optional mapping passed as keyword arguments to the
            reference node
        content: unused; accepted for compatibility with the role
            function signature
    Returns:
        A pair of a one-element list holding the reference node and
        an empty list.
    """
    # Use None defaults rather than shared mutable default objects.
    if options is None:
        options = {}
    base = inliner.document.settings.env.app.config.wiki_url
    match = re.search(r'(.*)\s+<(.*)>', text)
    if match:
        # Explicit title: "Title <Slug>"
        text, slug = match.groups()
        text = text.strip()
        slug = slug.strip()
    else:
        # Bare slug: the slug doubles as the link text
        text = text.strip()
        slug = text
    url = urljoin(base, slug)
    node = nodes.reference(rawtext, text, refuri=url, **options)
    return [node], []
25 |
--------------------------------------------------------------------------------
/docs/api/delphin.commands.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.commands
3 | ================
4 |
5 | .. automodule:: delphin.commands
6 |
7 | The public functions in this module largely mirror the front-end
8 | subcommands provided by the `delphin` command, with some small
9 | changes to argument names or values to be better-suited to being
10 | called from within Python.
11 |
12 | convert
13 | -------
14 |
15 | .. autofunction:: convert
16 |
17 | select
18 | ------
19 |
20 | .. autofunction:: select
21 |
22 | mkprof
23 | ------
24 |
25 | .. autofunction:: mkprof
26 |
27 | process
28 | -------
29 |
30 | .. autofunction:: process
31 |
32 | compare
33 | -------
34 |
35 | .. autofunction:: compare
36 |
37 | repp
38 | ----
39 |
40 | .. autofunction:: repp
41 |
42 | Exceptions
43 | ----------
44 |
45 | .. autoexception:: CommandError
46 |
--------------------------------------------------------------------------------
/docs/api/delphin.tfs.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.tfs
3 | ===========
4 |
5 | .. automodule:: delphin.tfs
6 |
7 | This module defines the :class:`FeatureStructure` and
8 | :class:`TypedFeatureStructure` classes, which model an attribute
9 | value matrix (AVM), with the latter including an associated
10 | type. They allow feature access through TDL-style dot notation
11 | or regular dictionary keys.
12 |
13 | In addition, the :class:`TypeHierarchy` class implements a
14 | multiple-inheritance hierarchy with checks for type subsumption and
15 | compatibility.
16 |
17 |
18 | Classes
19 | -------
20 |
21 | .. autoclass:: FeatureStructure
22 | :members:
23 |
24 | .. autoclass:: TypedFeatureStructure
25 | :show-inheritance:
26 | :members:
27 |
28 | .. autoclass:: TypeHierarchy
29 | :show-inheritance:
30 | :members:
31 | :inherited-members:
32 |
--------------------------------------------------------------------------------
/tests/codecs/mrx_test.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin.codecs import mrx
5 | from delphin.mrs import EP, MRS, HCons
6 |
7 |
@pytest.fixture
def it_rains_mrs():
    """Return a one-predication MRS with a single qeq constraint."""
    rain = EP('_rain_v_1', 'h1', {'ARG0': 'e2'})
    top_qeq = HCons.qeq('h0', 'h1')
    return MRS('h0', 'e2', [rain], [top_qeq])
15 |
16 |
@pytest.fixture
def it_rains_heavily_mrs():
    """Return a two-predication MRS whose EPs share the label ``h1``."""
    rain = EP('_rain_v_1', 'h1', {'ARG0': 'e2'})
    heavy = EP('_heavy_a_1', 'h1', {'ARG0': 'e3', 'ARG1': 'e2'})
    top_qeq = HCons.qeq('h0', 'h1')
    return MRS('h0', 'e2', [rain, heavy], [top_qeq])
25 |
26 |
def test_round_trip(it_rains_mrs, it_rains_heavily_mrs):
    """Check that encoding then decoding yields an equal MRS."""
    compact = mrx.encode(it_rains_mrs)
    assert mrx.decode(compact) == it_rains_mrs

    indented = mrx.encode(it_rains_mrs, indent=True)
    assert mrx.decode(indented) == it_rains_mrs

    compact_heavily = mrx.encode(it_rains_heavily_mrs)
    assert mrx.decode(compact_heavily) == it_rains_heavily_mrs
31 |
--------------------------------------------------------------------------------
/tests/scope_test.py:
--------------------------------------------------------------------------------
1 |
2 | from delphin import scope
3 |
4 |
def test_conjoin():
    """Check scope.conjoin() with zero, one, two, and chained pairs."""

    # Build fresh inputs for every call so no state is shared.
    def two_labels():
        return {'h0': [], 'h1': ['e2']}

    def four_labels():
        return {'h0': [], 'h1': ['e2'], 'h2': ['x4'], 'h3': ['q5']}

    # No label pairs
    assert scope.conjoin(two_labels(), []) == two_labels()

    # One pair
    merged = scope.conjoin(two_labels(), [('h0', 'h1')])
    assert len(merged) == 1 and list(merged.values()) == [['e2']]

    # Two independent pairs
    merged = scope.conjoin(four_labels(), [('h0', 'h1'), ('h2', 'h3')])
    assert len(merged) == 2
    groups = [set(ids) for ids in merged.values()]
    assert {'e2'} in groups
    assert {'x4', 'q5'} in groups

    # A chain of pairs
    chain = [('h0', 'h1'), ('h1', 'h2'), ('h2', 'h3')]
    merged = scope.conjoin(four_labels(), chain)
    assert len(merged) == 1
    first_group = next(iter(merged.values()))
    assert {'e2', 'x4', 'q5'} == set(first_group)
20 |
21 |
def test_tree_fragments():
    # Placeholder: no assertions are implemented yet.
    pass
24 |
25 |
def test_representatives():
    # Placeholder: no assertions are implemented yet.
    pass
28 |
--------------------------------------------------------------------------------
/delphin/cli/select.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Select data from [incr tsdb()] test suites.
4 | """
5 |
6 | import argparse
7 | import logging
8 |
9 | from delphin import tsdb
10 | from delphin.commands import select
11 |
12 | logger = logging.getLogger('delphin.commands')
13 |
14 | parser = argparse.ArgumentParser(add_help=False) # filled out below
15 |
16 | COMMAND_INFO = {
17 | 'name': 'select',
18 | 'help': 'Select data from [incr tsdb()] test suites',
19 | 'description': __doc__,
20 | 'parser': parser
21 | }
22 |
23 |
def call_select(args):
    """Run the select command and print each selected row.

    The query and test suite path are read from ``args.QUERY`` and
    ``args.TESTSUITE``. Each row is joined with :func:`tsdb.join` and
    printed. A broken pipe while printing is logged at the INFO level
    and otherwise ignored.
    """
    selected = select(args.QUERY, args.TESTSUITE)
    try:
        for record in selected:
            line = tsdb.join(record)
            print(line)
    except BrokenPipeError:
        logger.info('broken pipe')
33 |
34 |
35 | # Arguments for the select command
36 | parser.set_defaults(func=call_select)
37 | parser.add_argument(
38 | 'QUERY', help='TSQL selection (e.g., \'i-input where readings = 0\')')
39 | parser.add_argument(
40 | 'TESTSUITE', help='path to the testsuite directory to select data from')
41 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013 Michael Wayne Goodman and other contributors
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/.github/workflows/checks.yml:
--------------------------------------------------------------------------------
1 | name: tests
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request:
7 | branches: [main]
8 |
9 | jobs:
10 | lint:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v4
14 | - name: Set up Python
15 | uses: actions/setup-python@v4
16 | with:
17 | python-version: "3.9"
18 | - name: Install Hatch
19 | run: pipx install hatch
20 | - name: Lint
21 | run: hatch run dev:lint
22 | - name: Type Check
23 | run: hatch run dev:typecheck
24 | tests:
25 | runs-on: ${{ matrix.os }}
26 | strategy:
27 | matrix:
28 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
29 | os: [ubuntu-latest, windows-latest]
30 | steps:
31 | - uses: actions/checkout@v4
32 | - name: Set up Python ${{ matrix.python-version }}
33 | uses: actions/setup-python@v4
34 | with:
35 | python-version: ${{ matrix.python-version }}
36 | - name: Install Hatch
37 | run: pipx install hatch
38 | - name: Check Command and Version
39 | run: hatch run delphin -V
40 | - name: Test
41 | run: hatch run dev:test
42 |
--------------------------------------------------------------------------------
/delphin/eds/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Elementary Dependency Structures ([EDS]_)
4 |
5 | .. [EDS] Stephan Oepen, Dan Flickinger, Kristina Toutanova, and
6 | Christopher D Manning. Lingo Redwoods. Research on Language and
7 | Computation, 2(4):575–596, 2004.;
8 |
9 | Stephan Oepen and Jan Tore Lønning. Discriminant-based MRS
10 | banking. In Proceedings of the 5th International Conference on
11 | Language Resources and Evaluation, pages 1250–1255, 2006.
12 |
13 | """
14 |
15 | # Default modules need to import the PyDelphin version
16 | from delphin.__about__ import __version__ # noqa: F401
17 | from delphin.eds._eds import (
18 | BOUND_VARIABLE_ROLE,
19 | EDS,
20 | PREDICATE_MODIFIER_ROLE,
21 | Node,
22 | )
23 | from delphin.eds._exceptions import (
24 | EDSError,
25 | EDSSyntaxError,
26 | EDSWarning,
27 | )
28 | from delphin.eds._operations import (
29 | find_predicate_modifiers,
30 | from_mrs,
31 | make_ids_unique,
32 | )
33 |
34 | __all__ = [
35 | 'BOUND_VARIABLE_ROLE',
36 | 'PREDICATE_MODIFIER_ROLE',
37 | 'EDS',
38 | 'Node',
39 | 'from_mrs',
40 | 'find_predicate_modifiers',
41 | 'make_ids_unique',
42 | 'EDSError',
43 | 'EDSSyntaxError',
44 | 'EDSWarning',
45 | ]
46 |
--------------------------------------------------------------------------------
/docs/api/delphin.hierarchy.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.hierarchy
3 | =================
4 |
5 | .. automodule:: delphin.hierarchy
6 |
7 | This module defines the :class:`MultiHierarchy` class for
8 | multiply-inheriting hierarchies. This class manages the insertion
9 | of new nodes into the hierarchy via the class constructor or the
10 | :meth:`MultiHierarchy.update` method, normalizing node identifiers
11 | (if a suitable normalization function is provided at
12 | instantiation), and inserting nodes in the appropriate order. It
13 | checks for some kinds of ill-formed hierarchies, such as cycles and
14 | redundant parentage, and provides methods for testing for node
15 | compatibility and subsumption. For convenience, arbitrary data may
16 | be associated with node identifiers.
17 |
18 | While the class may be used directly, it is mainly used to support
19 | the :class:`~delphin.tfs.TypeHierarchy` class and the predicate,
20 | property, and variable hierarchies of :class:`~delphin.semi.SemI`
21 | instances.
22 |
23 | Classes
24 | -------
25 |
26 | .. autoclass:: MultiHierarchy
27 | :members:
28 |
29 | Exceptions
30 | ----------
31 |
32 | .. autoexception:: HierarchyError
33 | :show-inheritance:
34 |
--------------------------------------------------------------------------------
/delphin/dmrs/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Dependency Minimal Recursion Semantics ([DMRS]_)
4 |
5 | .. [DMRS] Copestake, Ann. Slacker Semantics: Why superficiality,
6 | dependency and avoidance of commitment can be the right way to go.
7 | In Proceedings of the 12th Conference of the European Chapter of
8 | the Association for Computational Linguistics, pages 1–9.
9 | Association for Computational Linguistics, 2009.
10 | """
11 |
12 | # Default modules need to import the PyDelphin version
13 | from delphin.__about__ import __version__ # noqa: F401
14 | from delphin.dmrs._dmrs import (
15 | BARE_EQ_ROLE,
16 | CVARSORT,
17 | DMRS,
18 | EQ_POST,
19 | FIRST_NODE_ID,
20 | H_POST,
21 | HEQ_POST,
22 | NEQ_POST,
23 | RESTRICTION_ROLE,
24 | Link,
25 | Node,
26 | )
27 | from delphin.dmrs._exceptions import (
28 | DMRSError,
29 | DMRSSyntaxError,
30 | DMRSWarning,
31 | )
32 | from delphin.dmrs._operations import from_mrs
33 |
34 | __all__ = [
35 | 'FIRST_NODE_ID',
36 | 'RESTRICTION_ROLE',
37 | 'BARE_EQ_ROLE',
38 | 'EQ_POST',
39 | 'HEQ_POST',
40 | 'NEQ_POST',
41 | 'H_POST',
42 | 'CVARSORT',
43 | 'DMRS',
44 | 'Node',
45 | 'Link',
46 | 'from_mrs',
47 | 'DMRSError',
48 | 'DMRSSyntaxError',
49 | 'DMRSWarning',
50 | ]
51 |
--------------------------------------------------------------------------------
/docs/api/delphin.eds.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.eds
3 | ===========
4 |
5 | .. automodule:: delphin.eds
6 |
7 | Serialization Formats
8 | ---------------------
9 |
10 | .. toctree::
11 | :maxdepth: 1
12 |
13 | delphin.codecs.edsjson.rst
14 | delphin.codecs.eds.rst
15 | delphin.codecs.edspenman.rst
16 |
17 | Module Constants
18 | ----------------
19 |
20 | .. data:: BOUND_VARIABLE_ROLE
21 |
22 | The `BV` role used in edges to select the identifier of the node
23 | restricted by the quantifier.
24 |
25 | .. data:: PREDICATE_MODIFIER_ROLE
26 |
27 | The `ARG1` role used as a default role when inserting edges for
28 | predicate modification.
29 |
30 | Classes
31 | -------
32 |
33 | .. autoclass:: EDS
34 | :show-inheritance:
35 | :members:
36 |
37 | .. autoclass:: Node
38 | :show-inheritance:
39 | :members:
40 |
41 | Module Functions
42 | ----------------
43 |
44 | .. autofunction:: from_mrs
45 | .. autofunction:: find_predicate_modifiers
46 | .. autofunction:: make_ids_unique
47 |
48 | Exceptions
49 | ----------
50 |
51 | .. autoexception:: EDSError
52 | :show-inheritance:
53 |
54 | .. autoexception:: EDSSyntaxError
55 | :show-inheritance:
56 |
57 | .. autoexception:: EDSWarning
58 | :show-inheritance:
59 |
--------------------------------------------------------------------------------
/tests/codecs/conftest.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin import mrs
5 | from delphin.lnk import Lnk
6 |
7 |
@pytest.fixture
def nearly_all_dogs_bark_mrs():
    """Return an MRS fixture with the surface 'Nearly all dogs bark.'

    The MRS has four predications with character-span links, two qeq
    handle constraints, no individual constraints, and variable
    properties for ``e2``, ``e5``, and ``x3``.
    """
    predications = [
        mrs.EP(
            '_nearly_x_deg',
            'h4',
            args={'ARG0': 'e5', 'ARG1': 'u6'},
            lnk=Lnk('<0:6>'),
        ),
        mrs.EP(
            '_all_q',
            'h4',
            args={'ARG0': 'x3', 'RSTR': 'h7', 'BODY': 'h8'},
            lnk=Lnk('<7:10>'),
        ),
        mrs.EP(
            '_dog_n_1',
            'h9',
            args={'ARG0': 'x3'},
            lnk=Lnk('<11:15>'),
        ),
        mrs.EP(
            '_bark_v_1',
            'h1',
            args={'ARG0': 'e2', 'ARG1': 'x3'},
            lnk=Lnk('<16:20>'),
        ),
    ]
    handle_constraints = [
        mrs.HCons.qeq('h0', 'h1'),
        mrs.HCons.qeq('h7', 'h9'),
    ]
    variable_properties = {
        'e2': {
            'SF': 'prop',
            'TENSE': 'pres',
            'MOOD': 'indicative',
            'PROG': '-',
            'PERF': '-',
        },
        'e5': {
            'SF': 'prop',
            'TENSE': 'untensed',
            'MOOD': 'indicative',
            'PROG': '-',
            'PERF': '-',
        },
        'x3': {'PERS': '3', 'NUM': 'pl', 'IND': '+', 'PT': 'pt'},
    }
    return mrs.MRS(
        top='h0',
        index='e2',
        rels=predications,
        hcons=handle_constraints,
        icons=[],
        variables=variable_properties,
        lnk=Lnk('<0:21>'),
        surface='Nearly all dogs bark.',
        identifier='10',
    )
40 |
--------------------------------------------------------------------------------
/delphin/cli/compare.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Compare MRS results in test and gold [incr tsdb()] testsuites.
4 |
5 | Graph isomorphism is used to determine if two MRSs are equivalent and
6 | the results show how many unique MRSs exist in the test and gold
7 | testsuites and how many are shared.
8 | """
9 |
10 | import argparse
11 |
12 | from delphin.commands import compare
13 |
14 | parser = argparse.ArgumentParser(add_help=False) # filled out below
15 |
16 | COMMAND_INFO = {
17 | 'name': 'compare',
18 | 'help': 'Compare MRS results across test suites',
19 | 'description': __doc__,
20 | 'parser': parser
21 | }
22 |
23 |
def call_compare(args):
    """Run the compare command and print one line per result.

    Each line has the item id and the ``<test,shared,gold>`` counts,
    separated by a tab. When ``args.verbosity`` is greater than zero,
    the item's input is appended as a third tab-separated field.
    """
    line_format = '{id}\t<{test},{shared},{gold}>'
    verbose = args.verbosity > 0
    if verbose:
        line_format = line_format + '\t{input}'
    results = compare(args.TESTSUITE, args.GOLD, select=args.select)
    for fields in results:
        print(line_format.format(**fields))
33 |
34 |
35 | parser.set_defaults(func=call_compare)
36 | parser.add_argument(
37 | 'TESTSUITE', help='path to the current test-suite directory')
38 | parser.add_argument(
39 | 'GOLD', help='path to the gold test-suite directory')
40 | parser.add_argument(
41 | '--select',
42 | metavar='QUERY',
43 | default='item.i-id item.i-input result.mrs',
44 | help=('TSQL query for selecting (id, input, mrs) triples from '
45 | 'TESTSUITE and GOLD (default: \'i-id i-input mrs\')'))
46 |
--------------------------------------------------------------------------------
/docs/api/delphin.cli.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.cli
3 | ===========
4 |
5 | Command-line Interface Modules
6 |
7 | The `delphin.cli` package is a `namespace package
8 | <https://docs.python.org/3/reference/import.html#namespace-packages>`_ for modules that define
9 | command-line interfaces. Each module under `delphin.cli` must have a
10 | dictionary named ``COMMAND_INFO`` and defined as follows:
11 |
12 | .. code-block:: python
13 |
14 | COMMAND_INFO = {
15 | 'name': 'command-name', # Required
16 | 'help': 'short help message', # Optional
17 | 'description': 'long description', # Optional
18 | 'parser': parser, # Required
19 | }
20 |
21 | The ``name`` field is the subcommand (e.g., :command:`delphin
22 | command-name`) and the ``parser`` field is a
23 | :py:class:`argparse.ArgumentParser` instance that specifies available
24 | arguments. Some common options, such as ``--verbose`` (``-v``),
25 | ``--quiet`` (``-q``), and ``--version`` (``-V``) will be created
26 | automatically by PyDelphin. This parser should also specify a ``func``
27 | callable attribute that is called when the subcommand is used. Thus,
28 | the recommended way to create ``parser`` is as follows:
29 |
30 | .. code-block:: python
31 |
32 | parser = argparse.ArgumentParser(add_help=False)
33 | parser.set_defaults(func=my_function)
34 |
35 | All of the default commands in :mod:`delphin.commands` define their
36 | command-line interface in the ``delphin.cli`` namespace.
37 |
--------------------------------------------------------------------------------
/delphin/mrs/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Minimal Recursion Semantics ([MRS]_).
3 |
4 | .. [MRS] Copestake, Ann, Dan Flickinger, Carl Pollard,
5 | and Ivan A. Sag. "Minimal recursion semantics: An introduction."
6 | Research on language and computation 3, no. 2-3 (2005): 281-332.
7 | """
8 |
9 | # Default modules need to import the PyDelphin version
10 | from delphin.__about__ import __version__ # noqa: F401
11 | from delphin.mrs._exceptions import MRSError, MRSSyntaxError
12 | from delphin.mrs._mrs import (
13 | BODY_ROLE,
14 | CONSTANT_ROLE,
15 | EP,
16 | INTRINSIC_ROLE,
17 | MRS,
18 | RESTRICTION_ROLE,
19 | HCons,
20 | ICons,
21 | )
22 | from delphin.mrs._operations import (
23 | compare_bags,
24 | from_dmrs,
25 | has_complete_intrinsic_variables,
26 | has_intrinsic_variable_property,
27 | has_unique_intrinsic_variables,
28 | is_connected,
29 | is_isomorphic,
30 | is_well_formed,
31 | plausibly_scopes,
32 | )
33 |
# Public names of delphin.mrs: everything imported from the private
# _mrs, _operations, and _exceptions submodules is re-exported here
# (__version__ is imported as well but is not listed).
__all__ = [
    # role-name constants
    'INTRINSIC_ROLE',
    'RESTRICTION_ROLE',
    'BODY_ROLE',
    'CONSTANT_ROLE',
    # MRS data structures
    'MRS',
    'EP',
    'HCons',
    'ICons',
    # functions from delphin.mrs._operations
    'is_connected',
    'has_intrinsic_variable_property',
    'has_complete_intrinsic_variables',
    'has_unique_intrinsic_variables',
    'is_well_formed',
    'plausibly_scopes',
    'is_isomorphic',
    'compare_bags',
    'from_dmrs',
    # exceptions
    'MRSError',
    'MRSSyntaxError',
]
55 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 |
2 | # PyDelphin Documentation
3 |
4 | This subdirectory contains the content and configuration files for
5 | PyDelphin's documentation. The official documentation is built by
6 | [Read the Docs](https://readthedocs.org/), but you may want to build
7 | locally to make sure it can build without errors. In order to build
8 | the documentation locally, install PyDelphin with the `[docs]` or
9 | `[dev]` extras to get the necessary packages. It is recommended that
10 | you use a virtual environment for this.
11 |
12 | ```console
13 | $ python3 -m venv py3 && source py3/bin/activate # recommended
14 | $ pip install path/to/pydelphin[docs]
15 | ```
16 |
17 | For more information, see the documentation about [installing from
18 | source][] and [installing extra dependencies][]. After these steps
19 | complete, you should be able to build the documentation.
20 |
21 | [installing from source]: https://pydelphin.readthedocs.io/en/latest/guides/setup.html#installing-from-source
22 | [installing extra dependencies]: https://pydelphin.readthedocs.io/en/latest/guides/setup.html#installing-extra-dependencies
23 |
24 | ## Building the documentation
25 |
26 | After the dependencies have been installed, run `make html`:
27 |
28 | ```console
29 | $ cd path/to/pydelphin/docs/
30 | $ make html
31 | ```
32 |
33 | The documentation is then available at `_build/html/index.html`.
34 |
35 | ## Testing documentation coverage
36 |
37 | First run
38 |
39 | ```console
40 | $ make coverage
41 | ```
42 |
43 | Then inspect `_build/coverage/python.txt`.
44 |
--------------------------------------------------------------------------------
/docs/api/delphin.scope.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.scope
3 | =============
4 |
5 | .. automodule:: delphin.scope
6 |
7 | While the predicate-argument structure of a semantic representation
8 | is a directed acyclic graph, the quantifier scope is a tree
9 | overlaid on the edges of that graph. In a fully scope-resolved
10 | structure, there is one tree spanning the entire graph, but in
11 | underspecified representations like MRS, there are multiple
12 | subtrees that span the graph nodes but are not all connected
13 | together. The components are then connected via qeq constraints
14 | which specify a partial ordering for the tree such that quantifiers
15 | may float in between the nodes connected by qeqs.
16 |
17 | Each node in the scope tree (called a *scopal position*) may
18 | encompass multiple nodes in the predicate-argument graph. Nodes
19 | that share a scopal position are said to be in a *conjunction*.
20 |
21 | The dependency representations EDS and DMRS develop the idea of
22 | scope representatives (called *representative nodes* or sometimes
23 | *heads*), whereby a single node is selected from a conjunction to
24 | represent the conjunction as a whole.
25 |
26 | Classes
27 | -------
28 |
29 | .. autoclass:: ScopingSemanticStructure
30 | :show-inheritance:
31 | :members:
32 |
33 | Module Functions
34 | ----------------
35 |
36 | .. autofunction:: conjoin
37 | .. autofunction:: descendants
38 | .. autofunction:: representatives
39 |
40 | Exceptions
41 | ----------
42 |
43 | .. autoexception:: ScopeError
44 | :show-inheritance:
45 |
--------------------------------------------------------------------------------
/tests/codecs/dmrx_test.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin.codecs import dmrx
5 | from delphin.dmrs import DMRS, Node
6 |
7 |
@pytest.fixture
def empty_dmrs():
    """Provide a DMRS constructed with no arguments."""
    dmrs = DMRS()
    return dmrs
11 |
12 |
@pytest.fixture
def it_rains_dmrs():
    """Provide a DMRS with a single ``_rain_v_1`` node and no links."""
    rain = Node(10, '_rain_v_1', 'e', {'TENSE': 'pres'})
    return DMRS(10, 10, nodes=[rain], links=[])
20 |
21 |
def test_round_trip(empty_dmrs, it_rains_dmrs):
    """Encoding then decoding reproduces the DMRS, compact or indented."""
    for dmrs in (empty_dmrs, it_rains_dmrs):
        assert dmrx.decode(dmrx.encode(dmrs)) == dmrs
        # The indented serialization must round-trip as well; previously
        # this was only checked for the empty structure.
        assert dmrx.decode(dmrx.encode(dmrs, indent=True)) == dmrs
28 |
29 |
def test_no_properties(it_rains_dmrs):
    """Node properties are kept by default and dropped with properties=False."""
    with_properties = dmrx.encode(it_rains_dmrs)
    decoded = dmrx.decode(with_properties)
    assert decoded.nodes[0].properties == {'TENSE': 'pres'}

    without_properties = dmrx.encode(it_rains_dmrs, properties=False)
    decoded = dmrx.decode(without_properties)
    assert decoded.nodes[0].properties == {}
35 |
36 |
def test_case_sensitivity_issue_333(it_rains_dmrs):
    """Property names are lowercase in DMRX and uppercase once decoded."""
    # https://github.com/delph-in/pydelphin/issues/333
    s = dmrx.encode(it_rains_dmrs)
    assert 'tense="pres"' in s
    # A DMRX document equivalent to the it_rains_dmrs fixture, with the
    # property name in lowercase as it appears in serialized output.
    d = dmrx.decode(
        '<dmrs cfrom="-1" cto="-1" index="10" top="10">'
        '<node cfrom="-1" cto="-1" nodeid="10">'
        '<realpred lemma="rain" pos="v" sense="1" />'
        '<sortinfo cvarsort="e" tense="pres" />'
        '</node>'
        '</dmrs>'
    )
    assert d.nodes[0].predicate == '_rain_v_1'
    assert d.nodes[0].type == 'e'
    assert d.nodes[0].properties == {'TENSE': 'pres'}
52 |
--------------------------------------------------------------------------------
/delphin/exceptions.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Basic exception and warning classes for PyDelphin.
4 | """
5 |
6 | # Default modules need to import the PyDelphin version
7 | from delphin.__about__ import __version__ # noqa: F401
8 |
9 |
class PyDelphinException(Exception):
    """The base class for PyDelphin exceptions."""

    def __init__(self, *args, **kwargs):
        # Hand all arguments, unchanged, to the built-in Exception.
        super().__init__(*args, **kwargs)
14 |
15 |
class PyDelphinWarning(Warning):
    """The base class for PyDelphin warnings."""

    def __init__(self, *args, **kwargs):
        # Hand all arguments, unchanged, to the built-in Warning.
        super().__init__(*args, **kwargs)
20 |
21 |
class PyDelphinSyntaxError(PyDelphinException):
    """
    A syntax error that can carry details about where it occurred.

    Args:
        message: description of the problem
        filename: name of the file being read
        lineno: line number reported in the location line
        offset: character position reported in the location line; also
            the number of spaces placed before the caret under *text*
        text: the offending source text

    Every argument defaults to ``None`` and is stored as an attribute
    of the same name. Parts that are ``None`` are left out of the
    string form.
    """

    def __init__(self, message=None, filename=None,
                 lineno=None, offset=None, text=None):
        self.message = message
        self.filename = filename
        self.lineno = lineno
        self.offset = offset
        self.text = text

    def __str__(self):
        # Location line, e.g.: File "x", line 3, character 7
        location = []
        if self.filename is not None:
            location.append(f'File "{self.filename}"')
        if self.lineno is not None:
            location.append(f'line {self.lineno}')
        if self.offset is not None:
            location.append(f'character {self.offset}')

        lines = []
        if location:
            # The empty first element makes the report start on a new line.
            lines.extend(['', ' ' + ', '.join(location)])
        if self.text is not None:
            lines.append(' ' + self.text)
            if self.offset is not None:
                # Caret under the offending character of the text line.
                lines.append(' ' + (' ' * self.offset) + '^')
        if self.message is not None:
            lines.append(f'{type(self).__name__}: {self.message}')
        return '\n'.join(lines)
48 |
--------------------------------------------------------------------------------
/tests/ace_test.py:
--------------------------------------------------------------------------------
1 |
2 | import io
3 |
4 | import pytest
5 |
6 | from delphin import ace
7 |
8 |
@pytest.fixture
def ace_mismatch():
    """Provide an ACE grammar-image version-mismatch message."""
    message = (
        'version mismatch: '
        'this is ACE version 0.9.29, but this grammar image '
        'was compiled by ACE version 0.9.27'
    )
    return message
14 |
15 |
def mock_popen(pid=None, returncode=None, stdout=None, stderr=None):
    """
    Build a stand-in class for ``Popen`` with canned process state.

    Args:
        pid: value exposed as the ``pid`` attribute
        returncode: value exposed as ``returncode`` and returned by
            ``poll()`` and ``wait()``
        stdout: readable object whose contents ``communicate()`` returns
            as the first element
        stderr: readable object whose contents ``communicate()`` returns
            as the second element
    Returns:
        a class (not an instance); each instance gets its own empty
        ``io.StringIO`` as ``stdin`` but shares the *stdout* and
        *stderr* objects given here
    """

    class MockedPopen():
        """Process double whose state is fixed by mock_popen()'s arguments."""

        def __init__(self, args, **kwargs):
            # Keyword arguments are accepted and ignored.
            self.args = args
            self.stdin = io.StringIO()
            self.pid, self.returncode = pid, returncode
            self.stdout, self.stderr = stdout, stderr

        def communicate(self, input=None, timeout=None):
            out = self.stdout.read()
            err = self.stderr.read()
            return (out, err)

        def poll(self):
            return self.returncode

        def wait(self, timeout=None):
            return self.returncode

        # Signalling and termination are no-ops for the mock.

        def send_signal(self, signal):
            return None

        def terminate(self):
            return None

        def kill(self):
            return None

    return MockedPopen
46 |
47 |
def test_start(ace_mismatch, tmp_path, monkeypatch):
    """A process that exits with an error makes ACE startup raise."""
    # An empty file stands in for the grammar image.
    grammar = tmp_path / 'grm.dat'
    grammar.write_text('')
    grammar_path = str(grammar)

    failing_popen = mock_popen(
        pid=10,
        returncode=255,
        stdout=io.StringIO(),
        stderr=io.StringIO(ace_mismatch))

    with monkeypatch.context() as patch:
        patch.setattr(ace, 'Popen', failing_popen)
        patch.setattr(ace, '_ace_version', lambda x: (0, 9, 29))
        with pytest.raises(ace.ACEProcessError):
            ace.ACEParser(grammar_path)
        with pytest.raises(ace.ACEProcessError):
            ace.parse(grammar_path, 'Dogs sleep.')
63 |
--------------------------------------------------------------------------------
/tests/variable_test.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin import variable
5 |
6 |
def test_split():
    """split() separates the type prefix from the numeric id string."""
    valid = [
        ('x1', ('x', '1')),
        ('event10', ('event', '10')),
        ('ref-ind2', ('ref-ind', '2')),
    ]
    for var, parts in valid:
        assert variable.split(var) == parts
    # Both a type and an id are required, in that order.
    for invalid in ('x', '1', '1x'):
        with pytest.raises(ValueError):
            variable.split(invalid)
17 |
18 |
def test_type():
    """type() returns the non-numeric prefix; sort() is its alias."""
    expected = {'x1': 'x', 'event10': 'event', 'ref-ind2': 'ref-ind'}
    for var, vtype in expected.items():
        assert variable.type(var) == vtype
    with pytest.raises(ValueError):
        variable.type('x')
    # the sort alias gives the same answer
    assert variable.sort('x1') == 'x'
27 |
28 |
def test_id():
    """id() returns the numeric suffix as an integer."""
    expected = {'x1': 1, 'event10': 10, 'ref-ind2': 2}
    for var, vid in expected.items():
        assert variable.id(var) == vid
    with pytest.raises(ValueError):
        variable.id('1')
35 |
36 |
def test_is_valid():
    """is_valid() accepts type+id strings and rejects everything else."""
    for good in ('h3', 'ref-ind12'):
        assert variable.is_valid(good)
    for bad in ('x', '1', 'x 1'):
        assert not variable.is_valid(bad)
43 |
44 |
class TestVariableFactory():
    """Tests for variable.VariableFactory."""

    def test_init(self):
        # The first id is 1 unless starting_vid says otherwise; the store
        # always starts out empty.
        for kwargs, first_vid in (({}, 1), ({'starting_vid': 5}, 5)):
            vf = variable.VariableFactory(**kwargs)
            assert vf.vid == first_vid
            assert len(vf.store) == 0

    def test_new(self):
        vf = variable.VariableFactory()

        assert vf.new('x') == 'x1'
        assert vf.vid == 2
        assert vf.store == {'x1': []}

        # Properties passed to new() are recorded in the store.
        assert vf.new('e', [('PROP', 'VAL')]) == 'e2'
        assert vf.vid == 3
        assert vf.store == {'x1': [], 'e2': [('PROP', 'VAL')]}
64 |
--------------------------------------------------------------------------------
/docs/api/delphin.mrs.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.mrs
3 | ===========
4 |
5 | .. automodule:: delphin.mrs
6 |
7 | Serialization Formats
8 | ---------------------
9 |
10 | .. toctree::
11 | :maxdepth: 1
12 |
13 | delphin.codecs.indexedmrs.rst
14 | delphin.codecs.mrsjson.rst
15 | delphin.codecs.mrsprolog.rst
16 | delphin.codecs.mrx.rst
17 | delphin.codecs.simplemrs.rst
18 |
19 | Module Constants
20 | ----------------
21 |
22 | .. data:: INTRINSIC_ROLE
23 |
24 | The `ARG0` role that is associated with the intrinsic variable
25 | (:data:`EP.iv`).
26 |
27 | .. data:: RESTRICTION_ROLE
28 |
29 | The `RSTR` role used to select the restriction of a quantifier.
30 |
31 | .. data:: BODY_ROLE
32 |
33 | The `BODY` role used to select the body of a quantifier.
34 |
35 | .. data:: CONSTANT_ROLE
36 |
37 | The `CARG` role used to encode the constant value
38 | (:data:`EP.carg`) associated with certain kinds of predications,
39 | such as named entities, numbers, etc.
40 |
41 | Classes
42 | -------
43 |
44 | .. autoclass:: MRS
45 | :show-inheritance:
46 | :members:
47 |
48 | .. autoclass:: EP
49 | :show-inheritance:
50 | :members:
51 |
52 | .. autoclass:: HCons
53 | :members:
54 |
55 | .. autoclass:: ICons
56 | :members:
57 |
58 | Module Functions
59 | ----------------
60 |
61 | .. autofunction:: is_connected
62 | .. autofunction:: has_intrinsic_variable_property
63 | .. autofunction:: has_complete_intrinsic_variables
64 | .. autofunction:: has_unique_intrinsic_variables
65 | .. autofunction:: is_well_formed
66 | .. autofunction:: plausibly_scopes
67 | .. autofunction:: is_isomorphic
68 | .. autofunction:: compare_bags
69 | .. autofunction:: from_dmrs
70 |
71 | Exceptions
72 | ----------
73 |
74 | .. autoexception:: MRSError
75 | :show-inheritance:
76 |
77 | .. autoexception:: MRSSyntaxError
78 | :show-inheritance:
79 |
--------------------------------------------------------------------------------
/docs/api/delphin.repp.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.repp
3 | ============
4 |
5 | .. automodule:: delphin.repp
6 |
7 | A Regular-Expression Preprocessor [REPP]_ is a method of applying a
8 | system of regular expressions for transformation and tokenization
9 | while retaining character indices from the original input string.
10 |
11 | .. [REPP] Rebecca Dridan and Stephan Oepen. Tokenization: Returning
12 | to a long solved problem---a survey, contrastive
13 | experiment, recommendations, and toolkit. In Proceedings
14 | of the 50th Annual Meeting of the Association for
15 | Computational Linguistics (Volume 2: Short Papers), pages
16 | 378–382, Jeju Island, Korea, July 2012. Association for
17 | Computational Linguistics. URL
18 | http://www.aclweb.org/anthology/P12-2074.
19 |
20 | .. note::
21 |
22 | Requires ``regex`` (https://bitbucket.org/mrabarnett/mrab-regex/),
23 | for advanced regular expression features such as group-local inline
24 | flags. Without it, PyDelphin will fall back to the :py:mod:`re`
25 | module in the standard library which may give some unexpected
26 | results. The ``regex`` library, however, will not parse unescaped
27 | brackets in character classes without resorting to a compatibility
28 | mode (see `this issue`_ for the ERG), and PyDelphin will warn if
29 | this happens. The ``regex`` dependency is satisfied if you install
30 | PyDelphin with the ``[repp]`` extra (see :doc:`../guides/setup`).
31 |
32 | .. _this issue: https://github.com/delph-in/erg/issues/17
33 |
34 |
35 | Module Constants
36 | ----------------
37 |
38 | .. autodata:: DEFAULT_TOKENIZER
39 |
40 |
41 | Classes
42 | -------
43 |
44 | .. autoclass:: REPP
45 | :members:
46 |
47 | .. autoclass:: REPPResult(string, startmap, endmap)
48 | :members:
49 |
50 | .. autoclass:: REPPStep(input, output, operation, applied, startmap, endmap)
51 | :members:
52 |
53 |
54 | Exceptions
55 | ----------
56 |
57 | .. autoexception:: REPPError
58 | :show-inheritance:
59 |
60 | .. autoexception:: REPPWarning
61 | :show-inheritance:
62 |
--------------------------------------------------------------------------------
/delphin/cli/repp.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Tokenize sentences using a Regular Expression PreProcessor (REPP).
4 |
5 | This front-end to the delphin.repp module makes it easy to tokenize
6 | inputs from a testsuite, a file of sentences, or sentences on stdin,
7 | and can present the results in a variety of formats. It also visualizes
8 | the application of REPP rules with the --trace option, which can be
9 | useful for debugging REPP modules.
10 | """
11 |
12 | import argparse
13 | import sys
14 |
15 | from delphin.commands import repp
16 |
# Created empty so that COMMAND_INFO can reference it; the arguments are
# added at the bottom of the module, after call_repp() is defined.
parser = argparse.ArgumentParser(add_help=False)  # filled out below

# Description of the `repp` subcommand: its name, one-line help, long
# description (this module's docstring), and argument parser.
COMMAND_INFO = {
    'name': 'repp',
    'help': 'Tokenize sentences using REPP',
    'description': __doc__,
    'parser': parser
}
25 |
26 |
def call_repp(args):
    """
    Run the ``repp`` command with parsed command-line arguments.

    Args:
        args: the namespace produced by this module's parser
    Returns:
        whatever :func:`delphin.commands.repp` returns
    """
    # Color is on for 'always', follows the terminal for 'auto', and is
    # off for any other value.
    if args.color == 'always':
        use_color = True
    elif args.color == 'auto':
        use_color = sys.stdout.isatty()
    else:
        use_color = False

    # Read from standard input when no FILE was given.
    source = args.FILE or sys.stdin
    trace_level = 1 if args.trace else 0

    return repp(
        source,
        config=args.config,
        module=args.m,
        active=args.a,
        format=args.format,
        color=use_color,
        trace_level=trace_level)
38 |
39 |
# Register the handler and declare the arguments of the `repp` subcommand.
parser.set_defaults(func=call_repp)
parser.add_argument(
    'FILE', nargs='?', help='an input file')
# A .set configuration file and a main .rpp file are alternatives, so the
# two options cannot be combined.
group = parser.add_mutually_exclusive_group()
group.add_argument(
    '-c', '--config', metavar='PATH',
    help='a .set configuration file')
group.add_argument(
    '-m', metavar='PATH', help='the main .rpp file')
parser.add_argument(
    '-a', action='append', metavar='NAME',
    help='activate an external module')
parser.add_argument(
    '-f', '--format',
    metavar='FMT',
    choices=('string', 'line', 'triple', 'yy'),
    default='yy',
    help='output token format')
# Restrict --color to the documented values so that a typo is reported
# instead of silently disabling color.
parser.add_argument(
    '--color',
    metavar='WHEN',
    choices=('auto', 'always', 'never'),
    default='auto',
    help='(auto|always|never) use ANSI color (default: auto)')
parser.add_argument(
    '--trace', action='store_true',
    help='print each step that modifies an input string')
66 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Build and Publish to PyPI or TestPyPI
2 |
3 | # Adapted from https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
4 |
5 | on:
6 | push
7 |
8 | jobs:
9 | build:
10 | name: Build distribution
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v4
14 | - name: Set up Python
15 | uses: actions/setup-python@v4
16 | with:
17 | python-version: "3.x"
18 | - name: Install Hatch
19 | run: pipx install hatch
20 | - name: Build
21 | run: hatch build
22 | - name: Store the distribution packages
23 | uses: actions/upload-artifact@v4
24 | with:
25 | name: python-package-distributions
26 | path: dist/
27 |
28 | publish-to-pypi:
29 | name: Publish distributions to PyPI
30 | if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
31 | needs:
32 | - build
33 | runs-on: ubuntu-latest
34 | environment:
35 | name: pypi
36 | url: https://pypi.org/p/PyDelphin
37 | permissions:
38 | id-token: write # IMPORTANT: mandatory for trusted publishing
39 | steps:
40 | - name: Download the dists
41 | uses: actions/download-artifact@v4.1.7
42 | with:
43 | name: python-package-distributions
44 | path: dist/
45 | - name: Publish to PyPI
46 | uses: pypa/gh-action-pypi-publish@release/v1
47 |
48 | publish-to-testpypi:
49 | name: Publish distributions to TestPyPI
50 | needs:
51 | - build
52 | runs-on: ubuntu-latest
53 | environment:
54 | name: testpypi
55 | url: https://test.pypi.org/p/PyDelphin
56 | permissions:
57 | id-token: write # IMPORTANT: mandatory for trusted publishing
58 | steps:
59 | - name: Download the dists
60 | uses: actions/download-artifact@v4.1.7
61 | with:
62 | name: python-package-distributions
63 | path: dist/
64 | - name: Publish to TestPyPI
65 | uses: pypa/gh-action-pypi-publish@release/v1
66 | with:
67 | repository-url: https://test.pypi.org/legacy/
68 | skip-existing: true
69 |
--------------------------------------------------------------------------------
/docs/api/delphin.dmrs.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.dmrs
3 | ============
4 |
5 | .. automodule:: delphin.dmrs
6 |
7 | Serialization Formats
8 | ---------------------
9 |
10 | .. toctree::
11 | :maxdepth: 1
12 |
13 | delphin.codecs.dmrsjson.rst
14 | delphin.codecs.dmrspenman.rst
15 | delphin.codecs.dmrx.rst
16 | delphin.codecs.simpledmrs.rst
17 |
18 | Module Constants
19 | ----------------
20 |
21 | .. data:: FIRST_NODE_ID
22 |
23 | The node identifier `10000` which is conventionally the first
24 | identifier used in a DMRS structure. This constant is mainly
25 | used for DMRS conversion or serialization.
26 |
27 | .. data:: RESTRICTION_ROLE
28 |
29 | The `RSTR` role used in links to select the restriction of a
30 | quantifier.
31 |
32 | .. data:: EQ_POST
33 |
34 | The `EQ` post-slash label on links that indicates the endpoints
35 | of a link share a scope.
36 |
37 | .. data:: NEQ_POST
38 |
39 | The `NEQ` post-slash label on links that indicates the endpoints
40 | of a link do not share a scope.
41 |
42 | .. data:: HEQ_POST
43 |
44 | The `HEQ` post-slash label on links that indicates the
45 | :data:`~Link.start` node of a link immediately outscopes the
46 | :data:`~Link.end` node.
47 |
48 | .. data:: H_POST
49 |
50 | The `H` post-slash label on links that indicates the
51 | :data:`~Link.start` node of a link is qeq to the
52 | :data:`~Link.end` node (i.e., :data:`~Link.start` scopes over
53 | :data:`~Link.end`, but not necessarily immediately).
54 |
55 | .. data:: CVARSORT
56 |
57 | The `cvarsort` dictionary key in :data:`Node.sortinfo` that
58 | accesses the node's :data:`~Node.type`.
59 |
60 | Classes
61 | -------
62 |
63 | .. autoclass:: DMRS
64 | :show-inheritance:
65 | :members:
66 |
67 | .. autoclass:: Node
68 | :show-inheritance:
69 | :members:
70 |
71 | .. autoclass:: Link
72 | :show-inheritance:
73 | :members:
74 |
75 | Module Functions
76 | ----------------
77 |
78 | .. autofunction:: from_mrs
79 |
80 | Exceptions
81 | ----------
82 |
83 | .. autoexception:: DMRSSyntaxError
84 | :show-inheritance:
85 |
86 | .. autoexception:: DMRSWarning
87 | :show-inheritance:
88 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.mrsprolog.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.mrsprolog
3 | ========================
4 |
5 | .. automodule:: delphin.codecs.mrsprolog
6 |
7 | Example:
8 |
9 | * *The new chef whose soup accidentally spilled quit and left.*
10 |
11 | ::
12 |
13 | psoa(h0,e2,
14 | [rel('_the_q',h4,
15 | [attrval('ARG0',x3),
16 | attrval('RSTR',h5),
17 | attrval('BODY',h6)]),
18 | rel('_new_a_1',h7,
19 | [attrval('ARG0',e8),
20 | attrval('ARG1',x3)]),
21 | rel('_chef_n_1',h7,
22 | [attrval('ARG0',x3)]),
23 | rel('def_explicit_q',h9,
24 | [attrval('ARG0',x10),
25 | attrval('RSTR',h11),
26 | attrval('BODY',h12)]),
27 | rel('poss',h13,
28 | [attrval('ARG0',e14),
29 | attrval('ARG1',x10),
30 | attrval('ARG2',x3)]),
31 | rel('_soup_n_1',h13,
32 | [attrval('ARG0',x10)]),
33 | rel('_accidental_a_1',h7,
34 | [attrval('ARG0',e15),
35 | attrval('ARG1',e16)]),
36 | rel('_spill_v_1',h7,
37 | [attrval('ARG0',e16),
38 | attrval('ARG1',x10),
39 | attrval('ARG2',i17)]),
40 | rel('_quit_v_1',h1,
41 | [attrval('ARG0',e18),
42 | attrval('ARG1',x3),
43 | attrval('ARG2',i19)]),
44 | rel('_and_c',h1,
45 | [attrval('ARG0',e2),
46 | attrval('ARG1',e18),
47 | attrval('ARG2',e20)]),
48 | rel('_leave_v_1',h1,
49 | [attrval('ARG0',e20),
50 | attrval('ARG1',x3),
51 | attrval('ARG2',i21)])],
52 | hcons([qeq(h0,h1),qeq(h5,h7),qeq(h11,h13)]))
53 |
54 |
55 | Serialization Functions
56 | -----------------------
57 |
58 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
59 |
60 | See the :func:`dump` codec API documentation.
61 |
62 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
63 |
64 | See the :func:`dumps` codec API documentation.
65 |
66 | .. function:: encode(m, properties=True, lnk=True, indent=False)
67 |
68 | See the :func:`encode` codec API documentation.
69 |
--------------------------------------------------------------------------------
/tests/edm_test.py:
--------------------------------------------------------------------------------
1 |
2 | from typing import Tuple
3 |
4 | from delphin.codecs import eds
5 | from delphin.edm import compute
6 |
# Reference ("gold") EDS structures passed as the first argument to
# compute(). NOTE(review): the character spans look like they fit
# "Kim studied for and passed her test." -- confirm.
golds = eds.loads('''
{e2:
_1:proper_q<0:3>[BV x3]
x3:named<0:3>("Kim"){x PERS 3, NUM sg, IND +}[]
e10:_study_v_1<4:11>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3]
e12:_for_p<12:15>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 e10, ARG2 x13]
e2:_and_c<16:19>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[L-HNDL e10, L-INDEX e10, R-HNDL e14, R-INDEX e14]
e14:_pass_v_1<20:26>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3, ARG2 x13]
_2:def_explicit_q<27:30>[BV x13]
e20:poss<27:30>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 x13, ARG2 x21]
_3:pronoun_q<27:30>[BV x21]
x21:pron<27:30>{x PERS 3, NUM sg, GEND f, PT std}[]
x13:_test_n_of<31:36>{x PERS 3, NUM sg, IND +}[]}
''')

# Structures compared against `golds`. Besides using different node
# identifiers, this graph has a different top (e9 rather than e2), gives
# _and_c the arguments ARG1/ARG2 instead of L-HNDL/L-INDEX/R-HNDL/R-INDEX,
# and adds an IND property to the pron node.
tests = eds.loads('''
{e9:
_1:proper_q<0:3>[BV x3]
x3:named<0:3>("Kim"){x PERS 3, NUM sg, IND +}[]
e9:_study_v_1<4:11>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3]
e11:_for_p<12:15>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 e9, ARG2 x12]
e2:_and_c<16:19>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 e9, ARG2 e13]
e13:_pass_v_1<20:26>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3, ARG2 x12]
_2:def_explicit_q<27:30>[BV x12]
e18:poss<27:30>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 x12, ARG2 x19]
_3:pronoun_q<27:30>[BV x19]
x19:pron<27:30>{x PERS 3, NUM sg, GEND f, IND +, PT std}[]
x12:_test_n_of<31:36>{x PERS 3, NUM sg, IND +}[]}
''')
36 |
37 |
def edm_sig3(*args, **kwargs) -> Tuple[float, float, float]:
    """
    Call :func:`delphin.edm.compute` and round its three results.

    All arguments are passed through to ``compute()``. The three values
    it returns are each rounded to three decimal places and returned as
    a tuple in the same order.
    """
    first, second, third = compute(*args, **kwargs)
    rounded = (round(first, 3), round(second, 3), round(third, 3))
    return rounded
41 |
42 |
def test_edm_from_eds():
    """Check the rounded scores with default weights and each weight zeroed."""
    cases = [
        ({}, (0.934, 0.919, 0.927)),
        ({'name_weight': 0}, (0.920, 0.902, 0.911)),
        ({'argument_weight': 0}, (0.959, 0.979, 0.969)),
        ({'constant_weight': 0}, (0.933, 0.918, 0.926)),
        ({'top_weight': 0}, (0.950, 0.934, 0.942)),
    ]
    for weights, expected in cases:
        assert edm_sig3(golds, tests, **weights) == expected
49 |
--------------------------------------------------------------------------------
/docs/api/delphin.predicate.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.predicate
3 | =================
4 |
5 | .. automodule:: delphin.predicate
6 |
7 | Semantic predicates are atomic symbols representing semantic
8 | entities or constructions. For example, in the `English Resource
9 | Grammar <http://www.delph-in.net/erg/>`_, `_mouse_n_1` is the
10 | predicate for the word *mouse*, but it is underspecified for
11 | lexical semantics---it could be an animal, a computer's pointing
12 | device, or something else. Another example from the ERG is
13 | `compound`, which is used to link two compounded nouns, such as for
14 | *mouse pad*.
15 |
16 | There are two main categories of predicates: **abstract** and
17 | **surface**. In form, abstract predicates do not begin with an
18 | underscore and in usage they often correspond to semantic
19 | constructions that are not represented by a token in the input,
20 | such as the `compound` example above. Surface predicates, in
21 | contrast, are the semantic representation of surface (i.e.,
22 | lexical) tokens, such as the `_mouse_n_1` example above. In form,
23 | they must always begin with a single underscore, and have two or
24 | three components: lemma, part-of-speech, and (optionally) sense.
25 |
26 | .. seealso::
27 | - The DELPH-IN wiki about predicates:
28 | https://github.com/delph-in/docs/wiki/PredicateRfc
29 |
30 | In DELPH-IN there is the concept of "real predicates" which are
31 | surface predicates decomposed into their lemma, part-of-speech, and
32 | sense, but in PyDelphin (as of `v1.0.0`_) predicates are always
33 | simple strings. However, this module has functions for composing
34 | and decomposing predicates from/to their components (the
35 | :func:`create` and :func:`split` functions, respectively). In
36 | addition, there are functions to normalize (:func:`normalize`) and
37 | validate (:func:`is_valid`, :func:`is_surface`,
38 | :func:`is_abstract`) predicate symbols.
39 |
40 | .. _v1.0.0: https://github.com/delph-in/pydelphin/releases/tag/v1.0.0
41 |
42 |
43 | Module Functions
44 | ----------------
45 |
46 | .. autofunction:: split
47 | .. autofunction:: create
48 | .. autofunction:: normalize
49 | .. autofunction:: is_valid
50 | .. autofunction:: is_surface
51 | .. autofunction:: is_abstract
52 |
53 |
54 | Exceptions
55 | ----------
56 |
57 | .. autoexception:: PredicateError
58 | :show-inheritance:
59 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/delphin/codecs/ace.py:
--------------------------------------------------------------------------------
1 | """
2 | Deserialization of MRSs in ACE's stdout protocols.
3 | """
4 |
5 | from pathlib import Path
6 |
7 | from delphin.codecs import simplemrs
8 | from delphin.util import SExpr
9 |
# Codec metadata: the semantic representation this codec reads is MRS.
CODEC_INFO = {
    'representation': 'mrs',
}
13 |
14 |
def load(source):
    """
    Deserialize SimpleMRSs from ACE parsing output (handle or filename)

    Args:
        source (str, file): ACE parsing output as a filename or handle;
            anything with a ``read`` attribute is treated as an open
            handle, anything else as a path (``~`` is expanded)
    Returns:
        a list of MRS objects
    """
    if not hasattr(source, 'read'):
        path = Path(source).expanduser()
        with path.open() as handle:
            return list(_decode(handle))
    return list(_decode(source))
31 |
32 |
def loads(s):
    """
    Deserialize SimpleMRSs from ACE parsing output (as a string)

    Args:
        s (str): ACE parsing output as a string; an object with a
            ``decode`` method (e.g., bytes) is first decoded as UTF-8
    Returns:
        a list of MRS objects
    """
    text = s.decode('utf-8') if hasattr(s, 'decode') else s
    return list(_decode(text.splitlines()))
46 |
47 |
def decode(s):
    """
    Deserialize the first MRS object found in ACE parsing output.

    Args:
        s (str): ACE parsing output; an object with a ``decode``
            method (e.g., bytes) is first decoded as UTF-8
    Returns:
        the first MRS object yielded for the output
    Raises:
        StopIteration: if no MRS is found in *s*
    """
    text = s.decode('utf-8') if hasattr(s, 'decode') else s
    found = _decode(text.splitlines())
    return next(found)
56 |
57 |
58 | # read simplemrs from ACE output
59 |
60 | def _decode(lines):
61 | surface = None
62 | newline = False
63 | for line in lines:
64 | if line.startswith('SENT: '):
65 | surface = line[6:].rstrip()
66 | # regular ACE output
67 | elif line.startswith('['):
68 | m = line.partition(' ; ')[0].strip()
69 | m = simplemrs.decode(m)
70 | m.surface = surface
71 | yield m
72 | # with --tsdb-stdout
73 | elif line.startswith('('):
74 | while line:
75 | data, remainder = SExpr.parse(line)
76 | line = remainder.lstrip()
77 | if len(data) == 2 and data[0] == ':results':
78 | for result in data[1]:
79 | for key, val in result:
80 | if key == ':mrs':
81 | yield simplemrs.decode(val)
82 | elif line == '\n':
83 | if newline:
84 | surface = None
85 | newline = False
86 | else:
87 | newline = True
88 | else:
89 | pass
90 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.eds.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.eds
3 | ==================
4 |
5 | .. automodule:: delphin.codecs.eds
6 |
7 | Example:
8 |
9 | * *The new chef whose soup accidentally spilled quit and left.*
10 |
11 | ::
12 |
13 | {e18:
14 | _1:_the_q<0:3>[BV x3]
15 | e8:_new_a_1<4:7>{e SF prop, TENSE untensed, MOOD indicative, PROG bool, PERF -}[ARG1 x3]
16 | x3:_chef_n_1<8:12>{x PERS 3, NUM sg, IND +}[]
17 | _2:def_explicit_q<13:18>[BV x10]
18 | e14:poss<13:18>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 x10, ARG2 x3]
19 | x10:_soup_n_1<19:23>{x PERS 3, NUM sg}[]
20 | e15:_accidental_a_1<24:36>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 e16]
21 | e16:_spill_v_1<37:44>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x10]
22 | e18:_quit_v_1<45:49>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3]
23 | e2:_and_c<50:53>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 e18, ARG2 e20]
24 | e20:_leave_v_1<54:59>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3]
25 | }
26 |
27 |
28 | Deserialization Functions
29 | -------------------------
30 |
31 | .. function:: load(source)
32 |
33 | See the :func:`load` codec API documentation.
34 |
35 | .. function:: loads(s)
36 |
37 | See the :func:`loads` codec API documentation.
38 |
39 | .. function:: decode(s)
40 |
41 | See the :func:`decode` codec API documentation.
42 |
43 |
44 | Serialization Functions
45 | -----------------------
46 |
47 | .. function:: dump(ms, destination, properties=True, lnk=True, show_status=False, indent=False, encoding='utf-8')
48 |
49 | See the :func:`dump` codec API documentation.
50 |
51 | **Extensions:**
52 |
53 | :param bool show_status: if `True`, indicate disconnected
54 | components
55 |
56 | .. function:: dumps(ms, properties=True, lnk=True, show_status=False, indent=False)
57 |
58 | See the :func:`dumps` codec API documentation.
59 |
60 | **Extensions:**
61 |
62 | :param bool show_status: if `True`, indicate disconnected
63 | components
64 |
65 | .. function:: encode(m, properties=True, lnk=True, show_status=False, indent=False)
66 |
67 | See the :func:`encode` codec API documentation.
68 |
69 | **Extensions:**
70 |
71 | :param bool show_status: if `True`, indicate disconnected
72 | components
73 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.simpledmrs.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.simpledmrs
3 | =========================
4 |
5 | .. automodule:: delphin.codecs.simpledmrs
6 |
7 | Example:
8 |
9 | * *The new chef whose soup accidentally spilled quit and left.*
10 |
11 | ::
12 |
13 | dmrs {
14 | ["The new chef whose soup accidentally spilled quit and left." top=10008 index=10009]
15 | 10000 [_the_q<0:3>];
16 | 10001 [_new_a_1<4:7> e SF=prop TENSE=untensed MOOD=indicative PROG=bool PERF=-];
17 | 10002 [_chef_n_1<8:12> x PERS=3 NUM=sg IND=+];
18 | 10003 [def_explicit_q<13:18>];
19 | 10004 [poss<13:18> e SF=prop TENSE=untensed MOOD=indicative PROG=- PERF=-];
20 | 10005 [_soup_n_1<19:23> x PERS=3 NUM=sg];
21 | 10006 [_accidental_a_1<24:36> e SF=prop TENSE=untensed MOOD=indicative PROG=- PERF=-];
22 | 10007 [_spill_v_1<37:44> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-];
23 | 10008 [_quit_v_1<45:49> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-];
24 | 10009 [_and_c<50:53> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-];
25 | 10010 [_leave_v_1<54:59> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-];
26 | 10000:RSTR/H -> 10002;
27 | 10001:ARG1/EQ -> 10002;
28 | 10003:RSTR/H -> 10005;
29 | 10004:ARG1/EQ -> 10005;
30 | 10004:ARG2/NEQ -> 10002;
31 | 10006:ARG1/EQ -> 10007;
32 | 10007:ARG1/NEQ -> 10005;
33 | 10008:ARG1/NEQ -> 10002;
34 | 10009:ARG1/EQ -> 10008;
35 | 10009:ARG2/EQ -> 10010;
36 | 10010:ARG1/NEQ -> 10002;
37 | 10007:MOD/EQ -> 10002;
38 | 10010:MOD/EQ -> 10008;
39 | }
40 |
41 |
42 | Deserialization Functions
43 | -------------------------
44 |
45 | .. function:: load(source)
46 |
47 | See the :func:`load` codec API documentation.
48 |
49 | .. function:: loads(s)
50 |
51 | See the :func:`loads` codec API documentation.
52 |
53 | .. function:: decode(s)
54 |
55 | See the :func:`decode` codec API documentation.
56 |
57 |
58 | Serialization Functions
59 | -----------------------
60 |
61 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
62 |
63 | See the :func:`dump` codec API documentation.
64 |
65 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
66 |
67 | See the :func:`dumps` codec API documentation.
68 |
69 | .. function:: encode(m, properties=True, lnk=True, indent=False)
70 |
71 | See the :func:`encode` codec API documentation.
72 |
--------------------------------------------------------------------------------
/docs/api/delphin.itsdb.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.itsdb
3 | =============
4 |
5 | .. seealso::
6 |
7 | See :doc:`../guides/itsdb` for a more user-friendly introduction
8 |
9 |
10 | .. automodule:: delphin.itsdb
11 |
12 |
13 | .. note::
14 |
15 | This module implements high-level structures and operations on
16 | top of TSDB test suites. For the basic, low-level functionality,
17 | see :mod:`delphin.tsdb`. For complex queries of the databases,
18 | see :mod:`delphin.tsql`.
19 |
20 | [incr tsdb()] is a tool built on top of TSDB databases for the
21 | purpose of profiling and comparing grammar versions using test
22 | suites. This module is named after that tool as it also builds
23 | higher-level operations on top of TSDB test suites but it has a
24 | much narrower scope. The aim of this module is to assist users with
25 | creating, processing, or manipulating test suites.
26 |
27 | The typical test suite contains these files::
28 |
29 | testsuite/
30 | analysis fold item-set parse relations run tree
31 | decision item output phenomenon result score update
32 | edge item-phenomenon parameter preference rule set
33 |
34 |
35 | Test Suite Classes
36 | ------------------
37 |
38 | PyDelphin has three classes for working with [incr tsdb()] test suite
39 | databases:
40 |
41 | - :class:`TestSuite`
42 | - :class:`Table`
43 | - :class:`Row`
44 |
45 | .. autoclass:: TestSuite
46 | :show-inheritance:
47 | :members:
48 | :inherited-members:
49 |
50 | .. autoclass:: Table
51 | :show-inheritance:
52 | :members:
53 | :inherited-members:
54 |
55 | .. autoclass:: Row
56 | :members:
57 |
58 |
59 | Processing Test Suites
60 | ----------------------
61 |
62 | The :meth:`TestSuite.process` method takes an optional
63 | :class:`FieldMapper` object which manages the mapping of data in
64 | :class:`~delphin.interface.Response` objects from a
65 | :class:`~delphin.interface.Processor` to the tables and columns of a
66 | test suite. In most cases the user will not need to customize or
67 | instantiate these objects as the default works with standard [incr
68 | tsdb()] schemas, but :class:`FieldMapper` can be subclassed in order
69 | to handle non-standard schemas, e.g., for machine translation
70 | workflows.
71 |
72 | .. autoclass:: FieldMapper
73 |
74 | .. automethod:: map
75 | .. automethod:: cleanup
76 | .. automethod:: collect
77 |
78 | Utility Functions
79 | -----------------
80 |
81 | .. autofunction:: match_rows
82 |
83 | Exceptions
84 | ----------
85 |
86 | .. autoexception:: ITSDBError
87 | :show-inheritance:
88 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.simplemrs.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.simplemrs
3 | ========================
4 |
5 | .. automodule:: delphin.codecs.simplemrs
6 |
7 | SimpleMRS is a format for Minimal Recursion Semantics that aims to
8 | be readable equally by humans and machines.
9 |
10 | Example:
11 |
12 | * *The new chef whose soup accidentally spilled quit and left.*
13 |
14 | .. code:: simplemrs
15 |
16 | [ TOP: h0
17 | INDEX: e2 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ]
18 | RELS: < [ _the_q<0:3> LBL: h4 ARG0: x3 [ x PERS: 3 NUM: sg IND: + ] RSTR: h5 BODY: h6 ]
19 | [ _new_a_1<4:7> LBL: h7 ARG0: e8 [ e SF: prop TENSE: untensed MOOD: indicative PROG: bool PERF: - ] ARG1: x3 ]
20 | [ _chef_n_1<8:12> LBL: h7 ARG0: x3 ]
21 | [ def_explicit_q<13:18> LBL: h9 ARG0: x10 [ x PERS: 3 NUM: sg ] RSTR: h11 BODY: h12 ]
22 | [ poss<13:18> LBL: h13 ARG0: e14 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: x10 ARG2: x3 ]
23 | [ _soup_n_1<19:23> LBL: h13 ARG0: x10 ]
24 | [ _accidental_a_1<24:36> LBL: h7 ARG0: e15 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: e16 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ]
25 | [ _spill_v_1<37:44> LBL: h7 ARG0: e16 ARG1: x10 ARG2: i17 ]
26 | [ _quit_v_1<45:49> LBL: h1 ARG0: e18 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ARG1: x3 ARG2: i19 ]
27 | [ _and_c<50:53> LBL: h1 ARG0: e2 ARG1: e18 ARG2: e20 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ]
28 | [ _leave_v_1<54:59> LBL: h1 ARG0: e20 ARG1: x3 ARG2: i21 ] >
29 | HCONS: < h0 qeq h1 h5 qeq h7 h11 qeq h13 > ]
30 |
31 |
32 | Deserialization Functions
33 | -------------------------
34 |
35 | .. function:: load(source)
36 |
37 | See the :func:`load` codec API documentation.
38 |
39 | .. function:: loads(s)
40 |
41 | See the :func:`loads` codec API documentation.
42 |
43 | .. function:: decode(s)
44 |
45 | See the :func:`decode` codec API documentation.
46 |
47 |
48 | Serialization Functions
49 | -----------------------
50 |
51 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
52 |
53 | See the :func:`dump` codec API documentation.
54 |
55 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
56 |
57 | See the :func:`dumps` codec API documentation.
58 |
59 | .. function:: encode(m, properties=True, lnk=True, indent=False)
60 |
61 | See the :func:`encode` codec API documentation.
62 |
--------------------------------------------------------------------------------
/tests/eds_test.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin.eds import EDS, EDSWarning, Node, from_mrs
5 | from delphin.mrs import EP, MRS, HCons
6 |
7 |
@pytest.fixture
def dogs_bark():
    """Keyword arguments for constructing the EDS of "dogs bark"."""
    bark = Node('e2', '_bark_v_1', type='e', edges={'ARG1': 'x4'})
    udef = Node('_1', 'udef_q', edges={'BV': 'x4'})
    dog = Node('x4', '_dog_n_1', type='x')
    return {'top': 'e2', 'nodes': [bark, udef, dog]}
16 |
17 |
@pytest.fixture
def dogs_bark_mrs():
    """An MRS for "dogs bark", the source for the EDS conversions."""
    bark = EP('_bark_v_1', label='h1', args={'ARG0': 'e2', 'ARG1': 'x4'})
    udef = EP(
        'udef_q',
        label='h3',
        args={'ARG0': 'x4', 'RSTR': 'h5', 'BODY': 'h6'},
    )
    dog = EP('_dog_n_1', label='h7', args={'ARG0': 'x4'})
    qeqs = [HCons.qeq('h0', 'h1'), HCons.qeq('h5', 'h7')]
    return MRS(top='h0', index='e2', rels=[bark, udef, dog], hcons=qeqs)
29 |
30 |
def test_empty_EDS():
    """An EDS built without arguments has no top and no nodes."""
    empty = EDS()
    assert empty.top is None
    assert empty.nodes == []
35 |
36 |
def test_basic_EDS(dogs_bark):
    """The top, nodes, and edges of a small EDS are as constructed."""
    eds = EDS(**dogs_bark)
    assert eds.top == 'e2'
    assert len(eds.nodes) == 3

    bark, udef, dog = eds.nodes
    assert bark.predicate == '_bark_v_1'
    assert udef.predicate == 'udef_q'
    assert dog.predicate == '_dog_n_1'

    assert bark.edges == {'ARG1': 'x4'}
    assert udef.edges == {'BV': 'x4'}
    assert dog.edges == {}

    # edges are (start, role, end) triples, in node order
    assert len(eds.edges) == 2
    first, second = eds.edges
    assert first == ('e2', 'ARG1', 'x4')
    assert second == ('_1', 'BV', 'x4')
53 |
54 |
def test_from_mrs(dogs_bark, dogs_bark_mrs):
    """Converting the MRS gives the expected EDS, even without a TOP."""
    expected = EDS(**dogs_bark)

    converted = from_mrs(dogs_bark_mrs)
    assert converted[converted.top] == expected[expected.top]
    assert converted.nodes == expected.nodes
    assert converted == expected

    # recover TOP from INDEX
    dogs_bark_mrs.top = None
    converted = from_mrs(dogs_bark_mrs)
    assert converted == EDS(**dogs_bark)

    # no TOP or INDEX
    dogs_bark_mrs.index = None
    with pytest.warns(EDSWarning):
        converted = from_mrs(dogs_bark_mrs)
    topless = EDS(top=None, nodes=dogs_bark['nodes'])
    assert converted == topless
73 |
def test_from_mrs_broken_hcons_issue_319(dogs_bark_mrs):
    """Conversion warns, rather than fails, when a qeq has no target."""
    # broken top
    top_ep = dogs_bark_mrs.rels[0]
    top_ep.label = 'h99'
    with pytest.warns(EDSWarning):
        converted = from_mrs(dogs_bark_mrs)
    assert converted.top == 'e2'

    # it probably rained
    probable = EP(
        '_probable_a_1', label='h1', args={'ARG0': 'i4', 'ARG1': 'h5'}
    )
    rain = EP('_rain_v_1', label='h6', args={'ARG0': 'e2'})
    m = MRS(
        top='h0',
        index='e2',
        rels=[probable, rain],
        hcons=[HCons.qeq('h0', 'h1'), HCons.qeq('h5', 'h6')],
    )
    # no warning normally
    from_mrs(m)
    # broken hcons
    m.rels[1].label = 'h99'
    with pytest.warns(EDSWarning):
        converted = from_mrs(m)
    assert len(converted.nodes) == 2
    assert len(converted.arguments()['i4']) == 0
97 |
--------------------------------------------------------------------------------
/delphin/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """
4 | Entry-point for the 'delphin' command.
5 | """
6 |
7 | import argparse
8 | import importlib
9 | import logging
10 | import os
11 | import sys
12 |
13 | import delphin.cli
14 | from delphin import util
15 |
16 | # Default modules need to import the PyDelphin version
17 | from delphin.__about__ import __version__
18 | from delphin.exceptions import PyDelphinException
19 |
# Logging is configured once at import time; main() later sets the level
# of the 'delphin' logger from the -v/-q options.
logging.basicConfig()  # just use defaults here
logger = logging.getLogger(__name__)  # for this module
22 |
23 |
def main():
    """
    Parse the command line and run the selected subcommand.

    If no subcommand was selected, the help message is printed and the
    program exits.  With --quiet, standard output is redirected to the
    null device and the verbosity is forced to 0; otherwise the
    verbosity is capped at 3.  The level of the 'delphin' logger is
    lowered by 10 from ``logging.ERROR`` for each level of verbosity.

    Raises:
        SystemExit: when no subcommand is given, or when the subcommand
            raises a :exc:`PyDelphinException`.  In the latter case the
            exit status is always non-zero: the traceback is logged if
            debug logging is enabled, otherwise the error message alone
            is printed.
    """
    args = parser.parse_args()

    if not hasattr(args, 'func'):
        # print_help() returns None, so this exits with status 0
        sys.exit(parser.print_help())

    # global init
    if args.quiet:
        args.verbosity = 0
        sys.stdout.close()
        sys.stdout = open(os.devnull, 'w')
    else:
        args.verbosity = min(args.verbosity, 3)

    logging.getLogger('delphin').setLevel(
        logging.ERROR - (args.verbosity * 10))

    try:
        args.func(args)
    except PyDelphinException as exc:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception('an error has occurred; see below')
            # the command failed, so do not report success to the shell
            sys.exit(1)
        else:
            sys.exit(str(exc))
48 |
49 |
# Top-level parser; subcommands are attached to it further below
parser = argparse.ArgumentParser(
    prog='delphin',
    description='PyDelphin command-line interface',
)
parser.add_argument(
    '-V', '--version', action='version', version='%(prog)s ' + __version__)


# Arguments for all commands
# (add_help=False because this parser is only used as a parent parser)
common_parser = argparse.ArgumentParser(add_help=False)
common_parser.add_argument(
    '-v',
    '--verbose',
    action='count',
    dest='verbosity',
    default=0,
    help='increase verbosity')
common_parser.add_argument(
    '-q',
    '--quiet',
    action='store_true',
    help='suppress output on <stdout> and <stderr>')
72 |
73 |
# Dynamically add subparsers from delphin.cli namespace
subparser = parser.add_subparsers(title='available subcommands', metavar='')
for _name, fullname in util.namespace_modules(delphin.cli).items():
    # A module that cannot be imported is logged and skipped so the
    # remaining subcommands are still registered.
    try:
        mod = importlib.import_module(fullname)
    except ImportError:
        logger.exception('could not import %s', fullname)
        continue

    # Each subcommand module must define a COMMAND_INFO mapping.
    try:
        INFO = mod.COMMAND_INFO
    except AttributeError:
        logger.exception('%s does not define COMMAND_INFO', fullname)
        continue

    # 'name' and 'parser' are required keys (a missing one raises the
    # KeyError handled below); 'help' and 'description' are optional.
    try:
        subparser.add_parser(
            INFO['name'],
            help=INFO.get('help'),
            parents=[common_parser, INFO['parser']],
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description=INFO.get('description'),
        )
    except KeyError:
        logger.exception('required info missing')


if __name__ == '__main__':
    main()
103 |
--------------------------------------------------------------------------------
/docs/api/delphin.interface.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.interface
3 | =================
4 |
5 | .. automodule:: delphin.interface
6 |
7 | This module manages the communication between data providers, namely
8 | processors like `ACE <http://sweaglesw.org/linguistics/ace/>`_ or
9 | remote services like the `DELPH-IN Web API
10 | <https://github.com/delph-in/docs/wiki/ErgApi>`_, and user code or
11 | storage backends, namely [incr tsdb()] :doc:`test suites
12 | <delphin.itsdb>`. An interface sends requests to a provider, then
13 | receives and interprets the response. The interface may also detect
14 | and deserialize supported DELPH-IN formats if the appropriate modules
15 | are available.
16 |
17 |
18 | Classes
19 | -------
20 |
21 | .. autoclass:: Processor
22 | :members:
23 |
24 | .. autoclass:: Response
25 | :members:
26 |
27 | .. autoclass:: Result
28 | :members:
29 |
30 |
31 | Exceptions
32 | ----------
33 |
34 | .. autoexception:: InterfaceError
35 | :show-inheritance:
36 |
37 |
38 | .. _preprocessor-example:
39 |
40 | Wrapping a Processor for Preprocessing
41 | --------------------------------------
42 |
43 | The :class:`~delphin.interface.Processor` class can be used to
44 | implement a preprocessor that maintains the same interface as the
45 | underlying processor. The following example wraps an
46 | :class:`~delphin.ace.ACEParser` instance of the
47 | `English Resource Grammar <http://www.delph-in.net/erg/>`_ with a
48 | :class:`~delphin.repp.REPP` instance.
49 |
50 | >>> from delphin import interface
51 | >>> from delphin import ace
52 | >>> from delphin import repp
53 | >>>
54 | >>> class REPPWrapper(interface.Processor):
55 | ... def __init__(self, cpu, rpp):
56 | ... self.cpu = cpu
57 | ... self.task = cpu.task
58 | ... self.rpp = rpp
59 | ... def process_item(self, datum, keys=None):
60 | ... preprocessed_datum = str(self.rpp.tokenize(datum))
61 | ... return self.cpu.process_item(preprocessed_datum, keys=keys)
62 | ...
63 | >>> # The preprocessor can be used like a normal Processor:
64 | >>> rpp = repp.REPP.from_config('../../grammars/erg/pet/repp.set')
65 | >>> grm = '../../grammars/erg-2018-x86-64-0.9.30.dat'
66 | >>> with ace.ACEParser(grm, cmdargs=['-y']) as _cpu:
67 | ... cpu = REPPWrapper(_cpu, rpp)
68 | ... response = cpu.process_item('Abrams hired Browne.')
69 | ... for result in response.results():
70 | ... print(result.mrs())
71 | ...
72 |
73 |
74 |
75 | NOTE: parsed 1 / 1 sentences, avg 1173k, time 0.00986s
76 |
77 | A similar technique could be used to manage external processes, such
78 | as `MeCab <https://taku910.github.io/mecab/>`_ for morphological
79 | segmentation of Japanese for `Jacy
80 | <https://github.com/delph-in/jacy>`_. It could also be
81 | used to make a postprocessor, a backoff mechanism in case an input
82 | fails to parse, etc.
83 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "PyDelphin"
7 | dynamic = ["version"]
8 | description = "Libraries and scripts for DELPH-IN data"
9 | readme = "README.md"
10 | requires-python = ">=3.9"
11 | license = "MIT"
12 | authors = [
13 | {name = "Michael Wayne Goodman", email = "goodman.m.w@gmail.com"}
14 | ]
15 | keywords = ["nlp", "semantics", "hpsg", "delph-in", "linguistics"]
16 | classifiers = [
17 | "Development Status :: 5 - Production/Stable",
18 | "Environment :: Console",
19 | "Intended Audience :: Developers",
20 | "Intended Audience :: Information Technology",
21 | "Intended Audience :: Science/Research",
22 | "License :: OSI Approved :: MIT License",
23 | "Programming Language :: Python :: 3",
24 | "Programming Language :: Python :: 3.9",
25 | "Programming Language :: Python :: 3.10",
26 | "Programming Language :: Python :: 3.11",
27 | "Programming Language :: Python :: 3.12",
28 | "Programming Language :: Python :: 3.13",
29 | "Topic :: Scientific/Engineering :: Information Analysis",
30 | "Topic :: Software Development :: Libraries :: Python Modules",
31 | "Topic :: Text Processing :: Linguistic",
32 | "Topic :: Utilities",
33 | ]
34 | dependencies = [
35 | "penman",
36 | "progress",
37 | "Pygments",
38 | ]
39 |
40 | [project.optional-dependencies]
41 | web = [
42 | "falcon",
43 | "httpx",
44 | ]
45 | repp = [
46 | "regex"
47 | ]
48 |
49 | [project.scripts]
50 | delphin = "delphin.main:main"
51 |
52 | [project.urls]
53 | Homepage = "https://github.com/delph-in/pydelphin"
54 | Documentation = "https://pydelphin.readthedocs.io"
55 | Changelog = "https://github.com/delph-in/pydelphin/blob/main/CHANGELOG.md"
56 |
57 | [tool.hatch.version]
58 | path = "delphin/__about__.py"
59 |
60 | [tool.hatch.build.targets.sdist]
61 | exclude = [
62 | "/.github",
63 | ]
64 | [tool.hatch.build.targets.wheel]
65 | packages = ["delphin"]
66 |
67 | [tool.hatch.envs.dev]
68 | dependencies = [
69 | "pytest",
70 | "ruff",
71 | "mypy",
72 | ]
73 | [tool.hatch.envs.dev.scripts]
74 | test = "pytest {args:.}"
75 | lint = "ruff check {args:delphin/}"
76 | typecheck = "mypy --namespace-packages --explicit-package-bases --ignore-missing-imports --disable-error-code=method-assign {args:delphin/}"
77 |
78 | [tool.hatch.envs.docs]
79 | dependencies = [
80 | "sphinx",
81 | "sphinx-copybutton",
82 | "furo",
83 | "httpx",
84 | "falcon",
85 | ]
86 | [tool.hatch.envs.docs.scripts]
87 | build = "make -C docs html"
88 | clean = "make -C docs clean"
89 |
90 | [tool.ruff]
91 | target-version = "py39"
92 | line-length = 79
93 |
94 | [tool.ruff.lint]
95 | select = [
96 | "B", # flake8-bugbear
97 | "E", # pycodestyle errors
98 | "I", # isort
99 | "F", # Pyflakes
100 | "W", # pycodestyle warnings
101 | ]
102 |
103 | [tool.ruff.lint.isort]
104 | combine-as-imports = true
105 | force-wrap-aliases = true
106 |
107 | [tool.ruff.format]
108 | quote-style = "single"
109 |
110 | [tool.mypy]
111 | python_version = "3.9"
112 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.indexedmrs.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.indexedmrs
3 | =========================
4 |
5 | .. automodule:: delphin.codecs.indexedmrs
6 |
7 | The Indexed MRS format does not include role names such as `ARG1`,
8 | `ARG2`, etc., so the order of the arguments in a predication is
9 | important. For this reason, serialization with the Indexed MRS
10 | format requires the use of a SEM-I (see the :mod:`delphin.semi`
11 | module).
12 |
13 | Example:
14 |
15 | * *The new chef whose soup accidentally spilled quit and left.*
16 |
17 | ::
18 |
19 | < h0, e2:PROP:PAST:INDICATIVE:-:-,
20 | { h4:_the_q<0:3>(x3:3:SG:GENDER:+:PT, h5, h6),
21 | h7:_new_a_1<4:7>(e8:PROP:UNTENSED:INDICATIVE:BOOL:-, x3),
22 | h7:_chef_n_1<8:12>(x3),
23 | h9:def_explicit_q<13:18>(x10:3:SG:GENDER:BOOL:PT, h11, h12),
24 | h13:poss<13:18>(e14:PROP:UNTENSED:INDICATIVE:-:-, x10, x3),
25 | h13:_soup_n_1<19:23>(x10),
26 | h7:_accidental_a_1<24:36>(e15:PROP:UNTENSED:INDICATIVE:-:-, e16:PROP:PAST:INDICATIVE:-:-),
27 | h7:_spill_v_1<37:44>(e16, x10, i17),
28 | h1:_quit_v_1<45:49>(e18:PROP:PAST:INDICATIVE:-:-, x3, i19),
29 | h1:_and_c<50:53>(e2, e18, e20:PROP:PAST:INDICATIVE:-:-),
30 | h1:_leave_v_1<54:59>(e20, x3, i21) },
31 | { h0 qeq h1,
32 | h5 qeq h7,
33 | h11 qeq h13 } >
34 |
35 |
36 | Deserialization Functions
37 | -------------------------
38 |
39 | .. function:: load(source, semi)
40 |
41 | See the :func:`load` codec API documentation.
42 |
43 | **Extensions:**
44 |
45 | :param SemI semi: the semantic interface for the grammar
46 | that produced the MRS
47 |
48 | .. function:: loads(s, semi)
49 |
50 | See the :func:`loads` codec API documentation.
51 |
52 | **Extensions:**
53 |
54 | :param SemI semi: the semantic interface for the grammar
55 | that produced the MRS
56 |
57 | .. function:: decode(s, semi)
58 |
59 | See the :func:`decode` codec API documentation.
60 |
61 | **Extensions:**
62 |
63 | :param SemI semi: the semantic interface for the grammar
64 | that produced the MRS
65 |
66 | Serialization Functions
67 | -----------------------
68 |
69 | .. function:: dump(ms, destination, semi, properties=True, lnk=True, indent=False, encoding='utf-8')
70 |
71 | See the :func:`dump` codec API documentation.
72 |
73 | **Extensions:**
74 |
75 | :param SemI semi: the semantic interface for the grammar
76 | that produced the MRS
77 |
78 | .. function:: dumps(ms, semi, properties=True, lnk=True, indent=False)
79 |
80 | See the :func:`dumps` codec API documentation.
81 |
82 | **Extensions:**
83 |
84 | :param SemI semi: the semantic interface for the grammar
85 | that produced the MRS
86 |
87 | .. function:: encode(m, semi, properties=True, lnk=True, indent=False)
88 |
89 | See the :func:`encode` codec API documentation.
90 |
91 | **Extensions:**
92 |
93 | :param SemI semi: the semantic interface for the grammar
94 | that produced the MRS
95 |
--------------------------------------------------------------------------------
/docs/api/delphin.lnk.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.lnk
3 | =================
4 |
5 | .. automodule:: delphin.lnk
6 |
7 | In DELPH-IN semantic representations, entities are aligned to the
8 | input surface string through the so-called "lnk" (pronounced
9 | "link") values. There are four types of lnk values which align to the
10 | surface in different ways:
11 |
12 | * Character spans (also called "characterization pointers"); e.g.,
13 | `<0:4>`
14 |
15 | * Token indices; e.g., `<0 1 3>`
16 |
17 | * Chart vertex spans; e.g., `<0#2>`
18 |
19 | * Edge identifier; e.g., `<@42>`
20 |
21 | The latter two are unlikely to be encountered by users. Chart vertices
22 | were used by the `PET`_ parser but are now essentially deprecated and
23 | edge identifiers are only used internally in the `LKB`_ for
24 | generation. I will therefore focus on the first two kinds.
25 |
26 | .. _`PET`: https://github.com/delph-in/docs/wiki/PetTop
27 | .. _`LKB`: https://github.com/delph-in/docs/wiki/LkbTop
28 |
29 | Character spans (sometimes called "characterization pointers") are by
30 | far the most commonly used type---possibly even the only type most
31 | users will encounter. These spans indicate the positions *between*
32 | characters in the input string that correspond to a semantic entity,
33 | similar to how Python and Perl do string indexing. For example,
34 | `<0:4>` would capture the first through fourth characters---a span
35 | that would correspond to the first word in a sentence like "Dogs
36 | bark". These spans assume the input is a flat, or linear, string and
37 | can only select contiguous chunks. Character spans are used by REPP
38 | (the Regular Expression PreProcessor; see :mod:`delphin.repp`) to
39 | track the surface alignment prior to string changes introduced by
40 | tokenization.
41 |
42 | Token indices select input tokens rather than characters. This method,
43 | though not widely used, is more suitable for input sources that are
44 | not flat strings (e.g., a lattice of automatic speech recognition
45 | (ASR) hypotheses), or where non-contiguous sequences are needed (e.g.,
46 | from input containing markup or other noise).
47 |
48 | .. note::
49 |
50 | Much of this background is from comments in the `LKB`_ source code:
51 | See: http://svn.emmtee.net/trunk/lingo/lkb/src/mrs/lnk.lisp
52 |
53 | Support for lnk values in PyDelphin is rather simple. The :class:`Lnk`
54 | class is able to parse lnk strings and model the contents for
55 | serialization of semantic representations. In addition, semantic
56 | entities such as DMRS :class:`Nodes <delphin.dmrs.Node>` and MRS
57 | :class:`EPs <delphin.mrs.EP>` have `cfrom` and `cto` attributes which
58 | are the start and end pointers for character spans (defaulting to `-1`
59 | if a character span is not specified for the entity).
60 |
61 |
62 | Classes
63 | -------
64 |
65 | .. autoclass:: Lnk
66 | :members:
67 | .. autoclass:: LnkMixin
68 | :members:
69 |
70 |
71 | Exceptions
72 | ----------
73 |
74 | .. autoexception:: LnkError
75 | :show-inheritance:
76 |
--------------------------------------------------------------------------------
/delphin/cli/mkprof.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | This command creates testsuites. There are four usage patterns:
4 |
5 | delphin mkprof --input=sentences.txt --relations=RELATIONS ...
6 | delphin mkprof --relations=RELATIONS ... < sentences.txt
7 | delphin mkprof --source=TESTSUITE ...
8 | delphin mkprof --refresh ...
9 |
10 | The first two read sentences (one per line; '*' in the first column
11 | indicates ungrammaticality) from --input or <stdin> and --relations
12 | is required. The second two use an existing testsuite; --relations
13 | defaults to that of --source; --refresh reads and overwrites DEST.
14 |
15 | By default, testsuites are skeletons as from the `mkprof` utility of
16 | `art`, where the tsdb-core files (e.g., 'item') are non-empty but all
17 | other tables exist as empty files. The --full option, with --source,
18 | will copy a full profile, while the --skeleton option will only write
19 | the tsdb-core files and 'relations' file.
20 | """
21 |
22 | import argparse
23 |
24 | from delphin.commands import mkprof
25 |
# Parser for this subcommand's own options.  It is created without -h
# (add_help=False) and handed out through COMMAND_INFO['parser'].
parser = argparse.ArgumentParser(add_help=False)  # filled out below

# Description of this subcommand; the module docstring doubles as the
# long description.
COMMAND_INFO = {
    'name': 'mkprof',
    'help': 'Create [incr tsdb()] test suites',
    'description': __doc__,
    'parser': parser
}
34 |
35 |
def call_mkprof(args):
    """
    Run the mkprof command with parsed command-line arguments.

    Args:
        args: the namespace produced by the argument parser
    Returns:
        whatever :func:`delphin.commands.mkprof` returns
    """
    # --source takes precedence; --input is used when it is not given
    origin = args.source or args.input
    options = {
        'source': origin,
        'schema': args.relations,
        'where': args.where,
        'delimiter': args.delimiter,
        'refresh': args.refresh,
        'skeleton': args.skeleton,
        'full': args.full,
        'gzip': args.gzip,
    }
    return mkprof(args.DEST, **options)
47 |
48 |
# Parsed arguments carry call_mkprof as their 'func' attribute
parser.set_defaults(func=call_mkprof)
parser.add_argument(
    'DEST', help='directory for the destination (output) testsuite')

# Where the data comes from: at most one of --source, --refresh, --input
grp1 = parser.add_mutually_exclusive_group()
grp1.add_argument(
    '-s', '--source', metavar='DIR', help='path to a testsuite directory')
grp1.add_argument(
    '--refresh',
    action='store_true',
    help='overwrite DEST (works with --relations or --gzip)')
grp1.add_argument(
    '-i',
    '--input',
    metavar='TXT',
    help='file of test sentences (* sents are ungrammatical)')

parser.add_argument(
    '--where', metavar='CONDITION',
    help=('filter records in the testsuite with a TSQL condition '
          '(e.g., \'i-length <= 10 && readings > 0\')'))
parser.add_argument(
    '-r',
    '--relations',
    metavar='FILE',
    help='relations file to use for destination testsuite')
parser.add_argument(
    '--delimiter',
    metavar='C',
    help=('split input lines with delimiter C; if C="@", split as a '
          'TSDB record; a header of field names is required')
)

# How much is written: at most one of --full and --skeleton
grp2 = parser.add_mutually_exclusive_group()
grp2.add_argument(
    '--full',
    action='store_true',
    help='write all tables (must be used with --source)')
grp2.add_argument(
    '--skeleton',
    action='store_true',
    help='write only tsdb-core files for skeletons')

parser.add_argument(
    '-z', '--gzip', action='store_true', help='compress table files with gzip')
94 |
--------------------------------------------------------------------------------
/delphin/cli/process.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Use a processor (namely ACE) to process each item in the [incr tsdb()]
4 | testsuite given by --source (TESTSUITE if --source is not given). For
5 | standard [incr tsdb()] schemata, input items are given by the following
6 | selectors for each task (configurable via the --select option):
7 |
8 | * parse: i-input
9 | * transfer: mrs
10 | * generate: mrs
11 |
12 | In addition, the following TSQL condition is applied if --source is a
13 | standard [incr tsdb()] profile and --all-items is not used:
14 |
15 | where i-wf != 2
16 | """
17 |
18 | import argparse
19 | import shlex
20 |
21 | from delphin.commands import process
22 |
# Argument parser for the `process` subcommand; its arguments are added
# further below in this module.
parser = argparse.ArgumentParser(add_help=False)

# Metadata describing this subcommand: its name, a one-line help string,
# the long description (this module's docstring), and its parser.
COMMAND_INFO = dict(
    name='process',
    help='Process [incr tsdb()] test suites using ACE',
    description=__doc__,
    parser=parser,
)
31 |
32 |
def call_process(args):
    """Run the `process` command from parsed command-line arguments.

    Args:
        args: the namespace produced by this module's argument parser
    Returns:
        the return value of :func:`delphin.commands.process`
    """
    # --options arrives as a single string (default ''); split it with
    # shell-like quoting rules into a list of separate options.
    ace_options = shlex.split(args.options)
    keywords = {
        'source': args.source,
        'select': args.select,
        'generate': args.generate,
        'transfer': args.transfer,
        'full_forest': args.full_forest,
        'options': ace_options,
        'all_items': args.all_items,
        'result_id': args.p,
        'gzip': args.gzip,
        'executable': args.executable,
    }
    return process(args.grammar, args.TESTSUITE, **keywords)
47 |
48 |
# Arguments of the `process` subcommand

# Handler to invoke when this subcommand is selected
parser.set_defaults(func=call_process)
parser.add_argument(
    'TESTSUITE',
    help='target testsuite',
)
parser.add_argument(
    '-g', '--grammar',
    metavar='GRM',
    required=True,
    help='compiled grammar image',
)
parser.add_argument(
    '-o', '--options',
    metavar='OPTIONS',
    type=str,
    default='',
    help='ACE options (see https://github.com/delph-in/docs/wiki/AceOptions)',
)
parser.add_argument(
    '-s', '--source',
    metavar='PATH',
    help='source testsuite; if unset, set to TESTSUITE',
)
parser.add_argument(
    '--select',
    metavar='QUERY',
    help="TSQL query for selecting processor inputs (e.g., "
         "'i-input where i-length < 10'; see above for defaults)",
)
parser.add_argument(
    '--all-items',
    action='store_true',
    help="don't exclude ignored items (i-wf==2) in parsing",
)

# Processing mode: at most one of --generate, --transfer, --full-forest
grp1 = parser.add_mutually_exclusive_group()
grp1.add_argument(
    '-e', '--generate',
    action='store_true',
    help='generation mode (--source is strongly encouraged)',
)
grp1.add_argument(
    '-t', '--transfer',
    action='store_true',
    help='transfer mode (--source is strongly encouraged)',
)
grp1.add_argument(
    '--full-forest',
    action='store_true',
    help='full-forest parsing mode (record the full parse chart)',
)

parser.add_argument(
    '-p',
    metavar='RID',
    help="transfer or generate from result with result-id=RID; "
         "short for adding 'where result-id==RID' to --select",
)
parser.add_argument(
    '-z', '--gzip',
    action='store_true',
    help='compress table files with gzip',
)
parser.add_argument(
    '--executable',
    metavar='PATH',
    default='ace',
    help='path to ACE executable (default: ace)',
)
98 |
--------------------------------------------------------------------------------
/docs/api/delphin.semi.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.semi
3 | ============
4 |
5 | .. automodule:: delphin.semi
6 |
7 | Semantic interfaces (SEM-Is) describe the inventory of semantic
8 | components in a grammar, including variables, properties, roles,
9 | and predicates. This information can be used for validating
10 | semantic structures or for filling out missing information in
11 | incomplete representations.
12 |
13 | .. seealso::
14 | The following DELPH-IN wikis contain more information:
15 |
16 | - Technical specifications: https://github.com/delph-in/docs/wiki/SemiRfc
17 | - Overview and usage: https://github.com/delph-in/docs/wiki/RmrsSemi
18 |
19 |
20 | Loading a SEM-I from a File
21 | ---------------------------
22 |
23 | The :func:`load` module function is used to read the regular
24 | file-based SEM-I definitions, but there is also a dictionary
25 | representation which may be useful for JSON serialization, e.g.,
26 | for an HTTP API that makes use of SEM-Is. See
27 | :meth:`SemI.to_dict()` for the latter.
28 |
29 | .. autofunction:: load
30 |
31 |
32 | The SemI Class
33 | --------------
34 |
35 | The main class modeling a semantic interface is :class:`SemI`. The
36 | predicate synopses have enough complexity that two more subclasses
37 | are used to make inspection easier: :class:`Synopsis` contains the
38 | role information for an individual predicate synopsis, and each role
39 | is modeled with a :class:`SynopsisRole` class.
40 |
41 | .. autoclass:: SemI
42 |
43 | The data in the SEM-I can be directly inspected via the
44 | :attr:`variables`, :attr:`properties`, :attr:`roles`, and
45 | :attr:`predicates` attributes.
46 |
47 | >>> smi = semi.load('../grammars/erg/etc/erg.smi')
48 | >>> smi.variables['e']
49 |
50 | >>> smi.variables['e'].parents
51 | ['i']
52 | >>> smi.variables['e'].data
53 | [('SF', 'sf'), ('TENSE', 'tense'), ('MOOD', 'mood'), ('PROG', 'bool'), ('PERF', 'bool')]
54 | >>> 'sf' in smi.properties
55 | True
56 | >>> smi.roles['ARG0']
57 | 'i'
58 | >>> for synopsis in smi.predicates['can_able'].data:
59 | ... print(', '.join('{0.name} {0.value}'.format(roledata)
60 | ... for roledata in synopsis))
61 | ...
62 | ARG0 e, ARG1 i, ARG2 p
63 | >>> smi.predicates.descendants('some_q')
64 | ['_another_q', '_many+a_q', '_an+additional_q', '_what+a_q', '_such+a_q', '_some_q_indiv', '_some_q', '_a_q']
65 |
66 | Note that the variables, properties, and predicates are
67 | :class:`~delphin.tfs.TypeHierarchy` objects.
68 |
69 | .. automethod:: find_synopsis
70 | .. automethod:: from_dict
71 | .. automethod:: to_dict
72 |
73 | .. autoclass:: Synopsis(roles)
74 | :members:
75 |
76 | .. autoclass:: SynopsisRole(name, value, properties=None, optional=False)
77 |
78 |
79 | Exceptions and Warnings
80 | -----------------------
81 |
82 | .. autoexception:: SemIError
83 | :show-inheritance:
84 |
85 | .. autoexception:: SemISyntaxError
86 | :show-inheritance:
87 |
88 | .. autoexception:: SemIWarning
89 | :show-inheritance:
90 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at goodman.m.w@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 |
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.dmrstikz.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.dmrstikz
3 | =======================
4 |
5 | .. automodule:: delphin.codecs.dmrstikz
6 |
7 | This requires LaTeX and the `tikz-dependency
8 | <https://ctan.org/pkg/tikz-dependency>`_ package.
9 |
10 | Example:
11 |
12 | * *The new chef whose soup accidentally spilled quit and left.*
13 |
14 | ::
15 |
16 | \documentclass{standalone}
17 |
18 | \usepackage{tikz-dependency}
19 | \usepackage{relsize}
20 |
21 | %%%
22 | %%% style for dmrs graph
23 | %%%
24 | \depstyle{dmrs}{edge unit distance=1.5ex,
25 | label style={above, scale=.9, opacity=0, text opacity=1},
26 | baseline={([yshift=-0.7\baselineskip]current bounding box.north)}}
27 | %%% set text opacity=0 to hide text, opacity = 0 to hide box
28 | \depstyle{root}{edge unit distance=3ex, label style={opacity=1}}
29 | \depstyle{arg}{edge above}
30 | \depstyle{rstr}{edge below, dotted, label style={text opacity=1}}
31 | \depstyle{eq}{edge below, label style={text opacity=1}}
32 | \depstyle{icons}{edge below, dashed}
33 | \providecommand{\named}{}
34 | \renewcommand{\named}{named}
35 |
36 | %%% styles for predicates and roles (from mrs.sty)
37 | \providecommand{\spred}{}
38 | \renewcommand{\spred}[1]{\mbox{\textsf{#1}}}
39 | \providecommand{\srl}{}
40 | \renewcommand{\srl}[1]{\mbox{\textsf{\smaller #1}}}
41 | %%%
42 |
43 | \begin{document}
44 | \begin{dependency}[dmrs]
45 | \begin{deptext}[column sep=10pt]
46 | \spred{\_the\_q} \& % node 1
47 | \spred{\_new\_a\_1} \& % node 2
48 | \spred{\_chef\_n\_1} \& % node 3
49 | \spred{def\_explicit\_q} \& % node 4
50 | \spred{poss} \& % node 5
51 | \spred{\_soup\_n\_1} \& % node 6
52 | \spred{\_accidental\_a\_1} \& % node 7
53 | \spred{\_spill\_v\_1} \& % node 8
54 | \spred{\_quit\_v\_1} \& % node 9
55 | \spred{\_and\_c} \& % node 10
56 | \spred{\_leave\_v\_1} \\ % node 11
57 | \end{deptext}
58 | \deproot[root]{9}{\srl{TOP}}
59 | \depedge[rstr]{1}{3}{\srl{RSTR/H}}
60 | \depedge[eq]{2}{3}{\srl{ARG1/EQ}}
61 | \depedge[rstr]{4}{6}{\srl{RSTR/H}}
62 | \depedge[eq]{5}{6}{\srl{ARG1/EQ}}
63 | \depedge[arg]{5}{3}{\srl{ARG2/NEQ}}
64 | \depedge[eq]{7}{8}{\srl{ARG1/EQ}}
65 | \depedge[arg]{8}{6}{\srl{ARG1/NEQ}}
66 | \depedge[arg]{9}{3}{\srl{ARG1/NEQ}}
67 | \depedge[eq]{10}{9}{\srl{ARG1/EQ}}
68 | \depedge[eq]{10}{11}{\srl{ARG2/EQ}}
69 | \depedge[arg]{11}{3}{\srl{ARG1/NEQ}}
70 | \depedge[eq]{8}{3}{\srl{MOD/EQ}}
71 | \depedge[eq]{11}{9}{\srl{MOD/EQ}}
72 | % \depedge[icons]{f}{t}{FOCUS}
73 | \end{dependency}
74 |
75 | \end{document}
76 |
77 | This renders as the following:
78 |
79 | .. image:: ../_static/dmrs-tikz-pdf.png
80 |
81 |
82 | Serialization Functions
83 | -----------------------
84 |
85 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
86 |
87 | See the :func:`dump` codec API documentation.
88 |
89 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
90 |
91 | See the :func:`dumps` codec API documentation.
92 |
93 | .. function:: encode(m, properties=True, lnk=True, indent=False)
94 |
95 | See the :func:`encode` codec API documentation.
96 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.edspenman.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.edspenman
3 | ========================
4 |
5 | .. automodule:: delphin.codecs.edspenman
6 |
7 | Example:
8 |
9 | * *The new chef whose soup accidentally spilled quit and left.*
10 |
11 | ::
12 |
13 | (e18 / _quit_v_1
14 | :lnk "<45:49>"
15 | :type e
16 | :sf prop
17 | :tense past
18 | :mood indicative
19 | :prog -
20 | :perf -
21 | :ARG1 (x3 / _chef_n_1
22 | :lnk "<8:12>"
23 | :type x
24 | :pers 3
25 | :num sg
26 | :ind +
27 | :BV-of (_1 / _the_q
28 | :lnk "<0:3>")
29 | :ARG1-of (e8 / _new_a_1
30 | :lnk "<4:7>"
31 | :type e
32 | :sf prop
33 | :tense untensed
34 | :mood indicative
35 | :prog bool
36 | :perf -)
37 | :ARG2-of (e14 / poss
38 | :lnk "<13:18>"
39 | :type e
40 | :sf prop
41 | :tense untensed
42 | :mood indicative
43 | :prog -
44 | :perf -
45 | :ARG1 (x10 / _soup_n_1
46 | :lnk "<19:23>"
47 | :type x
48 | :pers 3
49 | :num sg
50 | :BV-of (_2 / def_explicit_q
51 | :lnk "<13:18>")
52 | :ARG1-of (e16 / _spill_v_1
53 | :lnk "<37:44>"
54 | :type e
55 | :sf prop
56 | :tense past
57 | :mood indicative
58 | :prog -
59 | :perf -
60 | :ARG1-of (e15 / _accidental_a_1
61 | :lnk "<24:36>"
62 | :type e
63 | :sf prop
64 | :tense untensed
65 | :mood indicative
66 | :prog -
67 | :perf -)))))
68 | :ARG1-of (e2 / _and_c
69 | :lnk "<50:53>"
70 | :type e
71 | :sf prop
72 | :tense past
73 | :mood indicative
74 | :prog -
75 | :perf -
76 | :ARG2 (e20 / _leave_v_1
77 | :lnk "<54:59>"
78 | :type e
79 | :sf prop
80 | :tense past
81 | :mood indicative
82 | :prog -
83 | :perf -
84 | :ARG1 x3)))
85 |
86 |
87 | Deserialization Functions
88 | -------------------------
89 |
90 | .. function:: load(source)
91 |
92 | See the :func:`load` codec API documentation.
93 |
94 | .. function:: loads(s)
95 |
96 | See the :func:`loads` codec API documentation.
97 |
98 | .. function:: decode(s)
99 |
100 | See the :func:`decode` codec API documentation.
101 |
102 |
103 | Serialization Functions
104 | -----------------------
105 |
106 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
107 |
108 | See the :func:`dump` codec API documentation.
109 |
110 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
111 |
112 | See the :func:`dumps` codec API documentation.
113 |
114 | .. function:: encode(m, properties=True, lnk=True, indent=False)
115 |
116 | See the :func:`encode` codec API documentation.
117 |
118 | Complementary Functions
119 | -----------------------
120 |
121 | .. autofunction:: from_triples
122 | .. autofunction:: to_triples
123 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.dmrspenman.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.dmrspenman
3 | =========================
4 |
5 | .. automodule:: delphin.codecs.dmrspenman
6 |
7 | Example:
8 |
9 | * *The new chef whose soup accidentally spilled quit and left.*
10 |
11 | ::
12 |
13 | (e9 / _quit_v_1
14 | :lnk "<45:49>"
15 | :cvarsort e
16 | :sf prop
17 | :tense past
18 | :mood indicative
19 | :prog -
20 | :perf -
21 | :ARG1-NEQ (x3 / _chef_n_1
22 | :lnk "<8:12>"
23 | :cvarsort x
24 | :pers 3
25 | :num sg
26 | :ind +
27 | :RSTR-H-of (q1 / _the_q
28 | :lnk "<0:3>")
29 | :ARG1-EQ-of (e2 / _new_a_1
30 | :lnk "<4:7>"
31 | :cvarsort e
32 | :sf prop
33 | :tense untensed
34 | :mood indicative
35 | :prog bool
36 | :perf -)
37 | :ARG2-NEQ-of (e5 / poss
38 | :lnk "<13:18>"
39 | :cvarsort e
40 | :sf prop
41 | :tense untensed
42 | :mood indicative
43 | :prog -
44 | :perf -
45 | :ARG1-EQ (x6 / _soup_n_1
46 | :lnk "<19:23>"
47 | :cvarsort x
48 | :pers 3
49 | :num sg
50 | :RSTR-H-of (q4 / def_explicit_q
51 | :lnk "<13:18>")))
52 | :MOD-EQ-of (e8 / _spill_v_1
53 | :lnk "<37:44>"
54 | :cvarsort e
55 | :sf prop
56 | :tense past
57 | :mood indicative
58 | :prog -
59 | :perf -
60 | :ARG1-NEQ x6
61 | :ARG1-EQ-of (e7 / _accidental_a_1
62 | :lnk "<24:36>"
63 | :cvarsort e
64 | :sf prop
65 | :tense untensed
66 | :mood indicative
67 | :prog -
68 | :perf -)))
69 | :ARG1-EQ-of (e10 / _and_c
70 | :lnk "<50:53>"
71 | :cvarsort e
72 | :sf prop
73 | :tense past
74 | :mood indicative
75 | :prog -
76 | :perf -
77 | :ARG2-EQ (e11 / _leave_v_1
78 | :lnk "<54:59>"
79 | :cvarsort e
80 | :sf prop
81 | :tense past
82 | :mood indicative
83 | :prog -
84 | :perf -
85 | :ARG1-NEQ x3
86 | :MOD-EQ e9)))
87 |
88 |
89 | Deserialization Functions
90 | -------------------------
91 |
92 | .. function:: load(source)
93 |
94 | See the :func:`load` codec API documentation.
95 |
96 | .. function:: loads(s)
97 |
98 | See the :func:`loads` codec API documentation.
99 |
100 | .. function:: decode(s)
101 |
102 | See the :func:`decode` codec API documentation.
103 |
104 | Serialization Functions
105 | -----------------------
106 |
107 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
108 |
109 | See the :func:`dump` codec API documentation.
110 |
111 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
112 |
113 | See the :func:`dumps` codec API documentation.
114 |
115 | .. function:: encode(m, properties=True, lnk=True, indent=False)
116 |
117 | See the :func:`encode` codec API documentation.
118 |
119 | Complementary Functions
120 | -----------------------
121 |
122 | .. autofunction:: from_triples
123 | .. autofunction:: to_triples
124 |
--------------------------------------------------------------------------------
/tests/predicate_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from delphin import predicate
4 |
5 |
def test_split():
    """Check that predicate.split() yields (lemma, pos, sense) triples."""
    cases = [
        # normalized and string type
        ('_dog_n_1', ('dog', 'n', '1')),
        ('"_dog_n_1_rel"', ('dog', 'n', '1')),
        # some odd variations (some are not strictly well-formed)
        ('_24/7_a_1_rel', ('24/7', 'a', '1')),
        ('_a+bit_q_rel', ('a+bit', 'q', None)),
        ('_A$_n_1_rel', ('A$', 'n', '1')),
        ('_only_child_n_1_rel', ('only_child', 'n', '1')),
        # abstract predicates are also split (as far as possible)
        ('pron_rel', ('pron', None, None)),
        ('udef_q_rel', ('udef', 'q', None)),
        ('coord', ('coord', None, None)),
        ('some_relation', ('some', None, 'relation')),
    ]
    for pred, expected in cases:
        assert predicate.split(pred) == expected
20 |
21 |
def test_create():
    """Check predicate.create() on good arguments and on rejected ones."""
    assert predicate.create('dog', 'n', '1') == '_dog_n_1'
    assert predicate.create('some', 'q') == '_some_q'

    # calling with a lemma only is a TypeError
    with pytest.raises(TypeError):
        predicate.create('pron')

    # these argument combinations must raise PredicateError
    rejected = [
        ('lemma', 'b'),
        ('lemma space', 'a'),
        ('lemma', 'a', 'sense space'),
    ]
    for bad_args in rejected:
        with pytest.raises(predicate.PredicateError):
            predicate.create(*bad_args)
34 |
35 |
def test_normalize():
    """Check the normalized form predicate.normalize() gives each input."""
    cases = [
        ('pron_rel', 'pron'),
        ('pron_rel_rel', 'pron_rel'),  # i hope nobody does this
        ('"udef_q_rel"', 'udef_q'),
        ("'udef_q_rel", 'udef_q'),
        ('_dog_n_1_rel', '_dog_n_1'),
        ('_DELPH-IN_n_1', '_delph-in_n_1'),
    ]
    for raw, normalized in cases:
        assert predicate.normalize(raw) == normalized
44 |
45 |
def test_is_valid():
    """Check which predicate strings predicate.is_valid() accepts."""
    accepted = [
        'pron_rel',
        "'pron_rel",  # single open quote
        '"pron_rel"',  # surrounding double-quotes
        'udef_q_rel',
        '"_dog_n_1_rel"',
        '"_ad+hoc_a_1_rel"',
        '"_look_v_up-at_rel"',
        '_24/7_a_1_rel',
        '_a+bit_q_rel',
        '_A$_n_1_rel',
        'coord',
        '_dog_n_1',
        '_dog_n',
    ]
    refused = [
        '_dog_rel',
        '_dog_1_rel',
        '_only_child_n_1_rel',
    ]
    for pred in accepted:
        assert predicate.is_valid(pred)
    for pred in refused:
        assert not predicate.is_valid(pred)
66 |
67 |
def test_is_surface():
    """Check which predicate strings predicate.is_surface() accepts."""
    surface = [
        '"_dog_n_1_rel"',
        '"_ad+hoc_a_1_rel"',
        '"_look_v_up-at_rel"',
        '_24/7_a_1_rel',
        '_a+bit_q_rel',
        '_A$_n_1_rel',
        '_dog_n_1',
        '_dog_n',
    ]
    not_surface = [
        # predicates without the leading underscore
        'pron_rel',
        "'pron_rel",  # single open quote
        '"pron_rel"',  # surrounding double-quotes
        'udef_q',
        'coord',
        # invalid
        '_dog_rel',
        '_dog_1_rel',
        '_only_child_n_1_rel',
        '_a space_n_1',
    ]
    for pred in surface:
        assert predicate.is_surface(pred)
    for pred in not_surface:
        assert not predicate.is_surface(pred)
89 |
90 |
def test_is_abstract():
    """Check which predicate strings predicate.is_abstract() accepts."""
    abstract = [
        'pron_rel',
        "'pron_rel",  # single open quote
        '"pron_rel"',  # surrounding double-quotes
        'udef_q',
        'coord',
    ]
    not_abstract = [
        '"_dog_n_1_rel"',
        # invalid
        'a space_n_1',
    ]
    for pred in abstract:
        assert predicate.is_abstract(pred)
    for pred in not_abstract:
        assert not predicate.is_abstract(pred)
102 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. PyDelphin documentation master file, created by
2 | sphinx-quickstart on Mon Jun 11 22:08:46 2018.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | PyDelphin
7 | =========
8 |
9 | .. sidebar:: Quick Links
10 |
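11 | - `Project page <https://github.com/delph-in/pydelphin>`_
12 | - `How to contribute <https://github.com/delph-in/pydelphin/blob/HEAD/CONTRIBUTING.md>`_
13 | - `Report a bug <https://github.com/delph-in/pydelphin/issues>`_
14 | - `Changelog <https://github.com/delph-in/pydelphin/blob/HEAD/CHANGELOG.md>`_
15 | - `Code of conduct <https://github.com/delph-in/pydelphin/blob/HEAD/CODE_OF_CONDUCT.md>`_
16 | - `License (MIT) <https://github.com/delph-in/pydelphin/blob/HEAD/LICENSE>`_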
17 |
18 | .. toctree::
19 | :maxdepth: 1
20 | :caption: Guides:
21 |
22 | guides/setup.rst
23 | guides/walkthrough.rst
24 | guides/semantics.rst
25 | guides/ace.rst
26 | guides/commands.rst
27 | guides/itsdb.rst
28 | guides/edm.rst
29 | guides/developer.rst
30 |
31 | .. toctree::
32 | :maxdepth: 1
33 | :caption: API Reference:
34 | :hidden:
35 |
36 | api/delphin.ace.rst
37 | api/delphin.cli.rst
38 | api/delphin.codecs.rst
39 | api/delphin.commands.rst
40 | api/delphin.derivation.rst
41 | api/delphin.dmrs.rst
42 | api/delphin.edm.rst
43 | api/delphin.eds.rst
44 | api/delphin.exceptions.rst
45 | api/delphin.hierarchy.rst
46 | api/delphin.highlight.rst
47 | api/delphin.interface.rst
48 | api/delphin.itsdb.rst
49 | api/delphin.lnk.rst
50 | api/delphin.predicate.rst
51 | api/delphin.mrs.rst
52 | api/delphin.repp.rst
53 | api/delphin.sembase.rst
54 | api/delphin.scope.rst
55 | api/delphin.semi.rst
56 | api/delphin.tdl.rst
57 | api/delphin.tfs.rst
58 | api/delphin.tokens.rst
59 | api/delphin.tsdb.rst
60 | api/delphin.tsql.rst
61 | api/delphin.variable.rst
62 | api/delphin.vpm.rst
63 | api/delphin.web.rst
64 |
65 | API Reference:
66 | --------------
67 |
68 | Core API
69 | ''''''''
70 |
71 | - :doc:`api/delphin.exceptions`
72 | - :doc:`api/delphin.hierarchy` -- Multiple-inheritance hierarchies
73 | - :doc:`api/delphin.codecs` -- Serialization codecs
74 | - :doc:`api/delphin.commands`
75 |
76 |
77 | Interfacing External Tools
78 | ''''''''''''''''''''''''''
79 |
80 | - :doc:`api/delphin.interface`
81 | - :doc:`api/delphin.ace` -- ACE
82 | - :doc:`api/delphin.web` -- DELPH-IN Web API
83 |
84 | Tokenization
85 | ''''''''''''
86 |
87 | - :doc:`api/delphin.lnk` -- Surface alignment
88 | - :doc:`api/delphin.repp` -- Regular Expression Preprocessor
89 | - :doc:`api/delphin.tokens` -- YY token lattices
90 |
91 | Syntax
92 | ''''''
93 |
94 | - :doc:`api/delphin.derivation` -- UDF/UDX derivation trees
95 |
96 | Semantics
97 | '''''''''
98 |
99 | - :doc:`api/delphin.dmrs` -- Dependency Minimal Recursion Semantics
100 | - :doc:`api/delphin.edm` -- Elementary Dependency Matching
101 | - :doc:`api/delphin.eds` -- Elementary Dependency Structures
102 | - :doc:`api/delphin.predicate` -- Semantic predicates
103 | - :doc:`api/delphin.mrs` -- Minimal Recursion Semantics
104 | - :doc:`api/delphin.sembase`
105 | - :doc:`api/delphin.semi` -- Semantic Interface (or model)
106 | - :doc:`api/delphin.scope` -- Scope operations
107 | - :doc:`api/delphin.variable`
108 | - :doc:`api/delphin.vpm` -- Variable property mapping
109 |
110 | Test Suites
111 | '''''''''''
112 |
113 | - :doc:`api/delphin.itsdb` -- [incr tsdb()]
114 | - :doc:`api/delphin.tsdb` -- Test Suite Database
115 | - :doc:`api/delphin.tsql` -- Test Suite Query Language
116 |
117 |
118 | Grammars
119 | ''''''''
120 |
121 | - :doc:`api/delphin.tdl` -- Type Description Language
122 | - :doc:`api/delphin.tfs` -- Typed feature structures
123 |
124 | Miscellaneous
125 | '''''''''''''
126 |
127 | - :doc:`api/delphin.highlight` -- Pygments highlighters for TDL and MRS
128 |
129 |
130 | Indices and tables
131 | ==================
132 |
133 | * :ref:`genindex`
134 | * :ref:`modindex`
135 | * :ref:`search`
136 |
137 |
--------------------------------------------------------------------------------
/docs/api/delphin.web.server.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.web.server
3 | ==================
4 |
5 | .. automodule:: delphin.web.server
6 |
7 | This module provides classes and functions that implement a subset of
8 | the DELPH-IN Web API described here:
9 |
10 | https://github.com/delph-in/docs/wiki/ErgApi
11 |
12 | .. note::
13 |
14 | Requires Falcon (https://falcon.readthedocs.io/). This dependency
15 | is satisfied if you install PyDelphin with the ``[web]`` extra (see
16 | :doc:`../guides/setup`).
17 |
18 | In addition to the parsing API, this module also provides support for
19 | generation and for browsing [incr tsdb()] test suites. In order to
20 | use it, you will need a WSGI server such as `gunicorn`_, `mod_wsgi`_
21 | for `Apache2`_, etc. You then write a WSGI stub for the server to use,
22 | such as the following example:
23 |
24 | .. code-block:: python
25 |
26 | # file: wsgi.py
27 |
28 | import falcon
29 |
30 | from delphin.web import server
31 |
32 | application = falcon.App()
33 |
34 | server.configure(
35 | application,
36 | parser='~/grammars/erg-2018-x86-64-0.9.30.dat',
37 | generator='~/grammars/erg-2018-x86-64-0.9.30.dat',
38 | testsuites={
39 | 'gold': [
40 | {'name': 'mrs', 'path': '~/grammars/erg/tsdb/gold/mrs'}
41 | ]
42 | }
43 | )
44 |
45 | You can then run a local instance using, for instance, `gunicorn`_:
46 |
47 | .. code-block:: console
48 |
49 | $ gunicorn wsgi
50 | [2019-07-12 16:03:28 +0800] [29920] [INFO] Starting gunicorn 19.9.0
51 | [2019-07-12 16:03:28 +0800] [29920] [INFO] Listening at: http://127.0.0.1:8000 (29920)
52 | [2019-07-12 16:03:28 +0800] [29920] [INFO] Using worker: sync
53 | [2019-07-12 16:03:28 +0800] [29923] [INFO] Booting worker with pid: 29923
54 |
55 | And make requests with, for instance, :command:`curl`:
56 |
57 | .. code-block:: console
58 |
59 | $ curl 'http://127.0.0.1:8000/parse?input=Abrams%20slept.&mrs' -v
60 | * Trying 127.0.0.1...
61 | * TCP_NODELAY set
62 | * Connected to 127.0.0.1 (127.0.0.1) port 8000 (#0)
63 | > GET /parse?input=Abrams%20slept.&mrs HTTP/1.1
64 | > Host: 127.0.0.1:8000
65 | > User-Agent: curl/7.61.0
66 | > Accept: */*
67 | >
68 | < HTTP/1.1 200 OK
69 | < Server: gunicorn/19.9.0
70 | < Date: Fri, 12 Jul 2019 08:04:29 GMT
71 | < Connection: close
72 | < content-type: application/json
73 | < content-length: 954
74 | <
75 | * Closing connection 0
76 | {"input": "Abrams slept.", "readings": 1, "results": [{"result-id": 0, "mrs": {"top": "h0", "index": "e2", "relations": [{"label": "h4", "predicate": "proper_q", "arguments": {"ARG0": "x3", "RSTR": "h5", "BODY": "h6"}, "lnk": {"from": 0, "to": 6}}, {"label": "h7", "predicate": "named", "arguments": {"CARG": "Abrams", "ARG0": "x3"}, "lnk": {"from": 0, "to": 6}}, {"label": "h1", "predicate": "_sleep_v_1", "arguments": {"ARG0": "e2", "ARG1": "x3"}, "lnk": {"from": 7, "to": 13}}], "constraints": [{"relation": "qeq", "high": "h0", "low": "h1"}, {"relation": "qeq", "high": "h5", "low": "h7"}], "variables": {"e2": {"type": "e", "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"}}, "x3": {"type": "x", "properties": {"PERS": "3", "NUM": "sg", "IND": "+"}}, "h5": {"type": "h"}, "h6": {"type": "h"}, "h0": {"type": "h"}, "h1": {"type": "h"}, "h7": {"type": "h"}, "h4": {"type": "h"}}}}], "tcpu": 7, "pedges": 17}
77 |
78 | .. _gunicorn: https://gunicorn.org/
79 | .. _mod_wsgi: https://modwsgi.readthedocs.io/
80 | .. _Apache2: https://httpd.apache.org/
81 |
82 | Module Functions
83 | ----------------
84 |
85 | .. autofunction:: configure
86 |
87 |
88 | Server Application Classes
89 | --------------------------
90 |
91 | .. autoclass:: ProcessorServer
92 | :members:
93 |
94 | .. autoclass:: ParseServer
95 | :show-inheritance:
96 | :members:
97 |
98 | .. autoclass:: GenerationServer
99 | :show-inheritance:
100 | :members:
101 |
102 | .. autoclass:: TestSuiteServer
103 | :members:
104 |
--------------------------------------------------------------------------------
/tests/codecs/edsnative_test.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin.codecs import eds as edsnative
5 | from delphin.eds import EDS, Node
6 |
7 |
@pytest.fixture
def dogs_bark_from_mrs():
    """Return keyword arguments (top and nodes) for building a 3-node EDS."""
    bark = Node('e2', '_bark_v_1', type='e', edges={'ARG1': 'x4'})
    quantifier = Node('_1', 'udef_q', edges={'BV': 'x4'})
    dog = Node('x4', '_dog_n_1', type='x')
    return {'top': 'e2', 'nodes': [bark, quantifier, dog]}
16 |
17 |
def test_decode():
    """Decode native EDS strings and inspect the resulting structures."""
    # a single node with a lnk and variable properties, but no edges
    rain_text = (
        '{e2:\n'
        ' e2:_rain_v_1<3:9>{e SF prop, TENSE pres}[]\n'
        '}'
    )
    rain = edsnative.decode(rain_text)
    assert rain.top == 'e2'
    assert len(rain.nodes) == 1
    assert len(rain.edges) == 0
    assert rain.nodes[0].properties == {'SF': 'prop', 'TENSE': 'pres'}

    # a graph marked "(fragmented)" whose first node line starts with '|'
    fragmented_text = (
        '{e2: (fragmented)\n'
        '|e5:_nearly_x_deg<0:6>[]\n'
        ' _1:_every_q<7:12>[BV x3]\n'
        ' x3:_dog_n_1<13:16>[]\n'
        ' e2:_bark_v_1<17:24>[ARG1 x3]\n'
        '}'
    )
    fragmented = edsnative.decode(fragmented_text)
    assert fragmented.top == 'e2'
    assert len(fragmented.nodes) == 4
    assert len(fragmented.edges) == 2
    assert fragmented.nodes[3].predicate == '_bark_v_1'
41 |
42 |
def test_decode_no_top():
    """Decode EDS strings that have no top, in several surface layouts."""
    topless_sources = (
        # empty top followed by a colon
        ('{:\n'
         ' e2:_bark_v_1{e}[ARG1 x4]\n'
         ' _1:udef_q[BV x4]\n'
         ' x4:_dog_n_1{x}[]\n'
         '}'),
        # without initial colon
        ('{\n'
         ' e2:_bark_v_1{e}[ARG1 x4]\n'
         ' _1:udef_q[BV x4]\n'
         ' x4:_dog_n_1{x}[]\n'
         '}'),
        # without newlines
        '{:e2:_bark_v_1{e}[ARG1 x4] _1:udef_q[BV x4] x4:_dog_n_1{x}[]}',
    )
    for source in topless_sources:
        decoded = edsnative.decode(source)
        assert decoded.top is None
        assert len(decoded.nodes) == 3

    # with anonymous top (supposedly)
    anonymous = edsnative.decode(
        '{_: e2:_bark_v_1{e}[ARG1 x4] _1:udef_q[BV x4] x4:_dog_n_1{x}[]}'
    )
    assert anonymous.top == '_'
74 |
75 |
def test_decode_identifier():
    """Decode the ``#123`` identifier prefix in multi- and single-line form."""
    sources = (
        ('#123\n'
         '{e2:\n'
         ' e2:_rain_v_1<3:9>{e SF prop, TENSE pres}[]\n'
         '}'),
        '#123 {e2: e2:_rain_v_1<3:9>{e SF prop, TENSE pres}[] }',
    )
    for source in sources:
        decoded = edsnative.decode(source)
        assert decoded.identifier == '123'
88 |
89 |
def test_encode(dogs_bark_from_mrs):
    """Encode empty and populated EDS objects, indented and not."""
    # an empty EDS still produces braces
    assert edsnative.encode(EDS()) == '{\n}'
    assert edsnative.encode(EDS(), indent=False) == '{}'

    graph = EDS(**dogs_bark_from_mrs)
    single_line = (
        '{e2: e2:_bark_v_1{e}[ARG1 x4] _1:udef_q[BV x4] x4:_dog_n_1{x}[]}')
    multi_line = (
        '{e2:\n'
        ' e2:_bark_v_1{e}[ARG1 x4]\n'
        ' _1:udef_q[BV x4]\n'
        ' x4:_dog_n_1{x}[]\n'
        '}')
    assert edsnative.encode(graph, indent=False) == single_line
    assert edsnative.encode(graph) == multi_line
102 |
103 |
def test_encode_no_top(dogs_bark_from_mrs):
    """Encode an EDS whose top has been cleared."""
    graph = EDS(**dogs_bark_from_mrs)
    graph.top = None
    single_line = (
        '{e2:_bark_v_1{e}[ARG1 x4] _1:udef_q[BV x4] x4:_dog_n_1{x}[]}')
    multi_line = (
        '{\n'
        ' e2:_bark_v_1{e}[ARG1 x4]\n'
        ' _1:udef_q[BV x4]\n'
        ' x4:_dog_n_1{x}[]\n'
        '}')
    assert edsnative.encode(graph, indent=False) == single_line
    assert edsnative.encode(graph) == multi_line
115 |
116 |
def test_encode_identifier(dogs_bark_from_mrs):
    """Encode the identifier as a ``#123`` prefix before the graph."""
    bare = EDS(identifier='123')
    assert edsnative.encode(bare, indent=False) == '#123 {}'

    graph = EDS(**dogs_bark_from_mrs)
    graph.identifier = '123'
    single_line = (
        '#123 {e2: e2:_bark_v_1{e}[ARG1 x4] _1:udef_q[BV x4] x4:_dog_n_1{x}[]}'
    )
    multi_line = (
        '#123\n'
        '{e2:\n'
        ' e2:_bark_v_1{e}[ARG1 x4]\n'
        ' _1:udef_q[BV x4]\n'
        ' x4:_dog_n_1{x}[]\n'
        '}')
    assert edsnative.encode(graph, indent=False) == single_line
    assert edsnative.encode(graph) == multi_line
131 |
--------------------------------------------------------------------------------
/delphin/variable.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions for working with MRS variables.
3 | """
4 |
5 | import re
6 | from collections.abc import Iterable
7 | from typing import Optional
8 |
9 | # Default modules need to import the PyDelphin version
10 | from delphin.__about__ import __version__ # noqa: F401
11 |
# Standard variable types (sorts); each value is the prefix that
# precedes the numeric id in a variable string such as 'e2' or 'h1'.
UNSPECIFIC = 'u'  # also 'unbound'; previously 'unknown'
INDIVIDUAL = 'i'  # generalizes over eventualities and instances
INSTANCE_OR_HANDLE = 'p'  # generalizes over instances and handles
EVENTUALITY = 'e'  # events and other eventualities
INSTANCE = 'x'  # instances and nominal things
HANDLE = 'h'  # scope handles and labels
18 |
19 |
20 | # Functions
21 |
# A variable string is a type (sort) followed by a run of digits; the
# type may not end in whitespace or a digit. The pattern is anchored
# with \Z rather than $ because $ also matches just before a trailing
# newline, which would make strings like 'h3\n' pass as valid.
_variable_re = re.compile(r'^([-\w]*[^\s\d])(\d+)\Z')


def split(var: str) -> tuple[str, str]:
    """
    Split a valid variable string into its variable type and id.

    Note that, unlike :func:`id`, the id is returned as a string.

    Args:
        var: the variable string to split
    Returns:
        A (type, id) pair of strings
    Raises:
        ValueError: if *var* is not a well-formed variable string

    Examples:
        >>> variable.split('h3')
        ('h', '3')
        >>> variable.split('ref-ind12')
        ('ref-ind', '12')
    """
    match = _variable_re.match(var)
    if match is None:
        raise ValueError(f'Invalid variable string: {var!s}')
    return match.group(1), match.group(2)
42 |
43 |
def type(var: str) -> str:
    """
    Return the type (i.e., sort) portion of a valid variable string.

    The work is delegated to :func:`split`, so an invalid *var* raises
    whatever :func:`split` raises. :func:`sort` is an alias for
    :func:`type`.

    Examples:
        >>> variable.type('h3')
        'h'
        >>> variable.type('ref-ind12')
        'ref-ind'
    """
    var_type, _ = split(var)
    return var_type


sort = type  #: :func:`sort` is an alias for :func:`type`.
60 |
61 |
def id(var: str) -> int:
    """
    Return the id portion of a valid variable string as an integer.

    The work is delegated to :func:`split`, so an invalid *var* raises
    whatever :func:`split` raises.

    Examples:
        >>> variable.id('h3')
        3
        >>> variable.id('ref-ind12')
        12
    """
    _, vid = split(var)
    return int(vid)
73 |
74 |
def is_valid(var: str) -> bool:
    """
    Report whether *var* is a well-formed variable string.

    A string is well-formed when the module's variable pattern matches
    it; unlike :func:`split`, no exception is raised on a mismatch.

    Examples:
        >>> variable.is_valid('h3')
        True
        >>> variable.is_valid('ref-ind12')
        True
        >>> variable.is_valid('x')
        False
    """
    # match objects are always truthy; a failed match is None
    return bool(_variable_re.match(var))
88 |
89 |
class VariableFactory:
    """
    Simple class to produce variables by incrementing the variable id.

    This class is intended to be used when creating an MRS from a
    variable-less representation like DMRS where the variable types
    are known but no variable id is assigned.

    Args:
        starting_vid (int): the id of the first variable
    Attributes:
        vid (int): the id of the next variable produced by :meth:`new`
        index (dict): a mapping of ids to variables
        store (dict): a mapping of variables to associated properties
    """

    vid: int
    index: dict[int, str]  # vid: var
    store: dict[str, list[tuple[str, str]]]  # var: [(prop, val)]

    def __init__(self, starting_vid: int = 1):
        self.vid = starting_vid
        self.index = {}
        self.store = {}

    def new(
        self,
        type: Optional[str],
        properties: Optional[Iterable[tuple[str, str]]] = None,
    ) -> str:
        """
        Create a new variable for the given *type*.

        The id is the first one at or after :attr:`vid` that is not
        already a key of :attr:`index`. The new variable is recorded
        in :attr:`index` and its properties in :attr:`store`.

        Args:
            type (str, optional): the type of the variable to produce;
                if `None`, :data:`UNSPECIFIC` is used
            properties (iterable, optional): (property, value) pairs
                to associate with the variable; if `None`, an empty
                list is stored
        Returns:
            The new variable string (e.g., ``'x4'``); the properties
            are not returned but can be looked up in :attr:`store`
        """
        if type is None:
            type = UNSPECIFIC
        # find next available vid; ids may have been claimed by
        # entries placed directly into the index
        vid, index = self.vid, self.index
        while vid in index:
            vid += 1
        varstring = f'{type}{vid}'
        index[vid] = varstring
        # store a fresh list so the caller's iterable is not shared
        self.store[varstring] = [] if properties is None else list(properties)
        self.vid = vid + 1
        return varstring
142 |
--------------------------------------------------------------------------------
/docs/api/delphin.web.client.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.web.client
3 | ==================
4 |
5 | .. automodule:: delphin.web.client
6 |
7 | This module provides classes and functions for making requests to
8 | servers that implement the DELPH-IN Web API described here:
9 |
10 | https://github.com/delph-in/docs/wiki/ErgApi
11 |
12 | .. note::
13 |
14 | Requires `httpx` (https://www.python-httpx.org/). This
15 | dependency is satisfied if you install PyDelphin with the ``[web]``
16 | extra (see :doc:`../guides/setup`).
17 |
18 | Basic access is available via the :func:`parse`,
19 | :func:`parse_from_iterable`, :func:`generate`, and
20 | :func:`generate_from_iterable` functions:
21 |
22 | >>> from delphin.web import client
23 | >>> url = 'http://erg.delph-in.net/rest/0.9/'
24 | >>> client.parse('Abrams slept.', server=url)
25 | Response({'input': 'Abrams slept.', 'readings': 1, 'results': [{'result-id': 0}], 'tcpu': 7, 'pedges': 17})
26 | >>> client.parse_from_iterable(['Abrams slept.', 'It rained.'], server=url)
27 |
28 | >>> client.generate('[ LTOP: h0 INDEX: e2 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] RELS: < [ proper_q<0:6> LBL: h4 ARG0: x3 [ x PERS: 3 NUM: sg IND: + ] RSTR: h5 BODY: h6 ] [ named<0:6> LBL: h7 CARG: "Abrams" ARG0: x3 ] [ _sleep_v_1<7:13> LBL: h1 ARG0: e2 ARG1: x3 ] > HCONS: < h0 qeq h1 h5 qeq h7 > ICONS: < > ]')
29 | Response({'input': '[ LTOP: h0 INDEX: e2 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] RELS: < [ proper_q<0:6> LBL: h4 ARG0: x3 [ x PERS: 3 NUM: sg IND: + ] RSTR: h5 BODY: h6 ] [ named<0:6> LBL: h7 CARG: "Abrams" ARG0: x3 ] [ _sleep_v_1<7:13> LBL: h1 ARG0: e2 ARG1: x3 ] > HCONS: < h0 qeq h1 h5 qeq h7 > ICONS: < > ]', 'readings': 1, 'results': [{'result-id': 0, 'surface': 'Abrams slept.'}], 'tcpu': 8, 'pedges': 59})
30 |
31 |
32 | If the `server` parameter is not provided to `parse()`, the default
33 | ERG server (as used above) is used. Request parameters
34 | (described at https://github.com/delph-in/docs/wiki/ErgApi) can be
35 | provided via the `params` argument.
36 |
37 | These functions instantiate and use subclasses of :class:`Client`,
38 | which manages the connections to a server. They can also be used
39 | directly:
40 |
41 | >>> parser = client.Parser(server=url)
42 | >>> parser.interact('Dogs chase cats.')
43 | Response({'input': 'Dogs chase cats.', ...
44 | >>> generator = client.Generator(server=url)
45 | >>> generator.interact('[ LTOP: h0 INDEX: e2 ...')
46 | Response({'input': '[ LTOP: h0 INDEX: e2 ...', ...)
47 |
48 | The server responds with JSON data, which PyDelphin parses to a
49 | dictionary. The responses are then wrapped in
50 | :class:`~delphin.interface.Response` objects, which provide two
51 | methods for inspecting the results. The :meth:`Response.result()
52 | <delphin.interface.Response.result>` method takes a parameter `i` and
53 | returns the *i*\ th result (0-indexed), and the
54 | :meth:`Response.results() <delphin.interface.Response.results>` method
55 | returns the list of all results. The benefit of using these methods is
56 | that they wrap the result dictionary in a
57 | :class:`~delphin.interface.Result` object, which provides methods for
58 | automatically deserializing derivations, EDS, MRS, or DMRS data. For
59 | example:
60 |
61 | >>> r = parser.interact('Dogs chase cats', params={'mrs':'json'})
62 | >>> r.result(0)
63 | Result({'result-id': 0, 'score': 0.5938, ...
64 | >>> r.result(0)['mrs']
65 | {'variables': {'h1': {'type': 'h'}, 'x6': ...
66 | >>> r.result(0).mrs()
67 |
68 |
69 | If PyDelphin does not support deserialization for a format provided by
70 | the server (e.g. LaTeX output), the :class:`~delphin.interface.Result`
71 | object raises a :exc:`TypeError`.
72 |
73 |
74 | Client Functions
75 | ----------------
76 |
77 | .. autofunction:: parse
78 | .. autofunction:: parse_from_iterable
79 |
80 | .. autofunction:: generate
81 | .. autofunction:: generate_from_iterable
82 |
83 |
84 | Client Classes
85 | --------------
86 |
87 | .. autoclass:: Client
88 | :show-inheritance:
89 | :members:
90 |
91 | .. autoclass:: Parser
92 | :show-inheritance:
93 | :members:
94 |
95 | .. autoclass:: Generator
96 | :show-inheritance:
97 | :members:
98 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to contribute
2 |
3 | The easiest way to contribute to PyDelphin is to try it out and enter
4 | bug reports and feature requests. If you're contributing code, fork
5 | the repository and make pull requests to the `main` branch.
6 |
7 |
8 | ## Filing issues
9 |
10 | File issues here: https://github.com/delph-in/pydelphin/issues
11 |
12 | Please use the issue tracker for:
13 |
14 | * bug reports
15 | * feature requests
16 | * documentation requests
17 |
18 | Questions about PyDelphin can be asked on the [DELPH-IN Discourse
19 | site](https://delphinqa.ling.washington.edu/) in the "PyDelphin Tools"
20 | category.
21 |
22 | For bug reports, please provide the following, if possible:
23 |
24 | * a minimal working example
25 | * version of PyDelphin (and relevant dependencies)
26 |
27 | ```python
28 | >>> from delphin.__about__ import __version__
29 | >>> __version__ # distribution version
30 | '1.10.0'
31 | >>> from delphin import mrs
32 | >>> mrs.__version__ # package version
33 | '1.10.0'
34 | ```
35 | * Python version (e.g. 3.9, 3.10, etc.)
36 |
37 | For feature requests, please provide a use case for the feature.
38 |
39 |
40 | ## Submitting code
41 |
42 | Please follow these guidelines for code and repository changes:
43 |
44 | * [PEP8](https://www.python.org/dev/peps/pep-0008/) style guidelines
45 | * [GitHub Flow](https://guides.github.com/introduction/flow/)
46 | branching model
47 | * [Semantic Versioning](http://semver.org/)
48 | * PyDelphin is object-oriented in many cases, but avoid unnecessary
49 | classes when standard Python data structures are sufficient
50 | * In implementing DELPH-IN formalisms and formats, aim first to be
51 | correct and complete (according to documentation at
52 | https://github.com/delph-in/docs/wiki/; if a wiki doesn't exist,
53 | it's a good idea to make one), and secondly convenient. Avoid adding
54 | features that aren't part of the spec and would have limited
55 | utility.
56 | * PyDelphin is primarily a library, not an application, so application
57 | code in general belongs in separate repositories. Applications can,
58 | however, make use of the `delphin` namespace.
59 | * API documentation is generated from the code and uses docstrings, so
60 | provide descriptive docstrings for all modules, classes, methods, and
61 | functions. Follow [Google-style docstrings] and use
62 | [reStructuredText] for formatting.
63 |
64 | ### Testing the code
65 |
66 | PyDelphin uses [Hatch](https://hatch.pypa.io/) for managing builds and
67 | dependencies. Install Hatch and use the following commands for testing
68 | your code locally:
69 |
70 | ```console
71 | $ hatch shell # activate a virtual environment with PyDelphin installed
72 | $ hatch run dev:lint # lint the code
73 | $ hatch run dev:typecheck # type-check the code
74 | $ hatch run dev:test # run unit tests
75 | $ hatch build # build a source distribution and wheel
76 | ```
77 |
78 | Always run the linting, type-checking, and testing commands before
79 | committing. They will be run automatically on pull requests, but it's
80 | convenient to make sure everything looks good locally before opening a
81 | pull request.
82 |
83 | ## Documentation
84 |
85 | The documentation resides in the `docs/` subdirectory, which contains
86 | all content for the guides and some structural content for the API
87 | reference. The bulk of the content for the API reference is in the
88 | docstrings of the modules, classes, and functions of the code
89 | itself. Therefore, all *public* modules, classes, methods, and
90 | functions should have docstrings and should not have a name with a
91 | leading underscore, as otherwise they will not appear in the
92 | documentation.
93 |
94 | The API reference and tutorials are written in [reStructuredText]
95 | and generated using [Sphinx] on the [Read the Docs] service.
96 | Repository files, such as the README, CHANGELOG, and CONTRIBUTING
97 | files, are written in [Markdown].
98 |
99 | To build the documentation, run the following command:
100 |
101 | ```console
102 | $ hatch run docs:build
103 | ```
104 |
105 | Do not check in the generated documentation files (e.g., `*.html`);
106 | only documentation source files belong, as the rest will be
107 | generated automatically by [Read the Docs].
108 |
--------------------------------------------------------------------------------
/delphin/cli/convert.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Convert DELPH-IN Semantics representations and formats.
4 |
5 | Use --list to see the available codecs. A codec name may be suffixed
6 | with "-lines" to enable line-based reading/writing, in which case the
7 | --indent option is ignored.
8 | """
9 |
10 | import argparse
11 | import sys
12 |
13 | from delphin import util
14 | from delphin.commands import convert
15 |
# Created without the automatic -h/--help option (add_help=False);
# the arguments are attached at the bottom of this module.
parser = argparse.ArgumentParser(add_help=False)  # filled out below

# Metadata describing this subcommand: its name, a one-line help
# string, the module docstring as the long description, and the
# argument parser defined above.
# NOTE(review): presumably consumed by the main CLI entry point to
# register the subcommand -- confirm against delphin/main.py.
COMMAND_INFO = {
    'name': 'convert',
    'help': 'Convert DELPH-IN Semantics representations',
    'description': __doc__,
    'parser': parser
}
24 |
25 |
def call_convert(args):
    """
    Run the convert command for the parsed arguments in *args*.

    With ``--list`` the available codecs are printed and nothing is
    converted. Otherwise the color and indent options are normalized
    and the result of :func:`delphin.commands.convert` is printed.
    """
    if args.list:
        _list_codecs(args.verbosity > 0)
        return

    # 'auto' only enables color when stdout is a terminal
    if args.color == 'always':
        color = True
    elif args.color == 'auto':
        color = sys.stdout.isatty()
    else:
        color = False

    # the default (True) and falsy values pass through unchanged; any
    # other value is a string that is either "no"/"none" or an integer
    requested = args.indent
    if requested and requested is not True:
        if requested.lower() in ('no', 'none'):
            args.indent = None
        else:
            args.indent = int(requested)

    # 'from' is a keyword, so it cannot be read as args.from
    source_format = getattr(args, 'from')
    output = convert(
        args.PATH,
        source_format,
        args.to,
        properties=(not args.no_properties),
        lnk=(not args.no_lnk),
        color=color,
        indent=args.indent,
        select=args.select,
        # below are format-specific kwargs
        show_status=args.show_status,
        predicate_modifiers=args.predicate_modifiers,
        semi=args.semi)
    print(output)
50 |
51 |
def _list_codecs(verbose):
    """
    Print the available codecs grouped by representation.

    Each codec line shows its name, ``r`` if the module has a ``load``
    attribute, ``w`` if it has a ``dump`` attribute, and a description.
    The *verbose* argument is accepted but not used here.
    """
    codecs = util.inspect_codecs()

    for representation, entries in sorted(codecs.items()):
        print(representation.upper())
        for name, mod, description in sorted(entries):
            readable = 'r' if hasattr(mod, 'load') else '-'
            writable = 'w' if hasattr(mod, 'dump') else '-'
            print(f'\t{name:12s}\t{readable}/{writable}\t{description}')
63 |
64 |
# Arguments for the convert command
parser.set_defaults(func=call_convert)
parser.add_argument(
    'PATH',
    nargs='?',
    help=('file with representations to convert or testsuite directory '
          'from which result.mrs will be selected; if not given, '
          '<stdin> is read as though it were a file'))
parser.add_argument(
    '--list',
    action='store_true',
    help='list the available codecs and capabilities')
# stored as the attribute 'from', which is a Python keyword and so
# must be read with vars()/getattr() rather than attribute syntax
parser.add_argument(
    '-f',
    '--from',
    metavar='FMT',
    default='simplemrs',
    help='original representation (default: simplemrs)')
parser.add_argument(
    '-t',
    '--to',
    metavar='FMT',
    default='simplemrs',
    help='target representation (default: simplemrs)')
parser.add_argument(
    '--no-properties',
    action='store_true',
    help='suppress morphosemantic properties')
parser.add_argument(
    '--no-lnk',
    action='store_true',
    help='suppress lnk surface alignments and surface strings')
# default is True when the option is absent; a value given on the
# command line arrives as a string and is normalized in call_convert()
parser.add_argument(
    '--indent',
    metavar='N',
    nargs='?',
    default=True,
    help='format with explicit indent N ("no" for no newlines)')
parser.add_argument(
    '--color',
    metavar='WHEN',
    default='auto',
    help='(auto|always|never) use ANSI color (default: auto)')
parser.add_argument(
    '--select',
    metavar='QUERY',
    default='result.mrs',
    help=('TSQL query for selecting MRS data when PATH points to '
          'a testsuite directory (default: result.mrs)'))
parser.add_argument(
    '--show-status',
    action='store_true',
    help='(--to=eds only) annotate disconnected graphs and nodes')
# the next two options share one destination (predicate_modifiers)
parser.add_argument(
    '--predicate-modifiers',
    action='store_true',
    help='(--to=eds* only) attempt to join disconnected graphs')
parser.add_argument(
    '--no-predicate-modifiers',
    dest='predicate_modifiers',
    action='store_false',
    help='(--to=eds* only) do not use predicate modification')
parser.add_argument(
    '--sem-i', '--semi',
    dest='semi',
    metavar='PATH',
    help='(--to=indexedmrs only) path to a SEM-I')
--------------------------------------------------------------------------------
/docs/api/delphin.variable.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.variable
3 | ================
4 |
5 | .. automodule:: delphin.variable
6 |
7 | This module contains functions to inspect the type and identifier
8 | of variables (:func:`split`, :func:`type`, :func:`id`) and check if
9 | a variable string is well-formed (:func:`is_valid`). It
10 | additionally has constants for the standard variable types:
11 | :data:`UNSPECIFIC`, :data:`INDIVIDUAL`, :data:`INSTANCE_OR_HANDLE`,
12 | :data:`EVENTUALITY`, :data:`INSTANCE`, and :data:`HANDLE`. Finally,
13 | the :class:`VariableFactory` class may be useful for tasks like
14 | DMRS to MRS conversion for managing the creation of new variables.
15 |
16 | Variables in MRS
17 | ----------------
18 |
19 | Variables are a concept in Minimal Recursion Semantics coming from
20 | formal semantics. Consider this logical form for a sentence like
21 | "the dog barks"::
22 |
23 | ∃x(dog(x) ^ bark(x))
24 |
25 | Here *x* is a variable that represents an entity that has the
26 | properties that it is a dog and it is barking. Davidsonian
27 | semantics introduce variables for events as well::
28 |
29 | ∃e∃x(dog(x) ^ bark(e, x))
30 |
31 | MRS uses variables in a similar way to Davidsonian semantics,
32 | except that events are not explicitly quantified. That might look
33 | like the following (if we ignore quantifier scope
34 | underspecification)::
35 |
36 | the(x4) [dog(x4)] {bark(e2, x4)}
37 |
38 | "Variables" are also used for scope handles and labels, as in this
39 | minor modification that indicates the scope handles::
40 |
41 | h3:the(x4) [h6:dog(x4)] {h1:bark(e2, x4)}
42 |
43 | There is some confusion of terminology here. Sometimes "variable"
44 | is contrasted with "handle" to mean an instance (`x`) or
45 | eventuality (`e`) variable, but in this module "variable" means the
46 | identifiers used for instances, eventualities, handles, and their
47 | supertypes.
48 |
49 | The form of MRS variables is the concatenation of a variable *type*
50 | (also called a *sort*) with a variable *id*. For example, the
51 | variable type `e` and id `2` form the variable `e2`. Generally in
52 | MRS the variable ids, regardless of the type, are unique, so for
53 | instance one would not see `x2` and `e2` in the same structure.
54 |
55 | The variable types are arranged in a hierarchy. While the most
56 | accurate variable type hierarchy for a particular grammar is
57 | obtained via its SEM-I (see :mod:`delphin.semi`), in practice the
58 | standard hierarchy given below is used by all DELPH-IN
59 | grammars. The hierarchy in TDL would look like this (with an ASCII
60 | rendering in comments on the right):
61 |
62 | .. code-block:: tdl
63 |
64 |    u := *top*.  ;     u
65 |    i := u.      ;    / \
66 |    p := u.      ;   i   p
67 |    e := i.      ;  / \ / \
68 |    x := i & p.  ; e   x   h
69 |    h := p.
70 |
71 | In PyDelphin the equivalent hierarchy could be created as follows
72 | with a :class:`delphin.hierarchy.MultiHierarchy`:
73 |
74 | >>> from delphin import hierarchy
75 | >>> h = hierarchy.MultiHierarchy(
76 | ... '*top*',
77 | ... {'u': '*top*',
78 | ... 'i': 'u',
79 | ... 'p': 'u',
80 | ... 'e': 'i',
81 | ... 'x': 'i p',
82 | ... 'h': 'p'}
83 | ... )
84 |
85 |
86 | Module Constants
87 | ----------------
88 |
89 | .. data:: UNSPECIFIC
90 |
91 | `u` -- The unspecific (or unbound) top-level variable type.
92 |
93 | .. data:: INDIVIDUAL
94 |
95 | `i` -- The variable type that generalizes over eventualities and
96 | instances.
97 |
98 | .. data:: INSTANCE_OR_HANDLE
99 |
100 | `p` -- The variable type that generalizes over instances and
101 | handles.
102 |
103 | .. data:: EVENTUALITY
104 |
105 | `e` -- The variable type for events and other eventualities
106 | (adjectives, adverbs, prepositions, etc.).
107 |
108 | .. data:: INSTANCE
109 |
110 | `x` -- The variable type for instances and nominal things.
111 |
112 | .. data:: HANDLE
113 |
114 | `h` -- The variable type for scope handles and labels.
115 |
116 | Module Functions
117 | ----------------
118 |
119 | .. autofunction:: split
120 | .. autofunction:: type
121 | .. autofunction:: sort
122 | .. autofunction:: id
123 | .. autofunction:: is_valid
124 |
125 | Classes
126 | -------
127 |
128 | .. autoclass:: VariableFactory
129 | :members:
130 |
--------------------------------------------------------------------------------
/tests/lnk_test.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin.lnk import Lnk, LnkError, LnkMixin
5 |
6 |
class TestLnk:
    """Tests for constructing, comparing, and serializing Lnk objects."""

    def test_raw_init(self):
        """Direct construction needs arguments and a known Lnk type."""
        with pytest.raises(TypeError):
            Lnk()
        # don't allow just any Lnk type
        with pytest.raises(LnkError):
            Lnk('lnktype', (0, 1))

    def test__eq__(self):
        """Equality depends on both the Lnk type and its data."""
        assert Lnk.default() == Lnk.default()
        assert Lnk.default() != Lnk.charspan(0, 1)
        assert Lnk.charspan(0, 1) == Lnk.charspan(0, 1)
        assert Lnk.charspan(0, 1) != Lnk.charspan(0, 2)
        assert Lnk.charspan(0, 1) != Lnk.chartspan(0, 1)

    def test__bool__(self):
        """Default and (-1, -1) charspan Lnks are falsy; others truthy."""
        assert not Lnk.default()
        assert not Lnk.charspan(-1, -1)
        assert Lnk.charspan(0, 0)
        assert Lnk.chartspan(0, 0)
        assert Lnk.tokens([])
        assert Lnk.edge(0)

    def testDefault(self):
        """The default Lnk is unspecified and serializes to nothing."""
        unspecified = Lnk.default()
        assert unspecified.type == Lnk.UNSPECIFIED
        assert str(unspecified) == ''
        repr(unspecified)  # no error

    def testCharSpanLnk(self):
        """Character spans serialize as ``<from:to>``."""
        span = Lnk.charspan(0, 1)
        assert span.type == Lnk.CHARSPAN
        assert span.data == (0, 1)
        assert str(span) == '<0:1>'
        assert span == Lnk(str(span))
        repr(span)  # no error
        # numeric strings are converted to integers
        span = Lnk.charspan('0', '1')
        assert span.data == (0, 1)
        for bad_args in ((1,), ([1, 2],), (1, 2, 3)):
            with pytest.raises(TypeError):
                Lnk.charspan(*bad_args)
        with pytest.raises(ValueError):
            Lnk.charspan('a', 'b')

    def testChartSpanLnk(self):
        """Chart spans serialize as ``<from#to>``."""
        span = Lnk.chartspan(0, 1)
        assert span.type == Lnk.CHARTSPAN
        assert span.data == (0, 1)
        assert str(span) == '<0#1>'
        assert span == Lnk(str(span))
        repr(span)  # no error
        # numeric strings are converted to integers
        span = Lnk.chartspan('0', '1')
        assert span.data == (0, 1)
        for bad_args in ((1,), ([1, 2],), (1, 2, 3)):
            with pytest.raises(TypeError):
                Lnk.chartspan(*bad_args)
        with pytest.raises(ValueError):
            Lnk.chartspan('a', 'b')

    def testTokensLnk(self):
        """Token Lnks serialize as space-separated ids in angle brackets."""
        toks = Lnk.tokens([1, 2, 3])
        assert toks.type == Lnk.TOKENS
        assert toks.data == (1, 2, 3)
        assert str(toks) == '<1 2 3>'
        assert toks == Lnk(str(toks))
        repr(toks)  # no error
        toks = Lnk.tokens(['1'])
        assert toks.data == (1,)
        # empty tokens list might be invalid, but accept for now
        toks = Lnk.tokens([])
        assert toks.data == tuple()
        with pytest.raises(TypeError):
            Lnk.tokens(1)
        with pytest.raises(ValueError):
            Lnk.tokens(['a', 'b'])

    def testEdgeLnk(self):
        """Edge Lnks serialize as ``<@id>``."""
        edge = Lnk.edge(1)
        assert edge.type == Lnk.EDGE
        assert edge.data == 1
        assert str(edge) == '<@1>'
        assert edge == Lnk(str(edge))
        repr(edge)  # no error
        edge = Lnk.edge('1')
        assert edge.data == 1
        for bad_arg in (None, (1,)):
            with pytest.raises(TypeError):
                Lnk.edge(bad_arg)
        with pytest.raises(ValueError):
            Lnk.edge('a')
104 |
105 |
class TestLnkMixin():
    """Tests for the ``cfrom``/``cto`` accessors provided by LnkMixin."""

    def test_inherit(self):
        """
        Check ``cfrom`` and ``cto`` for subclasses with various Lnks.

        Both are expected to be -1 unless the instance carries a
        character-span Lnk, in which case they mirror its endpoints.
        """
        # no lnk attribute at all
        class NoLnk(LnkMixin):
            pass
        n = NoLnk()
        assert n.cfrom == -1
        assert n.cto == -1

        # lnk attribute explicitly set to None
        class WithNoneLnk(LnkMixin):
            def __init__(self):
                self.lnk = None
        n = WithNoneLnk()
        assert n.cfrom == -1
        assert n.cto == -1

        # a Lnk that is not a character span
        class WithNonCharspanLnk(LnkMixin):
            def __init__(self):
                self.lnk = Lnk.chartspan(0, 1)
        n = WithNonCharspanLnk()
        assert n.cfrom == -1
        assert n.cto == -1

        # a character span exposes both of its endpoints
        class WithCharspanLnk(LnkMixin):
            def __init__(self):
                self.lnk = Lnk.charspan(0, 1)
        n = WithCharspanLnk()
        assert n.cfrom == 0
        assert n.cto == 1
133 |
--------------------------------------------------------------------------------
/delphin/codecs/mrsprolog.py:
--------------------------------------------------------------------------------
1 | """
2 | Serialization functions for the MRS-Prolog format.
3 | """
4 |
5 | from pathlib import Path
6 |
7 | from delphin.mrs import CONSTANT_ROLE
8 | from delphin.sembase import role_priority
9 |
# Codec metadata: this module serializes the 'mrs' representation.
CODEC_INFO = {
    'representation': 'mrs',
}
13 |
14 |
def dump(ms, destination, properties=True, lnk=True,
         indent=False, encoding='utf-8'):
    """
    Serialize MRS objects to the Prolog representation and write to a file.

    The text is produced by :func:`dumps` and written with a trailing
    newline.

    Args:
        ms: an iterator of MRS objects to serialize
        destination: filename or file object where data will be
            written; anything with a ``write`` attribute is treated
            as a file object
        properties: forwarded to :func:`dumps`
        lnk: forwarded to :func:`dumps`
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
        encoding (str): if *destination* is a filename, write to the
            file with the given encoding; otherwise it is ignored
    """
    text = dumps(ms, properties=properties, lnk=lnk, indent=indent)
    if not hasattr(destination, 'write'):
        # a path: expand a leading ~ and open it for writing
        path = Path(destination).expanduser()
        with path.open('w', encoding=encoding) as fh:
            print(text, file=fh)
        return
    print(text, file=destination)
37 |
38 |
def dumps(ms, properties=True, lnk=True, indent=False):
    """
    Serialize MRS objects to the Prolog representation

    Args:
        ms: an iterator of MRS objects to serialize
        properties: forwarded to the internal encoder
        lnk: forwarded to the internal encoder
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
    Returns:
        the Prolog string representation of a corpus of MRSs
    """
    serialized = _encode(ms, properties, lnk, indent)
    return serialized
53 |
54 |
def encode(m, properties=True, lnk=True, indent=False):
    """
    Serialize a single MRS object to a Prolog string.

    Args:
        m: an MRS object
        properties (bool): forwarded to the internal encoder
        lnk: forwarded to the internal encoder
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
    Returns:
        a Prolog-serialization of the MRS object
    """
    return _encode_mrs(m, properties=properties, lnk=lnk, indent=indent)
69 |
70 |
def _encode(ms, properties, lnk, indent):
    """
    Encode each MRS in *ms* and join the results.

    The encodings are separated by a single space when *indent* is
    `None` or `False`, and by a newline otherwise.
    """
    delim = ' ' if (indent is None or indent is False) else '\n'
    encoded = [_encode_mrs(m, properties, lnk, indent) for m in ms]
    return delim.join(encoded)
77 |
78 |
def _encode_mrs(m, properties, lnk, indent):
    """
    Serialize a single MRS object to a Prolog ``psoa(...)`` term.

    Args:
        m: the MRS object; its ``top``, ``index``, ``rels``,
            ``hcons``, and ``icons`` attributes are read
        properties: accepted but not consulted by this function
        lnk: accepted but not consulted by this function
        indent: `None` or `False` for single-line output; `True`
            (treated as 2) or an integer number of spaces to put the
            parts on separate, indented lines
    Returns:
        the Prolog string for *m*
    """
    # templates for the whole term and for one hcons/icons constraint
    pl = 'psoa({topvars},{_}[{rels}],{_}hcons([{hcons}]){icons})'
    plvc = '{reln}({left},{right})'
    # pre-compute the various indent levels:
    #   _    goes before the rels list, hcons(...) and icons(...)
    #   __   separates consecutive rel(...) terms
    #   ___  goes before the attrval list inside each rel(...)
    #   ____ separates consecutive attrval(...) terms
    if indent is None or indent is False:
        _, __, ___, ____ = '', ',', '', ','
    else:
        if indent is True:
            indent = 2
        _ = '\n' + (' ' * indent)
        __ = ',' + _ + (' ' * len('['))
        ___ = _ + (' ' * len('[rel('))
        ____ = __ + (' ' * len('rel(['))

    # the top is always emitted (via str()); the index only when set
    topvars = [str(m.top)]
    if m.index is not None:
        topvars.append(str(m.index))
    rels = [_encode_rel(rel, ___, ____) for rel in m.rels]
    # the icons(...) term is omitted entirely when there are no icons
    icons = ''
    if m.icons:
        icons = ',{_}icons([{ics}])'.format(
            _=_,
            ics=','.join(
                plvc.format(reln=ic.relation, left=ic.left, right=ic.right)
                for ic in m.icons
            )
        )
    # NOTE: the ``___`` keyword has no matching field in ``pl``, so
    # str.format() ignores it
    return pl.format(
        topvars=','.join(topvars),
        rels=__.join(rels),
        hcons=','.join(
            plvc.format(reln=hc.relation, left=hc.hi, right=hc.lo)
            for hc in m.hcons
        ),
        icons=icons,
        _=_,
        ___=___
    )
117 |
118 |
def _encode_rel(ep, ___, ____):
    """
    Serialize one elementary predication to a Prolog ``rel(...)`` term.

    Args:
        ep: the predication; its ``predicate``, ``label``, and
            ``args`` attributes are read
        ___: text placed before the attribute-value list
        ____: separator placed between ``attrval(...)`` terms
    Returns:
        the Prolog string for *ep*
    """
    attrvals = []
    for role in sorted(ep.args, key=role_priority):
        value = ep.args[role]
        # constant values are wrapped in single quotes (no escaping)
        if role == CONSTANT_ROLE:
            value = f"'{value}'"
        attrvals.append(f"attrval('{role}',{value})")
    joined = ____.join(attrvals)
    return f"rel('{ep.predicate}',{ep.label},{___}[{joined}])"
132 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.edsjson.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.edsjson
3 | ======================
4 |
5 | .. automodule:: delphin.codecs.edsjson
6 |
7 | Example:
8 |
9 | * *The new chef whose soup accidentally spilled quit and left.*
10 |
11 | ::
12 |
13 | {
14 | "top": "e18",
15 | "nodes": {
16 | "_1": {
17 | "label": "_the_q",
18 | "edges": {"BV": "x3"},
19 | "lnk": {"from": 0, "to": 3}
20 | },
21 | "e8": {
22 | "label": "_new_a_1",
23 | "edges": {"ARG1": "x3"},
24 | "lnk": {"from": 4, "to": 7},
25 | "type": "e",
26 | "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "bool", "PERF": "-"}
27 | },
28 | "x3": {
29 | "label": "_chef_n_1",
30 | "edges": {},
31 | "lnk": {"from": 8, "to": 12},
32 | "type": "x",
33 | "properties": {"PERS": "3", "NUM": "sg", "IND": "+"}
34 | },
35 | "_2": {
36 | "label": "def_explicit_q",
37 | "edges": {"BV": "x10"},
38 | "lnk": {"from": 13, "to": 18}
39 | },
40 | "e14": {
41 | "label": "poss",
42 | "edges": {"ARG1": "x10", "ARG2": "x3"},
43 | "lnk": {"from": 13, "to": 18},
44 | "type": "e",
45 | "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-"}
46 | },
47 | "x10": {
48 | "label": "_soup_n_1",
49 | "edges": {},
50 | "lnk": {"from": 19, "to": 23},
51 | "type": "x",
52 | "properties": {"PERS": "3", "NUM": "sg"}
53 | },
54 | "e15": {
55 | "label": "_accidental_a_1",
56 | "edges": {"ARG1": "e16"},
57 | "lnk": {"from": 24, "to": 36},
58 | "type": "e",
59 | "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-"}
60 | },
61 | "e16": {
62 | "label": "_spill_v_1",
63 | "edges": {"ARG1": "x10"},
64 | "lnk": {"from": 37, "to": 44},
65 | "type": "e",
66 | "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"}
67 | },
68 | "e18": {
69 | "label": "_quit_v_1",
70 | "edges": {"ARG1": "x3"},
71 | "lnk": {"from": 45, "to": 49},
72 | "type": "e",
73 | "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"}
74 | },
75 | "e2": {
76 | "label": "_and_c",
77 | "edges": {"ARG1": "e18", "ARG2": "e20"},
78 | "lnk": {"from": 50, "to": 53},
79 | "type": "e",
80 | "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"}
81 | },
82 | "e20": {
83 | "label": "_leave_v_1",
84 | "edges": {"ARG1": "x3"},
85 | "lnk": {"from": 54, "to": 59},
86 | "type": "e",
87 | "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"}
88 | }
89 | }
90 | }
91 |
92 |
93 | Module Constants
94 | ----------------
95 |
96 | .. data:: HEADER
97 |
98 | `'['`
99 |
100 | .. data:: JOINER
101 |
102 | `','`
103 |
104 | .. data:: FOOTER
105 |
106 | `']'`
107 |
108 | Deserialization Functions
109 | -------------------------
110 |
111 | .. function:: load(source)
112 |
113 | See the :func:`load` codec API documentation.
114 |
115 | .. function:: loads(s)
116 |
117 | See the :func:`loads` codec API documentation.
118 |
119 | .. function:: decode(s)
120 |
121 | See the :func:`decode` codec API documentation.
122 |
123 | Serialization Functions
124 | -----------------------
125 |
126 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
127 |
128 | See the :func:`dump` codec API documentation.
129 |
130 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
131 |
132 | See the :func:`dumps` codec API documentation.
133 |
134 | .. function:: encode(m, properties=True, lnk=True, indent=False)
135 |
136 | See the :func:`encode` codec API documentation.
137 |
138 | Complementary Functions
139 | -----------------------
140 |
141 | .. autofunction:: from_dict
142 | .. autofunction:: to_dict
143 |
144 |
--------------------------------------------------------------------------------
/docs/guides/edm.rst:
--------------------------------------------------------------------------------
1 | Elementary Dependency Matching
2 | ==============================
3 |
4 | :wiki:`Elementary Dependency Matching <ElementaryDependencyMatch>`
5 | (EDM; `Dridan and Oepen, 2011`_) is a metric for comparing two
6 | semantic dependency graphs that annotate the same sentence. It
7 | requires that each node is aligned to a character span in the original
8 | sentence.
9 |
10 | .. seealso::
11 |
12 | The :mod:`delphin.edm` module is the programmatic interface for the
13 | EDM functionality, while this guide describes the command-line
14 | interface.
15 |
16 | .. tip::
17 |
18 | The smatch metric (`Cai and Knight, 2013
19 | <https://aclanthology.org/P13-2131/>`_) is essentially the same
20 | except that instead of relying on surface-aligned nodes it finds a
21 | mapping of nodes that optimizes the number of matching triples. The
22 | search uses stochastic hill-climbing, whereas EDM gives
23 | deterministic results. EDS and DMRS representations can be used
24 | with the `smatch tool <https://github.com/snowblink14/smatch>`_ if
25 | they have been serialized to the PENMAN format (see
26 | :mod:`delphin.codecs.edspenman` and
27 | :mod:`delphin.codecs.dmrspenman`).
28 |
29 | Command-line Usage
30 | ------------------
31 |
32 | The :command:`edm` subcommand provides a simple interface for
33 | computing EDM for EDS, DMRS, or MRS representations. The basic usage
34 | is:
35 |
36 | .. code-block:: console
37 |
38 | $ delphin edm GOLD TEST
39 |
40 | ``GOLD`` and ``TEST`` may be files containing serialized semantic
41 | representations or :wiki:`[incr tsdb()] <ItsdbTop>` test suites
42 | containing parsed analyses.
43 |
44 | For example:
45 |
46 | .. code-block:: console
47 |
48 | $ delphin edm gold.eds test.eds
49 | Precision: 0.9344262295081968
50 | Recall: 0.9193548387096774
51 | F-score: 0.9268292682926829
52 |
53 | Per-item information can be printed by increasing the logging
54 | verbosity to the ``INFO`` level (``-vv``). Weights for the different
55 | classes of triples can be adjusted with ``-A`` for argument structure,
56 | ``-N`` for node names, ``-P`` for node properties, ``-C`` for
57 | constants, and ``-T`` for graph tops. Try ``delphin edm --help`` for
58 | more information.
59 |
60 | Differences from Dridan and Oepen, 2011
61 | ---------------------------------------
62 |
63 | Following the `mtool`_ implementation, :mod:`delphin.edm` treats
64 | constant arguments (``CARG``) as independent triples; however, unlike
65 | in mtool, they get their own category and weight. This implementation
66 | also follows mtool in checking if the graph tops are the same, also
67 | with their own category and weight. One can therefore get the same
68 | results as `Dridan and Oepen, 2011`_ by setting the weights for
69 | top-triples and constant-triples to 0:
70 |
71 | .. code-block:: console
72 |
73 | $ delphin edm -C0 -T0 GOLD TEST
74 |
75 | Sometimes it helps to ignore missing items on the gold side, the test
76 | side, or both. Missing items can occur when ``GOLD`` or ``TEST`` are
77 | files with different numbers of representations, or when they are
78 | :wiki:`[incr tsdb()] ` test suites with different numbers of
79 | analyses per item. For example, to ignore pairs where the gold
80 | representation is missing, do the following:
81 |
82 | .. code-block:: console
83 |
84 | $ delphin edm --ignore-missing=gold GOLD TEST
85 |
86 | Relevance to non-EDS Semantic Representations
87 | ---------------------------------------------
88 |
89 | While EDM was designed for the semantic dependencies extracted from
90 | Elementary Dependency Structures (:wiki:`EDS `), it can be
91 | used for other representations as long as they have surface alignments
92 | for the nodes. This implementation can natively work with a variety
93 | of DELPH-IN representations and :doc:`formats <../api/delphin.codecs>`
94 | via the ``--format`` option, including those for Minimal Recursion
95 | Semantics (:wiki:`MRS `) and Dependency Minimal Recursion
96 | Semantics (:wiki:`DMRS `). Non-DELPH-IN representations are
97 | also possible as long as they can be serialized into one of these
98 | formats.
99 |
100 | Other Implementations
101 | ---------------------
102 |
103 | #. Rebecca Dridan's original Perl version (see the :wiki:`wiki
104 | `):
105 | #. `mtool`_: created for the 2019 CoNLL shared task on `Meaning
106 | Representation Parsing `_
107 | #. As part of :wiki:`[incr tsdb()] `
108 | #. As part of `DeepDeepParser `_
109 |
110 | .. _Dridan and Oepen, 2011: https://aclanthology.org/W11-2927/
111 | .. _mtool: https://github.com/cfmrp/mtool
112 |
--------------------------------------------------------------------------------
/docs/api/delphin.tsql.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.tsql
3 | ============
4 |
5 | .. seealso::
6 |
7 | The :ref:`select-tutorial` command is a quick way to query test
8 | suites with TSQL queries.
9 |
10 | .. automodule:: delphin.tsql
11 |
12 | .. note::
13 |
14 | This module deals with queries of TSDB databases. For basic,
15 | low-level access to the databases, see :mod:`delphin.tsdb`. For
16 | high-level operations and structures on top of the databases,
17 | see :mod:`delphin.itsdb`.
18 |
19 | This module implements a subset of TSQL, namely the 'select' (or
20 | 'retrieve') queries for extracting data from test suites. The
21 | general form of a select query is::
22 |
23 | [select] <projection> [from <relations>] [where <condition>]*
24 |
25 | For example, the following selects item identifiers that took more
26 | than half a second to parse::
27 |
28 | select i-id from item where total > 500
29 |
30 | The `select` string is necessary when querying with the generic
31 | :func:`query` function, but is implied and thus disallowed when
32 | using the :func:`select` function.
33 |
34 | The `<projection>` is a list of space-separated field names (e.g.,
35 | `i-id i-input mrs`), or the special string `*` which selects all
36 | columns from the joined relations.
37 |
38 | The optional `from` clause provides a list of relation names (e.g.,
39 | `item parse result`) that are joined on shared keys. The `from`
40 | clause is required when `*` is used for the projection, but it can
41 | also be used to select columns from non-standard relations (e.g.,
42 | `i-id from output`). Alternatively, qualified names (e.g.,
43 | `item.i-id`) can specify both the column and the relation at the
44 | same time.
45 |
46 | The `where` clause provides conditions for filtering the list of
47 | results. Conditions are binary operations that take a column or
48 | data specifier on the left side and an integer (e.g., `10`), a date
49 | (e.g., `2018-10-07`), or a string (e.g., `"sleep"`) on the right
50 | side of the operator. The allowed conditions are:
51 |
52 | ================ ======================================
53 | Condition Form
54 | ================ ======================================
55 | Regex match ``<field> ~ "regex"``
56 | Regex fail ``<field> !~ "regex"``
57 | Equality ``<field> = (integer|date|"string")``
58 | Inequality ``<field> != (integer|date|"string")``
59 | Less-than ``<field> < (integer|date)``
60 | Less-or-equal ``<field> <= (integer|date)``
61 | Greater-than ``<field> > (integer|date)``
62 | Greater-or-equal ``<field> >= (integer|date)``
63 | ================ ======================================
64 |
65 | Boolean operators can be used to join multiple conditions or for
66 | negation:
67 |
68 | =========== =====================================
69 | Operation Form
70 | =========== =====================================
71 | Disjunction ``X | Y``, ``X || Y``, or ``X or Y``
72 | Conjunction ``X & Y``, ``X && Y``, or ``X and Y``
73 | Negation ``!X`` or ``not X``
74 | =========== =====================================
75 |
76 | Normally, disjunction scopes over conjunction, but parentheses may
77 | be used to group clauses, so the following are equivalent::
78 |
79 | ... where i-id = 10 or i-id = 20 and i-input ~ "[Dd]og"
80 | ... where i-id = 10 or (i-id = 20 and i-input ~ "[Dd]og")
81 |
82 | Multiple `where` clauses may also be used as a conjunction that
83 | scopes over disjunction, so the following are equivalent::
84 |
85 | ... where (i-id = 10 or i-id = 20) and i-input ~ "[Dd]og"
86 | ... where i-id = 10 or i-id = 20 where i-input ~ "[Dd]og"
87 |
88 | This facilitates query construction, where a user may want to apply
89 | additional global constraints by appending new conditions to the
90 | query string.
91 |
92 | PyDelphin has several differences to standard TSQL:
93 |
94 | * `select *` requires a `from` clause
95 | * `select * from item result` does not also include columns from
96 | the intervening `parse` relation
97 | * `select i-input from result` returns a matching `i-input` for
98 | every row in `result`, rather than only the unique rows
99 |
100 | PyDelphin also adds some features to standard TSQL:
101 |
102 | * qualified column names (e.g., `item.i-id`)
103 | * multiple `where` clauses (as described above)
104 |
105 |
106 | Module Functions
107 | ----------------
108 |
109 | .. autofunction:: inspect_query
110 | .. autofunction:: query
111 | .. autofunction:: select
112 |
113 | Exceptions
114 | ----------
115 |
116 | .. autoexception:: TSQLSyntaxError
117 | :show-inheritance:
118 |
119 | .. autoexception:: TSQLError
120 | :show-inheritance:
121 |
--------------------------------------------------------------------------------
/tests/codecs/simplemrs_test.py:
--------------------------------------------------------------------------------
1 |
2 | from delphin.codecs import simplemrs
3 |
4 |
def test_decode_nearly(nearly_all_dogs_bark_mrs):
    """Decoding the single-line SimpleMRS text yields the fixture MRS."""
    serialized = (
        '[ <0:21> "Nearly all dogs bark."'
        ' TOP: h0'
        ' INDEX: e2 [ e SF: prop TENSE: pres MOOD: indicative PROG: - PERF: - ]'
        ' RELS: <'
        ' [ _nearly_x_deg<0:6> LBL: h4 ARG0: e5 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: u6 ]'
        ' [ _all_q<7:10> LBL: h4 ARG0: x3 [ x PERS: 3 NUM: pl IND: + PT: pt ] RSTR: h7 BODY: h8 ]'
        ' [ _dog_n_1<11:15> LBL: h9 ARG0: x3 ]'
        ' [ _bark_v_1<16:20> LBL: h1 ARG0: e2 ARG1: x3 ] >'
        ' HCONS: < h0 qeq h1 h7 qeq h9 > ]'
    )
    decoded = simplemrs.decode(serialized)
    assert decoded == nearly_all_dogs_bark_mrs
18 |
19 |
def test_encode_nearly(nearly_all_dogs_bark_mrs):
    """Encoding the fixture MRS matches the expected text.

    Both the default single-line form and the ``indent=True`` multi-line
    form are checked.
    """
    expected_compact = (
        '[ <0:21> "Nearly all dogs bark."'
        ' TOP: h0'
        ' INDEX: e2 [ e SF: prop TENSE: pres MOOD: indicative PROG: - PERF: - ]'
        ' RELS: <'
        ' [ _nearly_x_deg<0:6> LBL: h4 ARG0: e5 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: u6 ]'
        ' [ _all_q<7:10> LBL: h4 ARG0: x3 [ x PERS: 3 NUM: pl IND: + PT: pt ] RSTR: h7 BODY: h8 ]'
        ' [ _dog_n_1<11:15> LBL: h9 ARG0: x3 ]'
        ' [ _bark_v_1<16:20> LBL: h1 ARG0: e2 ARG1: x3 ] >'
        ' HCONS: < h0 qeq h1 h7 qeq h9 > ]'
    )
    assert simplemrs.encode(nearly_all_dogs_bark_mrs) == expected_compact

    expected_indented = (
        '[ <0:21> "Nearly all dogs bark."\n'
        ' TOP: h0\n'
        ' INDEX: e2 [ e SF: prop TENSE: pres MOOD: indicative PROG: - PERF: - ]\n'
        ' RELS: < [ _nearly_x_deg<0:6> LBL: h4 ARG0: e5 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: u6 ]\n'
        ' [ _all_q<7:10> LBL: h4 ARG0: x3 [ x PERS: 3 NUM: pl IND: + PT: pt ] RSTR: h7 BODY: h8 ]\n'
        ' [ _dog_n_1<11:15> LBL: h9 ARG0: x3 ]\n'
        ' [ _bark_v_1<16:20> LBL: h1 ARG0: e2 ARG1: x3 ] >\n'
        ' HCONS: < h0 qeq h1 h7 qeq h9 > ]'
    )
    indented = simplemrs.encode(nearly_all_dogs_bark_mrs, indent=True)
    assert indented == expected_indented
43 |
44 |
45 | def test_decode_issue_302():
46 | # https://github.com/delph-in/pydelphin/issues/302
47 |
48 | def assert_predicate(p):
49 | m = simplemrs.decode(
50 | '[ TOP: h0 RELS: < [ {}<1:2> LBL: h1 ] > HCONS: < h0 qeq h1 > ]'
51 | .format(p)
52 | )
53 | assert m.rels[0].predicate == p
54 |
55 | assert_predicate(r'_foo:bar_n_1')
56 | assert_predicate(r'_foo:bar_n')
57 | # assert_predicate(r'_+-]\?[/NN_u_unknown_rel"')
58 | # the following originally had NN but preds are case insensitive
59 | assert_predicate(r'_xml:tm/nn_u_unknown')
60 | assert_predicate(r'_24/7_n_1')
61 | assert_predicate(r'_foo LBL: h4 ARG0: x3 [ x PERS: 3 NUM: sg ] RSTR: h5 BODY: h6 ]'
71 | ' [ _blue_a_1<0:6> LBL: h7 ARG0: x3 ARG1: i8 ]'
72 | ' [ _in_p_loc<10:12> LBL: h1 ARG0: e2 ARG1: x3 ARG2: x9 [ x PERS: 3 NUM: sg IND: + ] ]'
73 | ' [ _this_q_dem<13:17> LBL: h10 ARG0: x9 RSTR: h11 BODY: h12 ]'
74 | ' [ _folder_n_of<18:25> LBL: h13 ARG0: x9 ARG1: i14 ] >'
75 | ' HCONS: < h0 qeq h1 h5 qeq h7 h11 qeq h13 > ]'
76 | )
77 | m.surface = '"Blue" is in this folder.'
78 | s = simplemrs.encode(m)
79 | assert '\\"Blue\\" is in this folder.' in s
80 | m2 = simplemrs.decode(s)
81 | assert m == m2
82 | assert m.surface == m2.surface
83 |
84 |
def test_legacy_single_quote_predicates_issue_373():
    """A predicate with a leading single quote decodes without the quote."""
    # https://github.com/delph-in/pydelphin/issues/373
    decoded = simplemrs.decode("[ RELS: < [ 'single+quoted LBL: h0 ] > ]")
    first_rel = decoded.rels[0]
    assert first_rel.predicate == "single+quoted"
89 |
90 |
91 | def test_quote_reserved_characters_issue_372():
92 | # https://github.com/delph-in/pydelphin/issues/372
93 |
94 | def assert_quoted(p: str, escape: bool = False):
95 | m = simplemrs.decode(f'[ RELS: < [ "{p}"<1:2> LBL: h0 ] > ]')
96 | _p = m.rels[0].predicate
97 | assert (_p.replace('"', r'\"') if escape else _p) == p
98 | s = simplemrs.encode(m)
99 | assert f'"{p}"' in s
100 | simplemrs.decode(s) # confirm it roundtrips without error
101 |
102 | assert_quoted("a space")
103 | assert_quoted("a:colon")
104 | assert_quoted(r'double\"quotes', escape=True)
105 | assert_quoted("single'quotes")
106 | assert_quoted("leftangle")
108 | assert_quoted("left[bracket")
109 | assert_quoted("right]bracket")
110 |
--------------------------------------------------------------------------------
/tests/codecs/simpledmrs_test.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin.codecs import simpledmrs
5 | from delphin.dmrs import DMRS, Link, Node
6 | from delphin.lnk import Lnk
7 |
8 |
@pytest.fixture
def it_rains_heavily_dmrs():
    """Two-node DMRS (top 20, index 10) with a single ARG1/EQ link."""
    rain = Node(10, '_rain_v_1', type='e', properties={'TENSE': 'past'})
    heavy = Node(20, '_heavy_a_1', type='e')
    heavy_to_rain = Link(20, 10, 'ARG1', 'EQ')
    return DMRS(20, 10, nodes=[rain, heavy], links=[heavy_to_rain])
17 |
18 |
@pytest.fixture
def abrams_barked_dmrs():
    """Three-node DMRS carrying lnk, surface, and identifier metadata."""
    nodes = [
        Node(10, 'udef_q'),
        Node(20, 'named', type='x', carg='Abrams', lnk=Lnk.charspan(0, 6)),
        Node(30, '_bark_v_1', type='e', properties={'TENSE': 'past'},
             lnk=Lnk.charspan(7, 13)),
    ]
    links = [
        Link(10, 20, 'RSTR', 'H'),
        Link(30, 20, 'ARG1', 'NEQ'),
    ]
    return DMRS(
        30, 30,
        nodes=nodes,
        links=links,
        lnk=Lnk.charspan(0, 14),
        surface='Abrams barked.',
        identifier='1000380',
    )
34 |
35 |
def test_encode(it_rains_heavily_dmrs, abrams_barked_dmrs):
    """Check SimpleDMRS encoding against the expected text.

    Covers an empty DMRS, the default and indented layouts, suppression
    of properties, and a DMRS with lnk/surface/identifier metadata.
    """
    assert simpledmrs.encode(DMRS()) == 'dmrs { }'

    compact = (
        'dmrs {'
        ' [top=20 index=10]'
        ' 10 [_rain_v_1 e TENSE=past];'
        ' 20 [_heavy_a_1 e];'
        ' 20:ARG1/EQ -> 10;'
        ' }')
    assert simpledmrs.encode(it_rains_heavily_dmrs) == compact

    indented = (
        'dmrs {\n'
        ' [top=20 index=10]\n'
        ' 10 [_rain_v_1 e TENSE=past];\n'
        ' 20 [_heavy_a_1 e];\n'
        ' 20:ARG1/EQ -> 10;\n'
        '}')
    assert simpledmrs.encode(it_rains_heavily_dmrs, indent=True) == indented

    indented_without_properties = (
        'dmrs {\n'
        ' [top=20 index=10]\n'
        ' 10 [_rain_v_1 e];\n'
        ' 20 [_heavy_a_1 e];\n'
        ' 20:ARG1/EQ -> 10;\n'
        '}')
    encoded = simpledmrs.encode(
        it_rains_heavily_dmrs, properties=False, indent=True)
    assert encoded == indented_without_properties

    with_metadata = (
        'dmrs 1000380 {'
        ' [<0:14> "Abrams barked." top=30 index=30]'
        ' 10 [udef_q];'
        ' 20 [named<0:6>("Abrams") x];'
        ' 30 [_bark_v_1<7:13> e TENSE=past];'
        ' 10:RSTR/H -> 20;'
        ' 30:ARG1/NEQ -> 20;'
        ' }')
    assert simpledmrs.encode(abrams_barked_dmrs) == with_metadata
73 |
74 |
def test_decode(it_rains_heavily_dmrs):
    """Decoded SimpleDMRS text exposes the expected structure and metadata."""
    expected = it_rains_heavily_dmrs
    plain = simpledmrs.decode(
        'dmrs {'
        ' [top=20 index=10]'
        ' 10 [_rain_v_1 e TENSE=past];'
        ' 20 [_heavy_a_1 e];'
        ' 20:ARG1/EQ -> 10;'
        ' }')
    assert plain.top == expected.top
    assert plain.index == expected.index
    assert plain.nodes == expected.nodes
    assert plain.links == expected.links

    annotated = simpledmrs.decode(
        'dmrs 1000380 {'
        ' [<0:14> "Abrams barked." top=30 index=30]'
        ' 10 [udef_q];'
        ' 20 [named<0:6>("Abrams") x];'
        ' 30 [_bark_v_1<7:13> e TENSE=past];'
        ' 10:RSTR/H -> 20;'
        ' 30:ARG1/NEQ -> 20;'
        ' }')
    # Graph-level metadata
    assert annotated.cfrom == 0
    assert annotated.cto == 14
    assert annotated.surface == 'Abrams barked.'
    assert annotated.identifier == '1000380'
    # Details of the second node ("named")
    named = annotated.nodes[1]
    assert named.carg == 'Abrams'
    assert named.type == 'x'
    assert named.cto == 6
104 |
105 |
def test_loads(it_rains_heavily_dmrs):
    """``loads`` reads two consecutive DMRSs from one string."""
    expected = it_rains_heavily_dmrs
    text = (
        'dmrs {\n'
        ' [<0:18> "It rained heavily." top=20 index=10]\n'
        ' 10 [_rain_v_1<3:9> e TENSE=past];\n'
        ' 20 [_heavy_a_1<10:17> e];\n'
        ' 20:ARG1/EQ -> 10;\n'
        ' }\n'
        'dmrs {\n'
        ' [top=20 index=10]\n'
        ' 10 [_rain_v_1 e TENSE=past];\n'
        ' 20 [_heavy_a_1 e];\n'
        ' 20:ARG1/EQ -> 10;\n'
        ' }')
    ds = simpledmrs.loads(text)

    assert len(ds) == 2
    # Only the first item carries lnk and surface information.
    assert ds[0].cto == 18
    assert ds[0].surface == 'It rained heavily.'

    # Both items share the structure of the fixture.
    for position in (0, 1):
        parsed = ds[position]
        assert parsed.top == expected.top
        assert parsed.index == expected.index
        assert parsed.nodes == expected.nodes
        assert parsed.links == expected.links
133 |
134 |
def test_decode_no_index_issue_334():
    """Missing ``index`` (and ``top``) decode as ``None``."""
    # https://github.com/delph-in/pydelphin/issues/334
    top_only = simpledmrs.decode(
        'dmrs {'
        ' [top=10]'
        ' 10 [_rain_v_1<3:9> e TENSE=past];'
        '}'
    )
    assert top_only.index is None

    no_header = simpledmrs.decode(
        'dmrs {'
        ' 10 [_rain_v_1<3:9> e TENSE=past];'
        '}'
    )
    assert no_header.top is None
    assert no_header.index is None
152 |
--------------------------------------------------------------------------------
/delphin/cli/edm.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | Compute the EDM (Elementary Dependency Match) score for two collections.
4 |
5 | The collections, GOLD and TEST, may be files containing semantic
6 | representations, decoded using '--format'; or an [incr tsdb()] test
7 | suite directory, selecting MRSs using '-p' for the parse result
8 | number. GOLD and TEST should contain the same number of items. MRS
9 | representations will be converted to EDS for comparison.
10 | """
11 |
12 | import argparse
13 | import logging
14 | import warnings
15 | from pathlib import Path
16 | from typing import Iterator, Optional, Union
17 |
18 | from delphin import dmrs, edm, eds, itsdb, mrs, tsdb, util
19 |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# The representation types that are yielded for comparison.
_SemanticRepresentation = Union[eds.EDS, dmrs.DMRS]

# Argument parser for this command; its arguments are registered at the
# bottom of the module.
# NOTE(review): created with add_help=False, presumably because a parent
# CLI parser attaches it and supplies the help option -- confirm.
parser = argparse.ArgumentParser(add_help=False)

# Metadata describing this command; the description reuses the module
# docstring.
COMMAND_INFO = {
    'name': 'edm',
    'help': 'Evaluate with Elementary Dependency Match',
    'description': __doc__,
    'parser': parser,
}
32 |
33 |
def call_compute(args):
    """Compute EDM for the GOLD and TEST collections and print the scores.

    *args* is the parsed argument namespace; the weights come from the
    ``-N``, ``-A``, ``-P``, ``-C`` and ``-T`` options and the handling of
    missing items from ``--ignore-missing``.
    """
    golds = _iter_representations(args.GOLD, args.format, args.p)
    tests = _iter_representations(args.TEST, args.format, args.p)
    weights = {
        'name_weight': args.N,
        'argument_weight': args.A,
        'property_weight': args.P,
        'constant_weight': args.C,
        'top_weight': args.T,
    }
    missing = args.ignore_missing
    precision, recall, fscore = edm.compute(
        golds,
        tests,
        ignore_missing_gold=missing in ('gold', 'both'),
        ignore_missing_test=missing in ('test', 'both'),
        **weights,
    )
    print(f'Precision:\t{precision}')
    print(f' Recall:\t{recall}')
    print(f' F-score:\t{fscore}')
50 |
51 |
def _iter_representations(
    path: Path,
    fmt: str,
    p: int
) -> Iterator[Optional[_SemanticRepresentation]]:
    """Yield the semantic representations found at *path*.

    If *path* is a TSDB database directory, result number *p* of each
    processed item is read as an MRS and converted to EDS; ``None`` is
    yielded for items that have no such result. If *path* is a regular
    file, it is loaded with the codec named by *fmt*: MRS codecs are
    converted to EDS, while DMRS and EDS codecs are yielded unchanged.

    Because this is a generator, the errors below surface when iteration
    starts rather than when the function is called.

    Raises:
        ValueError: if the codec's representation is not MRS, DMRS, or
            EDS, or if *path* is neither a file nor a TSDB database.
    """
    if tsdb.is_database_directory(path):
        # Pass log arguments positionally; wrapping them in a tuple would
        # log the tuple's repr instead of the path.
        logger.debug('reading MRSs from profile: %s', path)
        ts = itsdb.TestSuite(path)
        for response in ts.processed_items():
            try:
                result = response.result(p)
            except IndexError:
                # No result number p for this item: report it as missing.
                yield None
            else:
                yield _eds_from_mrs(result.mrs(), predicate_modifiers=True)

    elif path.is_file():
        # Two placeholders need two separate arguments; a single tuple
        # argument makes %-formatting fail when DEBUG logging is enabled.
        logger.debug('reading %s from file: %s', fmt, path)
        codec = util.import_codec(fmt)
        rep = codec.CODEC_INFO.get('representation', '').lower()
        if rep == 'mrs':
            for sr in codec.load(path):
                yield _eds_from_mrs(sr, predicate_modifiers=True)
        elif rep in ('dmrs', 'eds'):
            yield from codec.load(path)
        else:
            raise ValueError(f'unsupported representation: {rep}')

    else:
        raise ValueError(f'not a file or TSDB database: {path}')
83 |
84 |
def _eds_from_mrs(
    m: mrs.MRS,
    predicate_modifiers: bool,
    errors: str = 'warn',
) -> Optional[eds.EDS]:
    """Convert *m* to EDS, returning ``None`` when conversion fails.

    On failure a debug message is logged, then *errors* decides what
    happens: ``'warn'`` issues a warning, ``'strict'`` re-raises the
    original exception, and any other value fails silently.
    """
    try:
        return eds.from_mrs(m, predicate_modifiers=predicate_modifiers)
    except Exception:
        logger.debug('could not convert MRS to EDS')
        if errors == 'warn':
            warnings.warn(
                "error in EDS conversion; skipping entry",
                stacklevel=2,
            )
        elif errors == 'strict':
            raise
        return None
103 |
104 |
# Register call_compute as the handler stored on the parsed namespace.
parser.set_defaults(func=call_compute)
# data selection
# GOLD and TEST are converted to pathlib.Path objects by argparse.
parser.add_argument(
    'GOLD',
    type=Path,
    help='corpus of gold semantic representations')
parser.add_argument(
    'TEST',
    type=Path,
    help='corpus of test semantic representations')
# Codec name used when GOLD/TEST are files.
parser.add_argument(
    '-f',
    '--format',
    metavar='FMT',
    default='eds',
    help='semantic representation format (default: eds)')
# Result number used when GOLD/TEST are TSDB database directories.
parser.add_argument(
    '-p',
    metavar='N',
    type=int,
    default=0,
    help='parse result number (default: 0)')
# Which side's missing items should not count as mismatches.
parser.add_argument(
    '--ignore-missing',
    metavar='X',
    choices=('gold', 'test', 'both', 'none'),
    default='none',
    help='do not treat missing Xs as a mismatch (default: none)')
# comparison configuration
# Each option below is a float weight for one class of triples.
parser.add_argument(
    '-A', metavar='WEIGHT', type=float, default=1.0,
    help='weight for argument triples (default: 1.0)')
parser.add_argument(
    '-N', metavar='WEIGHT', type=float, default=1.0,
    help='weight for name (predicate) triples (default: 1.0)')
parser.add_argument(
    '-P', metavar='WEIGHT', type=float, default=1.0,
    help='weight for property triples (default: 1.0)')
parser.add_argument(
    '-C', metavar='WEIGHT', type=float, default=1.0,
    help='weight for constant triples (default: 1.0)')
parser.add_argument(
    '-T', metavar='WEIGHT', type=float, default=1.0,
    help='weight for matching top triples (default: 1.0)')
149 |
--------------------------------------------------------------------------------
/docs/api/delphin.tdl.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.tdl
3 | ===========
4 |
5 | .. sidebar:: Contents
6 |
7 | .. contents::
8 | :local:
9 | :depth: 2
10 |
11 |
12 | .. automodule:: delphin.tdl
13 |
14 | Type Description Language (TDL) is a declarative language for
15 | describing type systems, mainly for the creation of DELPH-IN HPSG
16 | grammars. TDL was originally described in Krieger and Schäfer, 1994
17 | [KS1994]_, but it describes many features not in use by the DELPH-IN
18 | variant, such as disjunction. Copestake, 2002 [COP2002]_ better
19 | describes the subset in use by DELPH-IN, but this publication has
20 | become outdated with respect to the current usage of TDL in DELPH-IN
21 | grammars and its TDL syntax description is inaccurate in places. It is, however,
22 | still a great resource for understanding the interpretation of TDL
23 | grammar descriptions. The TdlRfc_ page of the `DELPH-IN Wiki`_
24 | contains the most up-to-date description of the TDL syntax used by
25 | DELPH-IN grammars, including features such as documentation strings
26 | and regular expressions.
27 |
28 | Below is an example of a basic type from the English Resource Grammar
29 | (`ERG`_):
30 |
31 | .. code:: tdl
32 |
33 | basic_word := word_or_infl_rule & word_or_punct_rule &
34 | [ SYNSEM [ PHON.ONSET.--TL #tl,
35 | LKEYS.KEYREL [ CFROM #from,
36 | CTO #to ] ],
37 | ORTH [ CLASS #class, FROM #from, TO #to, FORM #form ],
38 | TOKENS [ +LIST #tl & < [ +CLASS #class, +FROM #from, +FORM #form ], ... >,
39 | +LAST.+TO #to ] ].
40 |
41 | The `delphin.tdl` module makes it easy to inspect what is written on
42 | definitions in Type Description Language (TDL), but it doesn't
43 | interpret type hierarchies (such as by performing unification,
44 | subsumption calculations, or creating GLB types). That is, while it
45 | wouldn't be useful for creating a parser, it is useful if you want to
46 | statically inspect the types in a grammar and the constraints they
47 | apply.
48 |
49 | .. [KS1994] Hans-Ulrich Krieger and Ulrich Schäfer. TDL: a type
50 | description language for constraint-based grammars. In Proceedings
51 | of the 15th conference on Computational linguistics, volume 2, pages
52 | 893–899. Association for Computational Linguistics, 1994.
53 |
54 | .. [COP2002] Ann Copestake. Implementing typed feature structure
55 | grammars, volume 110. CSLI publications Stanford, 2002.
56 |
57 | .. _TdlRfc: https://github.com/delph-in/docs/wiki/TdlRfc
58 | .. _`DELPH-IN Wiki`: https://github.com/delph-in/docs/wiki/
59 | .. _ERG: http://www.delph-in.net/erg/
60 |
61 |
62 | Module Parameters
63 | -----------------
64 |
65 | Some aspects of TDL parsing can be customized per grammar, and the
66 | following module variables may be reassigned to accommodate those
67 | differences. For instance, in the ERG_, the type used for list
68 | feature structures is `*list*`, while for Matrix_\ -based grammars
69 | it is `list`. PyDelphin defaults to the values used by the ERG.
70 |
71 | .. _ERG: http://www.delph-in.net/erg/
72 | .. _Matrix: http://matrix.ling.washington.edu/
73 |
74 | .. autodata:: LIST_TYPE
75 | .. autodata:: EMPTY_LIST_TYPE
76 | .. autodata:: LIST_HEAD
77 | .. autodata:: LIST_TAIL
78 | .. autodata:: DIFF_LIST_LIST
79 | .. autodata:: DIFF_LIST_LAST
80 |
81 |
82 | Functions
83 | ---------
84 |
85 | .. autofunction:: iterparse
86 | .. autofunction:: format
87 |
88 |
89 | Classes
90 | -------
91 |
92 | The TDL entity classes are the objects returned by
93 | :func:`iterparse`, but they may also be used directly to build TDL
94 | structures, e.g., for serialization.
95 |
96 |
97 | Terms
98 | '''''
99 |
100 | .. autoclass:: Term
101 | .. autoclass:: TypeTerm
102 | :show-inheritance:
103 | .. autoclass:: TypeIdentifier
104 | :show-inheritance:
105 | :members:
106 | .. autoclass:: String
107 | :show-inheritance:
108 | :members:
109 | .. autoclass:: Regex
110 | :show-inheritance:
111 | :members:
112 | .. autoclass:: AVM
113 | :show-inheritance:
114 | :members:
115 | .. autoclass:: ConsList
116 | :show-inheritance:
117 | :members:
118 | .. autoclass:: DiffList
119 | :show-inheritance:
120 | :members:
121 | .. autoclass:: Coreference
122 | :show-inheritance:
123 | :members:
124 |
125 |
126 | Conjunctions
127 | ''''''''''''
128 |
129 | .. autoclass:: Conjunction
130 | :members:
131 |
132 |
133 | Type and Instance Definitions
134 | '''''''''''''''''''''''''''''
135 |
136 | .. autoclass:: TypeDefinition
137 | :members:
138 | .. autoclass:: TypeAddendum
139 | :show-inheritance:
140 | :members:
141 | .. autoclass:: LexicalRuleDefinition
142 | :show-inheritance:
143 | :members:
144 |
145 |
146 | Morphological Patterns
147 | ''''''''''''''''''''''
148 |
149 | .. autoclass:: LetterSet
150 | :members:
151 |
152 | .. autoclass:: WildCard
153 | :members:
154 |
155 |
156 | Environments and File Inclusion
157 | '''''''''''''''''''''''''''''''
158 |
159 | .. autoclass:: TypeEnvironment
160 | :members:
161 |
162 | .. autoclass:: InstanceEnvironment
163 | :members:
164 |
165 | .. autoclass:: FileInclude
166 | :members:
167 |
168 |
169 | Comments
170 | ''''''''
171 |
172 | .. autoclass:: LineComment
173 | :members:
174 |
175 | .. autoclass:: BlockComment
176 | :members:
177 |
178 |
179 | Exceptions and Warnings
180 | -----------------------
181 |
182 | .. autoexception:: TDLError
183 | :show-inheritance:
184 |
185 | .. autoexception:: TDLSyntaxError
186 | :show-inheritance:
187 |
188 | .. autoexception:: TDLWarning
189 | :show-inheritance:
190 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.dmrsjson.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.dmrsjson
3 | =======================
4 |
5 | .. automodule:: delphin.codecs.dmrsjson
6 |
7 | Example:
8 |
9 | * *The new chef whose soup accidentally spilled quit and left.*
10 |
11 | ::
12 |
13 | {
14 | "top": 10008,
15 | "index": 10009,
16 | "nodes": [
17 | {
18 | "nodeid": 10000,
19 | "predicate": "_the_q",
20 | "lnk": {"from": 0, "to": 3}
21 | },
22 | {
23 | "nodeid": 10001,
24 | "predicate": "_new_a_1",
25 | "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "bool", "PERF": "-", "cvarsort": "e"},
26 | "lnk": {"from": 4, "to": 7}
27 | },
28 | {
29 | "nodeid": 10002,
30 | "predicate": "_chef_n_1",
31 | "sortinfo": {"PERS": "3", "NUM": "sg", "IND": "+", "cvarsort": "x"},
32 | "lnk": {"from": 8, "to": 12}
33 | },
34 | {
35 | "nodeid": 10003,
36 | "predicate": "def_explicit_q",
37 | "lnk": {"from": 13, "to": 18}
38 | },
39 | {
40 | "nodeid": 10004,
41 | "predicate": "poss",
42 | "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"},
43 | "lnk": {"from": 13, "to": 18}
44 | },
45 | {
46 | "nodeid": 10005,
47 | "predicate": "_soup_n_1",
48 | "sortinfo": {"PERS": "3", "NUM": "sg", "cvarsort": "x"},
49 | "lnk": {"from": 19, "to": 23}
50 | },
51 | {
52 | "nodeid": 10006,
53 | "predicate": "_accidental_a_1",
54 | "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"},
55 | "lnk": {"from": 24, "to": 36}
56 | },
57 | {
58 | "nodeid": 10007,
59 | "predicate": "_spill_v_1",
60 | "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"},
61 | "lnk": {"from": 37, "to": 44}
62 | },
63 | {
64 | "nodeid": 10008,
65 | "predicate": "_quit_v_1",
66 | "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"},
67 | "lnk": {"from": 45, "to": 49}
68 | },
69 | {
70 | "nodeid": 10009,
71 | "predicate": "_and_c",
72 | "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"},
73 | "lnk": {"from": 50, "to": 53}
74 | },
75 | {
76 | "nodeid": 10010,
77 | "predicate": "_leave_v_1",
78 | "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"},
79 | "lnk": {"from": 54, "to": 59}
80 | }
81 | ],
82 | "links": [
83 | {"from": 10000, "to": 10002, "rargname": "RSTR", "post": "H"},
84 | {"from": 10001, "to": 10002, "rargname": "ARG1", "post": "EQ"},
85 | {"from": 10003, "to": 10005, "rargname": "RSTR", "post": "H"},
86 | {"from": 10004, "to": 10005, "rargname": "ARG1", "post": "EQ"},
87 | {"from": 10004, "to": 10002, "rargname": "ARG2", "post": "NEQ"},
88 | {"from": 10006, "to": 10007, "rargname": "ARG1", "post": "EQ"},
89 | {"from": 10007, "to": 10005, "rargname": "ARG1", "post": "NEQ"},
90 | {"from": 10008, "to": 10002, "rargname": "ARG1", "post": "NEQ"},
91 | {"from": 10009, "to": 10008, "rargname": "ARG1", "post": "EQ"},
92 | {"from": 10009, "to": 10010, "rargname": "ARG2", "post": "EQ"},
93 | {"from": 10010, "to": 10002, "rargname": "ARG1", "post": "NEQ"},
94 | {"from": 10007, "to": 10002, "rargname": "MOD", "post": "EQ"},
95 | {"from": 10010, "to": 10008, "rargname": "MOD", "post": "EQ"}
96 | ]
97 | }
98 |
99 |
100 | Module Constants
101 | ----------------
102 |
103 | .. data:: HEADER
104 |
105 | `'['`
106 |
107 | .. data:: JOINER
108 |
109 | `','`
110 |
111 | .. data:: FOOTER
112 |
113 | `']'`
114 |
115 |
116 | Deserialization Functions
117 | -------------------------
118 |
119 | .. function:: load(source)
120 |
121 | See the :func:`load` codec API documentation.
122 |
123 | .. function:: loads(s)
124 |
125 | See the :func:`loads` codec API documentation.
126 |
127 | .. function:: decode(s)
128 |
129 | See the :func:`decode` codec API documentation.
130 |
131 |
132 | Serialization Functions
133 | -----------------------
134 |
135 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
136 |
137 | See the :func:`dump` codec API documentation.
138 |
139 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
140 |
141 | See the :func:`dumps` codec API documentation.
142 |
143 | .. function:: encode(m, properties=True, lnk=True, indent=False)
144 |
145 | See the :func:`encode` codec API documentation.
146 |
147 |
148 | Complementary Functions
149 | -----------------------
150 |
151 | .. autofunction:: from_dict
152 | .. autofunction:: to_dict
153 |
--------------------------------------------------------------------------------
/docs/api/delphin.codecs.dmrx.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.codecs.dmrx
3 | ===================
4 |
5 | .. automodule:: delphin.codecs.dmrx
6 |
7 | Example:
8 |
9 | * *The new chef whose soup accidentally spilled quit and left.*
10 |
11 | ::
12 |
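13 |   <dmrs cfrom="-1" cto="-1" index="10009" top="10008">
14 |     <node cfrom="0" cto="3" nodeid="10000">
15 |       <realpred lemma="the" pos="q" />
16 |       <sortinfo />
17 |     </node>
18 |     <node cfrom="4" cto="7" nodeid="10001">
19 |       <realpred lemma="new" pos="a" sense="1" />
20 |       <sortinfo cvarsort="e" mood="indicative" perf="-" prog="bool" sf="prop" tense="untensed" />
21 |     </node>
22 |     <node cfrom="8" cto="12" nodeid="10002">
23 |       <realpred lemma="chef" pos="n" sense="1" />
24 |       <sortinfo cvarsort="x" ind="+" num="sg" pers="3" />
25 |     </node>
26 |     <node cfrom="13" cto="18" nodeid="10003">
27 |       <gpred>def_explicit_q</gpred>
28 |       <sortinfo />
29 |     </node>
30 |     <node cfrom="13" cto="18" nodeid="10004">
31 |       <gpred>poss</gpred>
32 |       <sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" />
33 |     </node>
34 |     <node cfrom="19" cto="23" nodeid="10005">
35 |       <realpred lemma="soup" pos="n" sense="1" />
36 |       <sortinfo cvarsort="x" num="sg" pers="3" />
37 |     </node>
38 |     <node cfrom="24" cto="36" nodeid="10006">
39 |       <realpred lemma="accidental" pos="a" sense="1" />
40 |       <sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" />
41 |     </node>
42 |     <node cfrom="37" cto="44" nodeid="10007">
43 |       <realpred lemma="spill" pos="v" sense="1" />
44 |       <sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="past" />
45 |     </node>
46 |     <node cfrom="45" cto="49" nodeid="10008">
47 |       <realpred lemma="quit" pos="v" sense="1" />
48 |       <sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="past" />
49 |     </node>
50 |     <node cfrom="50" cto="53" nodeid="10009">
51 |       <realpred lemma="and" pos="c" />
52 |       <sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="past" />
53 |     </node>
54 |     <node cfrom="54" cto="59" nodeid="10010">
55 |       <realpred lemma="leave" pos="v" sense="1" />
56 |       <sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="past" />
57 |     </node>
58 |     <link from="10000" to="10002">
59 |       <rargname>RSTR</rargname>
60 |       <post>H</post>
61 |     </link>
62 |     <link from="10001" to="10002">
63 |       <rargname>ARG1</rargname>
64 |       <post>EQ</post>
65 |     </link>
66 |     <link from="10003" to="10005">
67 |       <rargname>RSTR</rargname>
68 |       <post>H</post>
69 |     </link>
70 |     <link from="10004" to="10005">
71 |       <rargname>ARG1</rargname>
72 |       <post>EQ</post>
73 |     </link>
74 |     <link from="10004" to="10002">
75 |       <rargname>ARG2</rargname>
76 |       <post>NEQ</post>
77 |     </link>
78 |     <link from="10006" to="10007">
79 |       <rargname>ARG1</rargname>
80 |       <post>EQ</post>
81 |     </link>
82 |     <link from="10007" to="10005">
83 |       <rargname>ARG1</rargname>
84 |       <post>NEQ</post>
85 |     </link>
86 |     <link from="10008" to="10002">
87 |       <rargname>ARG1</rargname>
88 |       <post>NEQ</post>
89 |     </link>
90 |     <link from="10009" to="10008">
91 |       <rargname>ARG1</rargname>
92 |       <post>EQ</post>
93 |     </link>
94 |     <link from="10009" to="10010">
95 |       <rargname>ARG2</rargname>
96 |       <post>EQ</post>
97 |     </link>
98 |     <link from="10010" to="10002">
99 |       <rargname>ARG1</rargname>
100 |       <post>NEQ</post>
101 |     </link>
102 |     <link from="10007" to="10002">
103 |       <rargname>MOD</rargname>
104 |       <post>EQ</post>
105 |     </link>
106 |     <link from="10010" to="10008">
107 |       <rargname>MOD</rargname>
108 |       <post>EQ</post>
109 |     </link>
110 |   </dmrs>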
111 |
112 |
113 | Module Constants
114 | ----------------
115 |
116 | .. data:: HEADER
117 |
118 | `'<dmrs-list>'`
119 |
120 | .. data:: JOINER
121 |
122 | `''`
123 |
124 | .. data:: FOOTER
125 |
126 | `'</dmrs-list>'`
127 |
128 | Deserialization Functions
129 | -------------------------
130 |
131 | .. function:: load(source)
132 |
133 | See the :func:`load` codec API documentation.
134 |
135 | .. function:: loads(s)
136 |
137 | See the :func:`loads` codec API documentation.
138 |
139 | .. function:: decode(s)
140 |
141 | See the :func:`decode` codec API documentation.
142 |
143 | Serialization Functions
144 | -----------------------
145 |
146 | .. function:: dump(ms, destination, properties=True, lnk=True, indent=False, encoding='utf-8')
147 |
148 | See the :func:`dump` codec API documentation.
149 |
150 | .. function:: dumps(ms, properties=True, lnk=True, indent=False)
151 |
152 | See the :func:`dumps` codec API documentation.
153 |
154 | .. function:: encode(m, properties=True, lnk=True, indent=False)
155 |
156 | See the :func:`encode` codec API documentation.
157 |
--------------------------------------------------------------------------------
/docs/api/delphin.tsdb.rst:
--------------------------------------------------------------------------------
1 |
2 | delphin.tsdb
3 | ============
4 |
5 | .. automodule:: delphin.tsdb
6 |
7 |
8 | .. note::
9 |
10 | This module implements the basic, low-level functionality for
11 | working with TSDB databases. For higher-level views and uses of
12 | these databases, see :mod:`delphin.itsdb`. For complex queries
13 | of the databases, see :mod:`delphin.tsql`.
14 |
15 | TSDB databases are plain-text file-based relational databases
16 | minimally consisting of a directory with a file, called
17 | `relations`, containing the database's schema (see
18 | `Schemas`_). Every relation, or table, in the database has its own
19 | file, which may be `gzipped <https://en.wikipedia.org/wiki/Gzip>`_
20 | to save space. The relations have a simple format with columns
21 | delimited by ``@`` and records delimited by newlines. This makes
22 | them easy to inspect at the command line with standard Unix tools
23 | such as ``cut`` and ``awk`` (but gzipped relations need to be
24 | decompressed or piped from a tool such as ``zcat``).
25 |
26 | This module handles the technical details of reading and writing
27 | TSDB databases, including:
28 |
29 | - parsing database schemas
30 |
31 | - transparently opening either the plain-text or gzipped relations
32 | on disk, as appropriate
33 |
34 | - escaping and unescaping reserved characters in the data
35 |
36 | - pairing columns with their schema descriptions
37 |
38 | - casting types (such as ``:integer``, ``:date``, etc.)
39 |
40 | Additionally, this module provides very basic abstractions of
41 | databases and relations as the :class:`Database` and
42 | :class:`Relation` classes, respectively. These serve as base
43 | classes for the more featureful :class:`delphin.itsdb.TestSuite`
44 | and :class:`delphin.itsdb.Table` classes, but may be useful as they
45 | are for simple needs.
46 |
47 |
48 | Module Constants
49 | ----------------
50 |
51 | .. data:: SCHEMA_FILENAME
52 |
53 | ``relations`` -- The filename for the schema.
54 |
55 | .. data:: FIELD_DELIMITER
56 |
57 | ``@`` -- The character used to delimit fields (or columns) in a record.
58 |
59 | .. data:: TSDB_CORE_FILES
60 |
61 | The list of files used in "skeletons". Includes::
62 |
63 | item
64 | analysis
65 | phenomenon
66 | parameter
67 | set
68 | item-phenomenon
69 | item-set
70 |
71 | .. data:: TSDB_CODED_ATTRIBUTES
72 |
73 | The default values of specific fields. Includes::
74 |
75 | i-wf = 1
76 | i-difficulty = 1
77 | polarity = -1
78 |
79 | Fields without a special value given above get assigned one
80 | based on their datatype.
81 |
82 |
83 | Schemas
84 | -------
85 |
86 | A TSDB database defines its schema in a file called ``relations``.
87 | This file contains descriptions of each relation (table) and its
88 | fields (columns), including the datatypes and whether a column
89 | counts as a "key". Key columns may be used when joining relations
90 | together. As an example, the first 9 lines of the ``run`` relation
91 | description are as follows:
92 |
93 | ::
94 |
95 | run:
96 | run-id :integer :key # unique test run identifier
97 | run-comment :string # descriptive narrative
98 | platform :string # implementation platform (version)
99 | protocol :integer # [incr tsdb()] protocol version
100 | tsdb :string # tsdb(1) (version) used
101 | application :string # application (version) used
102 | environment :string # application-specific information
103 | grammar :string # grammar (version) used
104 | ...
105 |
106 | .. seealso::
107 |
108 | See the `TsdbSchemaRfc
109 | <https://github.com/delph-in/docs/wiki/TsdbSchemaRfc>`_ wiki for
110 | a description of the format of ``relations`` files.
111 |
112 | In PyDelphin, TSDB schemas are represented as dictionaries of lists
113 | of :class:`Field` objects.
114 |
115 | .. autoclass:: Field
116 | :members:
117 |
118 | .. autofunction:: read_schema
119 | .. autofunction:: write_schema
120 | .. autofunction:: make_field_index
121 |
122 |
123 | Data Operations
124 | ---------------
125 |
126 | Character Escaping and Unescaping
127 | '''''''''''''''''''''''''''''''''
128 |
129 | .. autofunction:: escape
130 | .. autofunction:: unescape
131 |
132 | Record Splitting and Joining
133 | ''''''''''''''''''''''''''''
134 |
135 | .. autofunction:: split
136 | .. autofunction:: join
137 | .. autofunction:: make_record
138 |
139 | Datatype Conversion
140 | '''''''''''''''''''
141 |
142 | .. autofunction:: cast
143 | .. autofunction:: format
144 |
145 |
146 | File and Directory Operations
147 | -----------------------------
148 |
149 | Paths
150 | '''''
151 |
152 | .. autofunction:: is_database_directory
153 | .. autofunction:: get_path
154 |
155 | Relation File Access
156 | ''''''''''''''''''''
157 |
158 | .. autofunction:: open
159 | .. autofunction:: write
160 |
161 | Database Directories
162 | ''''''''''''''''''''
163 |
164 | .. autofunction:: initialize_database
165 | .. autofunction:: write_database
166 |
167 |
168 | Basic Database Class
169 | --------------------
170 |
171 | .. autoclass:: Database
172 | :members:
173 |
174 | Exceptions
175 | ----------
176 |
177 | .. autoexception:: TSDBSchemaError
178 | :show-inheritance:
179 |
180 | .. autoexception:: TSDBError
181 | :show-inheritance:
182 |
183 | .. autoexception:: TSDBWarning
184 | :show-inheritance:
185 |
--------------------------------------------------------------------------------
/tests/tfs_test.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 |
4 | from delphin import tfs
5 |
6 |
@pytest.fixture
def empty_fs():
    """Provide a feature structure constructed without any features."""
    structure = tfs.FeatureStructure()
    return structure
10 |
11 |
@pytest.fixture
def flat_fs():
    """Provide a feature structure with two top-level features, A and B."""
    features = {'A': 'xYz', 'B': 2}
    return tfs.FeatureStructure(features)
15 |
16 |
@pytest.fixture
def nested_fs():
    """Provide a feature structure built from dotted feature paths."""
    paths = [
        ('A.B.C', 1),
        ('A.B.D', 2),
        ('B', 3),
    ]
    return tfs.FeatureStructure(paths)
20 |
21 |
class TestFeatureStructure:
    """Behavioral tests for :class:`tfs.FeatureStructure`."""

    def test_init(self):
        """Each constructor form used below builds without raising."""
        tfs.FeatureStructure()
        tfs.FeatureStructure({'A': 'b'})
        tfs.FeatureStructure([('A', 'b')])
        tfs.FeatureStructure([('A.B.C', 1), ('A.B.D', 2), ('B', 3)])

    def test_eq(self, empty_fs, flat_fs, nested_fs):
        """Equality compares features and values; keys ignore case."""
        assert empty_fs == tfs.FeatureStructure()
        assert empty_fs != flat_fs
        assert flat_fs == flat_fs
        # values are compared case-sensitively
        flat2 = tfs.FeatureStructure({'A': 'XyZ', 'B': 2})
        assert flat_fs != flat2
        # feature names are compared case-insensitively
        flat3 = tfs.FeatureStructure({'a': 'xYz', 'b': 2})
        assert flat_fs == flat3
        # an extra feature makes the structures unequal
        flat4 = tfs.FeatureStructure({'A': 'xYz', 'B': 2, 'C': 1})
        assert flat_fs != flat4
        # dotted paths and explicitly nested structures are equivalent
        assert nested_fs == tfs.FeatureStructure([
            ('A', tfs.FeatureStructure({'B.C': 1, 'B.D': 2})),
            ('B', 3)])

    def test__setitem__(self, empty_fs):
        """Assignment overwrites case-insensitively and expands paths."""
        empty_fs['A'] = 1
        assert empty_fs['A'] == 1
        empty_fs['a'] = 3
        assert empty_fs['A'] == 3
        empty_fs['B.C'] = 4
        assert empty_fs['B'] == tfs.FeatureStructure({'C': 4})

    def test__setitem__issue293(self):
        """Assigning a path below an existing atomic value raises TFSError."""
        t = tfs.FeatureStructure()
        t['A.B'] = 'c'
        with pytest.raises(tfs.TFSError):
            t['A.B.C'] = 'd'

    def test__getitem__(self, empty_fs, flat_fs, nested_fs):
        """Lookup supports dotted paths and raises KeyError when missing."""
        with pytest.raises(KeyError):
            empty_fs['unknown']
        assert flat_fs['A'] == 'xYz'  # case sensitive values
        assert flat_fs['A'] == flat_fs['a']  # case insensitive keys
        assert flat_fs['B'] == 2
        assert nested_fs['A.B.C'] == nested_fs['a.B.c'] == 1
        # dot notation vs nested feature structures
        assert nested_fs['A.B.D'] == nested_fs['A']['B']['D'] == 2
        with pytest.raises(KeyError):
            nested_fs['A.B.E']

    def test__delitem__(self, nested_fs):
        """Deletion removes one path at a time, leaving siblings intact."""
        del nested_fs['A.B.C']
        assert 'A.B.C' not in nested_fs
        assert nested_fs['A.B.D'] == 2
        del nested_fs['A.B.D']
        assert nested_fs['A.B'] == tfs.FeatureStructure()
        del nested_fs['A']
        assert 'A' not in nested_fs
        del nested_fs['b']
        assert 'B' not in nested_fs

    def test__contains__(self, empty_fs, flat_fs, nested_fs):
        """Membership tests accept plain and dotted feature names."""
        # NOTE(review): these expectations mirror the key rules exercised
        # in test__getitem__ and test__delitem__; confirm against tfs.
        assert 'A' not in empty_fs
        assert 'A' in flat_fs
        assert 'a' in flat_fs  # case insensitive keys
        assert 'C' not in flat_fs
        assert 'A.B.C' in nested_fs  # dot notation
        assert 'A.B' in nested_fs
        assert 'A.B.E' not in nested_fs

    def test_get(self, empty_fs, flat_fs, nested_fs):
        """get() returns the value, or None when the key is missing."""
        assert empty_fs.get('unknown') is None
        assert flat_fs.get('A') == 'xYz'
        assert flat_fs.get('a') == flat_fs.get('A')  # case insensitive keys
        assert flat_fs.get('B') == 2
        # NOTE(review): dotted-path expectations follow test__getitem__;
        # confirm that get() shares its lookup rules.
        assert nested_fs.get('A.B.C') == 1
        assert nested_fs.get('A.B.E') is None

    def test_features(self, empty_fs, flat_fs, nested_fs):
        """features() lists (path, value) pairs, optionally fully expanded."""
        assert empty_fs.features() == []
        assert sorted(flat_fs.features()) == [('A', 'xYz'), ('B', 2)]
        assert sorted(nested_fs.features()) == [
            ('A.B', tfs.FeatureStructure([('C', 1), ('D', 2)])),
            ('B', 3)]
        assert sorted(nested_fs.features(expand=True)) == [
            ('A.B.C', 1), ('A.B.D', 2), ('B', 3)]
97 |
98 |
def test_TypedFeatureStructure():
    """Check construction, type, features, and equality of typed structures."""
    # constructing without a type name must fail
    with pytest.raises(TypeError):
        tfs.TypedFeatureStructure()

    bare = tfs.TypedFeatureStructure('typename')
    assert bare.type == 'typename'
    assert bare.features() == []

    typed = tfs.TypedFeatureStructure('typename', [('a', 1), ('b', 2)])
    assert typed.type == 'typename'
    # feature names come back upper-cased
    assert typed.features() == [('A', 1), ('B', 2)]

    same = tfs.TypedFeatureStructure('typename', [('A', 1), ('B', 2)])
    other_type = tfs.TypedFeatureStructure('name', [('A', 1), ('B', 2)])
    other_value = tfs.TypedFeatureStructure('typename', [('A', 1), ('B', 3)])
    assert typed == same
    assert typed != other_type
    assert typed != other_value
113 |
114 |
class TestTypeHierarchy:
    """Tests for :class:`tfs.TypeHierarchy`."""

    def test_update(self):
        """update() rejects parent data of the wrong type."""
        hierarchy = tfs.TypeHierarchy('*top*')
        bad_parents = {'1': 1}  # invalid parent data type
        with pytest.raises(TypeError):
            hierarchy.update(bad_parents)

    def test_integrity(self):
        """Placeholder; the checks below are disabled pending issue #94."""
        pass
        # awaiting issue #94
        # # non-unique glb
        # with pytest.raises(tfs.TypeHierarchyError):
        #     tfs.TypeHierarchy('*top*', {'a': ['*top*'],
        #                                 'b': ['*top*'],
        #                                 'c': ['a', 'b'],
        #                                 'd': ['a', 'b']})
        # # non-symmetric non-unique glb
        # with pytest.raises(tfs.TypeHierarchyError):
        #     tfs.TypeHierarchy('*top*', {'a': ['*top*'],
        #                                 'b': ['*top*'],
        #                                 'c': ['*top*'],
        #                                 'd': ['a', 'b', 'c'],
        #                                 'e': ['a', 'b']})
        # # non-immediate non-unique glb
        # with pytest.raises(tfs.TypeHierarchyError):
        #     tfs.TypeHierarchy('*top*', {'a': ['*top*'],
        #                                 'b': ['*top*'],
        #                                 'c': ['a', 'b'],
        #                                 'a2': ['a'],
        #                                 'b2': ['b'],
        #                                 'd': ['a2', 'b2']})
146 |
--------------------------------------------------------------------------------
/delphin/codecs/edsjson.py:
--------------------------------------------------------------------------------
1 | """
2 | EDS-JSON serialization and deserialization.
3 | """
4 |
5 | import json
6 | from pathlib import Path
7 |
8 | from delphin.eds import EDS, Node
9 | from delphin.lnk import Lnk
10 |
# Codec metadata: this module handles the 'eds' representation.
# NOTE(review): presumably read by the codec-discovery machinery elsewhere
# in the package -- confirm.
CODEC_INFO = {
    'representation': 'eds',
}

# Strings that open, separate, and close a sequence of serialized items;
# concatenated around the items they spell out a JSON array.
# NOTE(review): not referenced within this module; presumably used by
# callers that write items one at a time -- confirm.
HEADER = '['
JOINER = ','
FOOTER = ']'
18 |
19 |
def load(source):
    """
    Deserialize an EDS-JSON file (handle or filename) to EDS objects.

    Args:
        source: filename or file object; anything with a ``read``
            attribute is treated as an open file, anything else as
            a path (``~`` is expanded)
    Returns:
        a list of EDS objects
    """
    if hasattr(source, 'read'):
        data = json.load(source)
    else:
        source = Path(source).expanduser()
        # JSON text is UTF-8 and dump() writes UTF-8 by default, so do
        # not fall back on the platform's locale encoding when reading
        with source.open(encoding='utf-8') as fh:
            data = json.load(fh)
    return [from_dict(d) for d in data]
36 |
37 |
def loads(s):
    """
    Deserialize an EDS-JSON string to EDS objects.

    Args:
        s (str): an EDS-JSON string encoding a list of EDS dictionaries
    Returns:
        a list of EDS objects
    """
    results = []
    for item in json.loads(s):
        results.append(from_dict(item))
    return results
49 |
50 |
def dump(es, destination, properties=True, lnk=True,
         indent=False, encoding='utf-8'):
    """
    Serialize EDS objects to an EDS-JSON file.

    Args:
        es: iterator of :class:`~delphin.eds.EDS` objects to
            serialize
        destination: filename or file object
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent: if `True`, indent two spaces per level; if `False`
            or `None`, don't indent; if a non-negative integer N,
            indent N spaces per level
        encoding (str): if *destination* is a filename, write to the
            file with the given encoding; otherwise it is ignored
    """
    if indent is False:
        indent = None
    elif indent is True:
        indent = 2
    data = [to_dict(e, properties=properties, lnk=lnk)
            for e in es]
    if hasattr(destination, 'write'):
        json.dump(data, destination, indent=indent)
    else:
        destination = Path(destination).expanduser()
        with destination.open('w', encoding=encoding) as fh:
            # pass *indent* here as well so that writing to a path is
            # formatted the same as writing to a file object
            json.dump(data, fh, indent=indent)
80 |
81 |
def dumps(es, properties=True, lnk=True, indent=False):
    """
    Serialize EDS objects to an EDS-JSON string.

    Args:
        es: iterator of :class:`~delphin.eds.EDS` objects to
            serialize
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent: if `True`, indent two spaces per level; if `False`
            or `None`, don't indent; if a non-negative integer N,
            indent N spaces per level
    Returns:
        an EDS-JSON serialization of the EDS objects
    """
    # translate the boolean settings into what the json module expects;
    # identity checks keep the integers 0 and 1 from being mistaken
    # for False and True
    if indent is True:
        indent = 2
    elif indent is False:
        indent = None
    payload = []
    for e in es:
        payload.append(to_dict(e, properties=properties, lnk=lnk))
    return json.dumps(payload, indent=indent)
104 |
105 |
def decode(s):
    """
    Deserialize a single EDS object from an EDS-JSON string.

    Args:
        s (str): an EDS-JSON string encoding one EDS dictionary
    Returns:
        an EDS object
    """
    d = json.loads(s)
    return from_dict(d)
111 |
112 |
def encode(eds, properties=True, lnk=True, indent=False):
    """
    Serialize a single EDS object to an EDS-JSON string.

    Args:
        eds: an EDS object
        properties (bool): if `False`, suppress variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent (bool, int): if `True`, indent two spaces per level;
            if `False` or `None`, don't indent; if an integer N,
            indent N spaces per level
    Returns:
        an EDS-JSON serialization of the EDS object
    """
    # identity checks keep the integers 0 and 1 from being mistaken
    # for False and True
    if indent is True:
        indent = 2
    elif indent is False:
        indent = None
    return json.dumps(
        to_dict(eds, properties=properties, lnk=lnk),
        indent=indent,
    )
132 |
133 |
def to_dict(eds, properties=True, lnk=True):
    """
    Encode the EDS as a dictionary suitable for JSON serialization.

    Args:
        eds: the EDS object to encode
        properties: if false, leave node properties out
        lnk: if false, leave surface alignments out
    Returns:
        a dictionary with the keys ``top`` and ``nodes``, the latter
        mapping each node identifier to a dictionary for that node
    """

    def describe(node):
        # key order is label, edges, lnk, type, properties, carg;
        # optional keys appear only when they have something to say
        entry = {'label': node.predicate, 'edges': node.edges}
        if lnk and node.lnk is not None:
            entry['lnk'] = {'from': node.cfrom, 'to': node.cto}
        if node.type is not None:
            entry['type'] = node.type
        node_props = node.properties if properties else None
        if node_props:
            entry['properties'] = node_props
        if node.carg is not None:
            entry['carg'] = node.carg
        return entry

    encoded = {node.id: describe(node) for node in eds.nodes}
    return {'top': eds.top, 'nodes': encoded}
156 |
157 |
def from_dict(d):
    """
    Decode a dictionary, as from :func:`to_dict`, into an EDS object.

    Args:
        d: a dictionary with an optional ``top`` and an optional
            ``nodes`` mapping of node identifiers to node dictionaries;
            each node dictionary must have a ``label``
    Returns:
        an EDS object whose nodes are ordered by increasing start
        position and then by decreasing end position
    """
    top = d.get('top')
    built = []
    for nodeid, spec in d.get('nodes', {}).items():
        node_props = spec.get('properties', None)
        node_type = spec.get('type')
        if 'lnk' in spec:
            span = Lnk.charspan(spec['lnk']['from'], spec['lnk']['to'])
        else:
            span = None
        built.append(
            Node(id=nodeid,
                 predicate=spec['label'],
                 type=node_type,
                 edges=spec.get('edges', {}),
                 properties=node_props,
                 carg=spec.get('carg'),
                 lnk=span))
    ordered = sorted(built, key=lambda n: (n.cfrom, -n.cto))
    return EDS(top, nodes=ordered)
180 |
--------------------------------------------------------------------------------