├── VERSION
├── MANIFEST.in
├── docs
│   ├── replacy_ex.png
│   └── replacy_logo.png
├── replacy
│   ├── version.py
│   ├── resources
│   │   ├── forms_lookup.json
│   │   ├── test.arpa
│   │   ├── match_dict_schema.json
│   │   ├── match_dict.json
│   │   └── patterns_test_data.json
│   ├── filter_spans_by_cat.py
│   ├── ref_matcher.py
│   ├── default_scorer.py
│   ├── suggestion_joiner.py
│   ├── filter_0distance.py
│   ├── test_helper.py
│   ├── db.py
│   ├── scorer.py
│   ├── inflector.py
│   ├── util.py
│   ├── suggestion.py
│   ├── __init__.py
│   └── default_match_hooks.py
├── pytest.ini
├── tests
│   ├── test_replacy.py
│   ├── resources_test.py
│   ├── test_hooks.py
│   ├── test_scorer.py
│   ├── test_multiple_whitespaces.py
│   ├── test_ref_matcher.py
│   ├── test_custom_props.py
│   ├── test_pipeline.py
│   ├── test_suggestions.py
│   ├── test_inflector.py
│   └── test_max_count.py
├── test.py
├── .github
│   ├── workflows
│   │   ├── pub.yaml
│   │   └── main.yml
│   └── pull_request_template.md
├── pyproject.toml
├── LICENSE.md
├── CHANGELOG.md
├── .gitignore
├── setup.py
├── README.md
└── poetry.lock

--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 3.1.0
2 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include VERSION
2 | recursive-include replacy/resources *
--------------------------------------------------------------------------------
/docs/replacy_ex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writer/replaCy/HEAD/docs/replacy_ex.png
--------------------------------------------------------------------------------
/docs/replacy_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writer/replaCy/HEAD/docs/replacy_logo.png
--------------------------------------------------------------------------------
/replacy/version.py:
--------------------------------------------------------------------------------
1 | # CHANGES HERE HAVE NO EFFECT: ../VERSION is the source of truth
2 | __version__ = "3.1.0"
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | filterwarnings =
3 |     error
4 |     ignore::DeprecationWarning
5 |     ignore::ImportWarning
--------------------------------------------------------------------------------
/tests/test_replacy.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | # importing the TestCase lets unittest.main() discover its tests
4 | from replacy.test_helper import MatchDictTestHelper
5 | 
6 | if __name__ == "__main__":
7 |     unittest.main()
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | """
2 | Because of the automatic file discovery that happens in QAI,
3 | PYTHONPATH is wrong if you `python -m pytest`
4 | so run `python test.py` with test.py in the root and then it works
5 | """
6 | 
7 | import pytest
8 | 
9 | pytest.main()
--------------------------------------------------------------------------------
/replacy/resources/forms_lookup.json:
--------------------------------------------------------------------------------
1 | {
2 |     "exact": {
3 |         "VB": "exact",
4 |         "VBP": "exact",
5 |         "VBD": "exacted",
6 |         "VBN": "exacted",
7 |         "VBG": "exacting",
8 |         "VBZ": "exacts"
9 |     }
10 | }
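Note: `forms_lookup.json` holds hand-curated inflection overrides that `replacy/inflector.py` consults before falling back to lemminflect. A minimal sketch of that lookup logic, mirroring `Inflector.get_dict_form` later in this repo (the standalone helper and assertion values below are illustrative, not part of the package):

```python
# Sketch: resolving an override inflection from a forms lookup.
forms_lookup = {
    "exact": {
        "VB": "exact", "VBP": "exact", "VBD": "exacted",
        "VBN": "exacted", "VBG": "exacting", "VBZ": "exacts",
    }
}

def get_dict_form(word: str, tag: str):
    # If `word` is any known form of an entry, return that entry's form
    # for the requested Penn tag; otherwise signal "not found" with None.
    for forms in forms_lookup.values():
        if word in forms.values() and tag in forms:
            return forms[tag]
    return None

assert get_dict_form("exacting", "VBZ") == "exacts"
assert get_dict_form("dance", "VBZ") is None  # not in the lookup
```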
-------------------------------------------------------------------------------- /.github/workflows/pub.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPi 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | publish: 8 | name: Build and publish to PyPi 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Build and publish to pypi 13 | uses: JRubics/poetry-publish@v1.9 14 | with: 15 | python_version: "3.8" 16 | pypi_token: ${{ secrets.PYPI_TOKEN }} 17 | -------------------------------------------------------------------------------- /tests/resources_test.py: -------------------------------------------------------------------------------- 1 | import json 2 | from replacy import ReplaceMatcher 3 | from replacy.db import get_match_dict 4 | 5 | with open("replacy/resources/match_dict.json", "r") as f: 6 | rules = json.load(f) 7 | 8 | 9 | def test_file_exists(): 10 | assert rules is not None 11 | 12 | 13 | # spacy 3 requires a new schema 14 | # def test_valid_format(): 15 | # match_dict = get_match_dict() 16 | # ReplaceMatcher.validate_match_dict(match_dict) 17 | -------------------------------------------------------------------------------- /replacy/filter_spans_by_cat.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from replacy import ESpan 4 | from spacy.util import filter_spans 5 | 6 | 7 | def filter_spans_by_cat(spans: List[ESpan]) -> List[ESpan]: 8 | if len(spans): 9 | subcats = set(map(lambda c: c.subcategory, spans)) 10 | grouped_spans = [[y for y in spans if y.subcategory == c] for c in subcats] 11 | filtered_spans = [] 12 | for group in grouped_spans: 13 | filtered_spans += filter_spans(group) 14 | return filtered_spans 15 | return spans 16 | -------------------------------------------------------------------------------- /replacy/ref_matcher.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from spacy.matcher import Matcher 4 | 5 | 6 | class RefMatcher: 7 | def __call__(self, span, orig_pattern, alignments): 8 | # not all parameters are needed, adding it to have same signature as RefMatcher 9 | pattern_indexes = set(alignments) 10 | return { 11 | pattern_idx: [ 12 | span_token_idx 13 | for span_token_idx, pattern_index in enumerate(alignments) 14 | if pattern_index == pattern_idx 15 | ] 16 | for pattern_idx in pattern_indexes 17 | } 18 | -------------------------------------------------------------------------------- /replacy/default_scorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from spacy.tokens import Span 4 | 5 | 6 | class Scorer: 7 | def __init__(self): 8 | pass 9 | 10 | def __call__(self, text): 11 | """Please override this""" 12 | return 0.5 13 | 14 | def score_suggestion(self, doc, span, suggestion): 15 | """Please override this""" 16 | text = " ".join([doc[: span.start].text] + suggestion + [doc[span.end :].text]) 17 | return self(text) 18 | 19 | def sort_suggestions(self, spans: List[Span]) -> List[Span]: 20 | """Please override this""" 21 | return spans 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "replaCy" 3 | version = "3.4.0" 4 | description = "ReplaCy = spaCy Matcher + pyInflect. 
Create rules, correct sentences." 5 | authors = [ 6 | "melisa-writer ", 7 | "sam-writer ", 8 | "manhal-daaboul " 9 | ] 10 | readme = "README.md" 11 | license = "MIT" 12 | 13 | [tool.poetry.dependencies] 14 | python = "^3.6" 15 | jsonschema = "^2.6.0" 16 | lemminflect = "0.2.1" 17 | pyfunctional = "^1.2.0" 18 | 19 | [tool.poetry.dev-dependencies] 20 | pytest = "^5.3.2" 21 | spacy= "^3.0.6" 22 | en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz" } 23 | kenlm = { git = "https://github.com/kpu/kenlm", rev = "master" } -------------------------------------------------------------------------------- /replacy/suggestion_joiner.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from spacy.tokens import Span 4 | 5 | 6 | def join_suggestions(spans: List[Span]) -> List[Span]: 7 | for span in spans: 8 | suggestions_separator = span.suggestions_separator if span.has_extension('suggestions_separator') else " " 9 | suggestions: List[str] = [] 10 | for s in span._.suggestions: 11 | # in case of two exactly overlapping spans 12 | # some of suggestions could be already processed 13 | # this could cause problems 14 | # this should be handled by early span filtering 15 | try: 16 | suggestions += [suggestions_separator.join([t.text for t in s])] 17 | except AttributeError: 18 | suggestions.append(s) 19 | 20 | span._.suggestions = suggestions 21 | return spans 22 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | on: push 3 | jobs: 4 | lint: 5 | name: Lint with Black 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: "lgeiger/black-action@master" 9 | with: 10 | args: ". 
--check" 11 | pytest: 12 | name: pytest 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: [3.8] 17 | steps: 18 | - uses: actions/checkout@master 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: abatilo/actions-poetry@v1.5.0 21 | with: 22 | python_version: ${{ matrix.python-version }} 23 | poetry_version: 1.1.8 24 | args: install 25 | - name: Run pytest 26 | uses: abatilo/actions-poetry@v1.5.0 27 | with: 28 | python_version: ${{ matrix.python-version }} 29 | poetry_version: 1.1.8 30 | args: run python -m pytest -------------------------------------------------------------------------------- /tests/test_hooks.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | 4 | from replacy import default_match_hooks 5 | from replacy.db import get_patterns_test_data 6 | 7 | nlp = spacy.load("en_core_web_sm") 8 | 9 | examples_list = get_patterns_test_data() 10 | 11 | 12 | @pytest.mark.parametrize("example", examples_list) 13 | def test_custom_patterns(example): 14 | 15 | hook_name = example["hook_name"] 16 | 17 | if example.get("args", False): 18 | hook = getattr(default_match_hooks, hook_name)(example["args"]) 19 | elif example.get("kwargs", False): 20 | hook = getattr(default_match_hooks, hook_name)(**example["kwargs"]) 21 | else: 22 | hook = getattr(default_match_hooks, hook_name)() 23 | 24 | doc = nlp(example["text"]) 25 | start = example["start"] 26 | end = example["end"] 27 | 28 | assert hook(doc, start, end) == example["result"], f"{hook_name} should work" + str(example["result"]) 29 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # PR for replaCy 2 | 3 | ## PR Author 4 | 5 | ### Type of change 6 | 7 | 8 | - [ ] Bug fix (non-breaking change which fixes an issue) 9 | - [ ] New feature (non-breaking change which adds functionality) 10 | - [ ] Breaking change (fix or feature that would cause existing functionality to change) 11 | 12 | ### Reminders 13 | 14 | - [ ] I incremented the version appropriately (now in `pyproject.toml`). 15 | - [ ] I added tests to cover my changes. 16 | - [ ] I tested my changes with a replaCy-based service to confirm my changes don't break it. 17 | - [ ] If my changes require documentation updates, I updated the documentation 18 | 19 | ---- 20 | 21 | ## PR Reviewer 22 | 23 | Confirm that they indeed did everything above in the `reminders` section! Especially important is that they checked this with downstream services and updated the documentation. 24 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 Qordoba, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /replacy/filter_0distance.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from replacy import ESpan 4 | 5 | 6 | def filter_0distance(spans: List[ESpan]) -> List[ESpan]: 7 | filtered_spans = [] 8 | for span in spans: 9 | if len(span.suggestions): 10 | suggestions = [] 11 | for suggestion in span.suggestions: 12 | if (span.doc[span.start:span.end].text) == suggestion: 13 | continue 14 | suggestions.append(suggestion) 15 | 16 | if len(suggestions): 17 | span.suggestions = suggestions 18 | filtered_spans.append(span) 19 | else: 20 | filtered_spans.append(span) 21 | return filtered_spans 22 | 23 | 24 | def filter_0distance_with_line_break(spans: List[ESpan]) -> List[ESpan]: 25 | filtered_spans = [] 26 | for span in spans: 27 | if len(span.suggestions): 28 | span_text = span.doc[span.start:span.end].text.rstrip(" \r\n") 29 | suggestions = [] 30 | for suggestion in span.suggestions: 31 | if span_text == suggestion: 32 | continue 33 | suggestions.append(suggestion) 34 | 35 | if len(suggestions): 36 | span.suggestions = suggestions 37 | filtered_spans.append(span) 38 | else: 39 | filtered_spans.append(span) 40 | return filtered_spans -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (sort of. It's early days, and there may be some breaking changes released under a minor version increment). 7 | 8 | ## [0.35.0] - 2020-07-02 9 | 10 | ### Fixed 11 | 12 | - Only import kenlm if asked 13 | 14 | ## [0.31.0] - 2020-06-23 15 | 16 | - Oops forgot to update this for quite a while. See the README for these changes. Will try to add this updating to the CI/CD... one day. 17 | 18 | ## [0.5.0] - 2020-01-02 19 | 20 | ### Changed 21 | 22 | - updated `requirements-dev.txt` to have all needed requirements for development 23 | 24 | - `replacy/db.py:get_forms_lookup` and `replacy/db.py:get_match_dict` now each accept one parameter - the path to the resource they will load. The default value of this parameter is the value that was previously hardcoded. 25 | 26 | - `replacy/__init__.py:ReplaceMatcher.__init__` now does not require a `match_dict` to be passed in as the second parameter. If no `match_dict` is passed, it will load one by calling `replacy/db.py:get_match_dict()` (with no parameter, so it will look in the default location). 
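For illustration, the two initialization styles described above (a sketch; assumes spaCy and `en_core_web_sm` are installed):

```python
import spacy
from replacy import ReplaceMatcher
from replacy.db import get_match_dict

nlp = spacy.load("en_core_web_sm")

# explicit path, resolved relative to the replacy package directory
r_explicit = ReplaceMatcher(nlp, get_match_dict("resources/match_dict.json"))

# no match_dict: falls back to get_match_dict() and the default location
r_default = ReplaceMatcher(nlp)
```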
27 | 28 | ## [0.4.0] - 2019-12-UNK 29 | 30 | ### UNK 31 | 32 | ## [0.1.0 - 0.3.0] - 2019-12-18 33 | 34 | ### First 35 | 36 | - first pypi release 37 | -------------------------------------------------------------------------------- /tests/test_scorer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | 4 | from replacy import ReplaceMatcher 5 | from replacy.db import get_match_dict 6 | 7 | nlp = spacy.load("en_core_web_sm") 8 | lm_path = "replacy/resources/test.arpa" 9 | 10 | match_dict = get_match_dict() 11 | r_matcher = ReplaceMatcher(nlp, match_dict, lm_path=lm_path) 12 | 13 | dumb_matcher = ReplaceMatcher(nlp, match_dict, lm_path=None) 14 | 15 | test_examples = [ 16 | { 17 | "sent": "This x a sentence.", 18 | "span_start": 1, 19 | "span_end": 2, 20 | "suggestions": ["are", "were", "is"], 21 | "best_suggestion": "is", 22 | }, 23 | { 24 | "sent": "This is x sentence.", 25 | "span_start": 2, 26 | "span_end": 3, 27 | "suggestions": ["two", "a", "cat"], 28 | "best_suggestion": "a", 29 | }, 30 | { 31 | "sent": "This is a sentences.", 32 | "span_start": 3, 33 | "span_end": 4, 34 | "suggestions": ["sentence", "sentences", "dogs"], 35 | "best_suggestion": "sentence", 36 | }, 37 | ] 38 | 39 | 40 | @pytest.mark.parametrize("example", test_examples) 41 | def test_scorer(example): 42 | doc = nlp(example["sent"]) 43 | span = doc[example["span_start"] : example["span_end"]] 44 | span._.suggestions = example["suggestions"] 45 | 46 | sorted_suggestions = sorted( 47 | span._.suggestions, 48 | key=lambda x: r_matcher.scorer.score_suggestion(doc, span, [x]), 49 | ) 50 | best_suggestion = sorted_suggestions[0] 51 | assert example["best_suggestion"] == best_suggestion 52 | -------------------------------------------------------------------------------- /tests/test_multiple_whitespaces.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | from replacy import ReplaceMatcher 4 | 5 | nlp = spacy.load("en_core_web_sm") 6 | 7 | # minimal match dict with many whitespaces 8 | match_dict = { 9 | "extract-revenge": { 10 | "patterns": [[{"LEMMA": "extract", "TEMPLATE_ID": 1}]], 11 | "suggestions": [[{"TEXT": "exact", "FROM_TEMPLATE_ID": 1}]], 12 | "match_hook": [ 13 | { 14 | "name": "succeeded_by_phrase", 15 | "args": "revenge", 16 | "match_if_predicate_is": True, 17 | } 18 | ], 19 | "test": { 20 | "positive": [ 21 | "And at the same time extract revenge on those he so despises?", # 0 22 | "Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team.", # 1 23 | "In fact, the farmer was so mean to this young man he determined to extract revenge.", # 2 24 | "And at the same time extract revenge on the whites he so despises?", # 10 sic 25 | ], 26 | "negative": ["Mother flavours her custards with lemon extract."], 27 | }, 28 | } 29 | } 30 | 31 | r_matcher = ReplaceMatcher(nlp, match_dict, allow_multiple_whitespaces=True) 32 | 33 | 34 | def test_multiple_whites(): 35 | sents = match_dict["extract-revenge"]["test"]["positive"] 36 | for sent in sents: 37 | assert len(r_matcher(sent)), "Should correct with multiple whitespaces" 38 | 39 | suggestion = r_matcher(sent)[0].text.strip() 40 | assert "extract" in suggestion, "Should correct with multiple whitespaces" 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | 
.DS_Store 4 | 5 | # Checkpoints and models: 6 | *.pt 7 | *.bin 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | conf/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | env.bak/ 100 | venv.bak/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | -------------------------------------------------------------------------------- /replacy/test_helper.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from typing import Any, Dict, List, Tuple 3 | 4 | import spacy 5 | 6 | from replacy import ReplaceMatcher 7 | from replacy.db import get_match_dict 8 | 9 | 10 | class MatchDictTestHelper(unittest.TestCase): 11 | 12 | @staticmethod 13 | def generate_cases(match_dict: Dict[str, Any]) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]: 14 | positives: List[Tuple[str, str]] = [] 15 | negatives: List[Tuple[str, str]] = [] 16 | for rule_name in match_dict: 17 | test_set = match_dict[rule_name]["test"] 18 | positive_cases = test_set["positive"] 19 | negative_cases = test_set["negative"] 20 | for positive_sent in positive_cases: 21 | positives.append((rule_name, positive_sent)) 22 | for negative_sent in negative_cases: 23 | negatives.append((rule_name, negative_sent)) 24 | return positives, negatives 25 | 26 | @classmethod 27 | def setUpClass(cls): 28 | nlp = spacy.load("en_core_web_sm") 29 | match_dict = get_match_dict() 30 | cls.r_matcher = ReplaceMatcher(nlp, match_dict) 31 | cls.positive_cases, cls.negative_cases = MatchDictTestHelper.generate_cases(match_dict) 32 | 33 | def test_positive(self): 34 | for (match_name, positive_sent) in self.positive_cases: 35 | spans = self.r_matcher(positive_sent) 36 | spans_from_this_rule = list(filter(lambda s: s._.match_name == match_name, spans)) 37 | print(match_name, positive_sent) 38 | assert len(spans_from_this_rule) > 0, "Positive case should trigger rule" 39 | 40 | def test_negative(self): 41 | for (match_name, negative_sent) in self.negative_cases: 42 | spans = 
self.r_matcher(negative_sent) 43 | spans_from_this_rule = list(filter(lambda s: s._.match_name == match_name, spans)) 44 | print(match_name, negative_sent) 45 | assert len(spans_from_this_rule) == 0, "Negative case should NOT trigger rule" 46 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | from setuptools import setup, find_packages 5 | from setuptools.command.install import install 6 | 7 | here = os.path.abspath(os.path.dirname(__file__)) 8 | 9 | with open(os.path.join(here, "README.md"), encoding="utf-8") as f: 10 | long_description = f.read() 11 | 12 | with open(os.path.join(here, "VERSION"), encoding="utf-8") as f: 13 | __version__ = f.read().strip() 14 | with open(os.path.join(here, "replacy", "version.py"), "w+", encoding="utf-8") as v: 15 | v.write("# CHANGES HERE HAVE NO EFFECT: ../VERSION is the source of truth\n") 16 | v.write(f'__version__ = "{__version__}"') 17 | """ 18 | requirementPath = os.path.abspath("./requirements.txt") 19 | install_requires: List[str] = [] 20 | if os.path.isfile(requirementPath): 21 | with open(requirementPath) as f: 22 | install_requires = f.read().splitlines() 23 | """ 24 | setup( 25 | name="replacy", 26 | description="ReplaCy = spaCy Matcher + pyInflect. Create rules, correct sentences.", 27 | packages=find_packages(), 28 | package_data={"replacy": ["resources/*"]}, 29 | include_package_data=True, 30 | author="Qordoba", 31 | author_email="Sam Havens , Melisa Stal ", 32 | url="https://github.com/Qordobacode/replaCy", 33 | version=__version__, 34 | license="MIT", 35 | long_description=long_description, 36 | long_description_content_type="text/markdown", 37 | install_requires=["pyfunctional>=1.2.0", "jsonschema>=2.6.0", "lemminflect==0.2.1"], 38 | python_requires=">=3.5", 39 | classifiers=[ 40 | "Development Status :: 3 - Alpha", 41 | "Intended Audience :: Developers", 42 | "Intended Audience :: Science/Research", 43 | "License :: OSI Approved :: MIT License", 44 | "Natural Language :: English", 45 | "Programming Language :: Python :: 3.5", 46 | "Programming Language :: Python :: 3.6", 47 | "Programming Language :: Python :: 3.7", 48 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 49 | "Topic :: Text Processing :: Linguistic", 50 | "Typing :: Typed", 51 | ], 52 | ) 53 | -------------------------------------------------------------------------------- /replacy/db.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import Any, Dict, List, Union 4 | 5 | here = os.path.abspath(os.path.dirname(__file__)) 6 | 7 | 8 | def _load_list(paths: List[str]) -> dict: 9 | content: Dict[str, Any] = {} 10 | for p in paths: 11 | with open(p) as h: 12 | t = json.load(h) 13 | content.update(t) 14 | return content 15 | 16 | 17 | def load_json(path_or_dir: Union[str, List[str]]) -> dict: 18 | path_error = ( 19 | "replacy.db.load_json expects a valid path to a json file, " 20 | "a list of (valid) paths to json files, " 21 | "or the (valid) path to a directory with json files" 22 | f", but received {path_or_dir}" 23 | ) 24 | if type(path_or_dir) == str: 25 | json_path = str(path_or_dir) # make mypy happy 26 | if ( 27 | os.path.exists(json_path) 28 | and os.path.isfile(json_path) 29 | and json_path[-5:] == ".json" 30 | ): 31 | with open(json_path) as h: 32 | content = json.load(h) 33 | elif 
os.path.isdir(json_path): 34 | paths = [ 35 | os.path.join(json_path, f) 36 | for f in os.listdir(json_path) 37 | if f.endswith(".json") 38 | ] 39 | content = _load_list(paths) 40 | else: 41 | raise ValueError(path_error) 42 | elif type(path_or_dir) == list: 43 | paths = list(path_or_dir) # for mypy 44 | content = _load_list(paths) 45 | else: 46 | raise TypeError(path_error) 47 | return content 48 | 49 | 50 | def get_forms_lookup(forms_path="resources/forms_lookup.json"): 51 | matches_path = os.path.join(here, forms_path) 52 | return load_json(matches_path) 53 | 54 | 55 | def get_match_dict(match_path="resources/match_dict.json"): 56 | matches_path = os.path.join(here, match_path) 57 | return load_json(matches_path) 58 | 59 | 60 | def get_match_dict_schema(schema_path="resources/match_dict_schema.json"): 61 | full_schema_path = os.path.join(here, schema_path) 62 | return load_json(full_schema_path) 63 | 64 | 65 | def get_patterns_test_data(data_path="resources/patterns_test_data.json"): 66 | test_data_path = os.path.join(here, data_path) 67 | return load_json(test_data_path) 68 | 69 | 70 | def load_lm(model_path): 71 | import kenlm 72 | return kenlm.Model(model_path) 73 | -------------------------------------------------------------------------------- /tests/test_ref_matcher.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | 4 | from replacy import ReplaceMatcher 5 | 6 | nlp = spacy.load("en_core_web_sm") 7 | 8 | match_dict = { 9 | "match-1": { 10 | "patterns": [[ 11 | {"POS": {"NOT_IN": ["ADJ"]}, "OP": "*"}, 12 | {"POS": "ADJ", "OP": "*"}, 13 | {"POS": "NOUN"}, 14 | {"LEMMA": "be", "TEMPLATE_ID": 1}, 15 | {"LEMMA": "deliver"}, 16 | {"IS_PUNCT": False, "OP": "*"}, 17 | {"IS_PUNCT": True}, 18 | ]], 19 | "suggestions": [ 20 | [ 21 | {"TEXT": "A"}, 22 | {"TEXT": "delivery"}, 23 | {"TEXT": "of"}, 24 | {"PATTERN_REF": 1}, 25 | {"PATTERN_REF": 2}, 26 | {"TEXT": "be", "FROM_TEMPLATE_ID": 1}, 27 | {"TEXT": "made"}, 28 | {"PATTERN_REF": -2}, 29 | {"PATTERN_REF": -1}, 30 | ] 31 | ], 32 | "test": {"positive": [], "negative": []}, 33 | }, 34 | "match-2": { 35 | "patterns": [[ 36 | {"TEXT": "I"}, 37 | {"POS": "VERB",}, 38 | {"POS": "DET", "OP": "?"}, 39 | {"TEXT": "dog"}, 40 | {"POS": "DET"}, 41 | {"POS": "ADJ", "OP": "*"}, 42 | {"POS": "NOUN"}, 43 | ]], 44 | "suggestions": [ 45 | [ 46 | {"PATTERN_REF": 0}, 47 | {"PATTERN_REF": 1}, 48 | {"PATTERN_REF": 4}, 49 | {"PATTERN_REF": 5}, 50 | {"PATTERN_REF": 6}, 51 | {"TEXT": "to"}, 52 | {"PATTERN_REF": 2}, 53 | {"PATTERN_REF": 3}, 54 | ] 55 | ], 56 | "test": {"positive": [], "negative": []}, 57 | }, 58 | } 59 | 60 | r_matcher = ReplaceMatcher(nlp, match_dict) 61 | 62 | sents = [ 63 | "The fresh juicy sandwiches were delivered to everyone at the shop before lunchtime.", 64 | "Looks like I fed the dog some popcorn.", 65 | ] 66 | 67 | suggestions = [ 68 | "A delivery of fresh juicy sandwiches was made to everyone at the shop before lunchtime .", 69 | "I fed some popcorn to the dog", 70 | ] 71 | 72 | 73 | def test_refs(): 74 | for sent, sugg in zip(sents, suggestions): 75 | span = r_matcher(sent) 76 | print(span[0]) 77 | print(span[0]._.suggestions[0]) 78 | assert span[0]._.suggestions[0] == sugg 79 | -------------------------------------------------------------------------------- /tests/test_custom_props.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | import spacy 5 | from replacy import ReplaceMatcher 6 | from 
replacy.db import get_match_dict 7 | 8 | nlp = spacy.load("en_core_web_sm") 9 | 10 | with open("replacy/resources/match_dict.json", "r") as md: 11 | match_dict = json.load(md) 12 | r_matcher = ReplaceMatcher(nlp, match_dict) 13 | 14 | r_matcher.match_dict.update( 15 | { 16 | "sometest": { 17 | "patterns": [[{"LOWER": "sometest"}]], 18 | "suggestions": [[{"TEXT": "this part isn't the point"}]], 19 | "test": {"positive": ["positive test"], "negative": ["negative test"]}, 20 | "comment": "this is an example comment", 21 | "description": 'The expression is "make do".', 22 | "category": "R:VERB", 23 | "yo": "yoyo", 24 | "whoa": ["it's", "a", "list"], 25 | "damn": {"a dict": "too?"}, 26 | "nice": 420, 27 | "also_nice": 42.0, 28 | "meh": True, 29 | } 30 | } 31 | ) 32 | new_matcher = ReplaceMatcher(nlp, r_matcher.match_dict) 33 | # This matches the new entry above 34 | matched_span = new_matcher("sometest")[0] 35 | 36 | # This matches a "normal" replaCy match example, so uses defaults 37 | no_match_span = new_matcher("I will extract revenge")[0] 38 | 39 | 40 | def test_custom_properties_string(): 41 | assert no_match_span._.yo == "", "automatically infers string types" 42 | assert matched_span._.yo == "yoyo", "picks up custom string types" 43 | 44 | 45 | def test_custom_properties_list(): 46 | assert no_match_span._.whoa == [], "automatically infers list types" 47 | assert matched_span._.whoa == ["it's", "a", "list"], "picks up custom list types" 48 | 49 | 50 | def test_custom_properties_dict(): 51 | assert no_match_span._.damn == {}, "automatically infers dict types" 52 | assert matched_span._.damn == {"a dict": "too?"}, "picks up custom dict types" 53 | 54 | 55 | def test_custom_properties_int(): 56 | assert no_match_span._.nice == 0, "automatically infers int types" 57 | assert matched_span._.nice == 420, "picks up custom int types" 58 | 59 | 60 | def test_custom_properties_float(): 61 | assert no_match_span._.also_nice == 0.0, "automatically infers float types" 62 | assert matched_span._.also_nice == 42.0, "picks up custom float types" 63 | 64 | 65 | def test_custom_properties_bool(): 66 | assert no_match_span._.meh == False, "automatically infers bool types" 67 | assert matched_span._.meh == True, "picks up custom bool types" 68 | -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pytest 4 | import spacy 5 | from spacy.tokens import Span 6 | from spacy.util import filter_spans 7 | 8 | from replacy import ReplaceMatcher 9 | from replacy.suggestion import Suggestion 10 | 11 | nlp = spacy.load("en_core_web_sm") 12 | 13 | match_dict = { 14 | "hyuck": { 15 | "patterns": [[{"LOWER": "hyuck"}]], 16 | "suggestions": [[{"TEXT": "ha"}]], 17 | "test": {"positive": [], "negative": []}, 18 | }, 19 | "hyuck-hyuck": { 20 | "patterns": [[{"LOWER": "hyuck"}, {"LOWER": "hyuck"}]], 21 | "suggestions": [[{"TEXT": "haha"}]], 22 | "test": {"positive": [], "negative": []}, 23 | }, 24 | } 25 | 26 | 27 | def test_default_pipe(): 28 | replaCy = ReplaceMatcher(nlp, match_dict) 29 | assert replaCy.pipe_names == ["sorter", "filter", "joiner"] 30 | 31 | 32 | class NewComponent: 33 | gibberish = "jknasdkjna" 34 | 35 | def __init__(self, name="garbler"): 36 | self.name = name 37 | 38 | def __call__(self, spans: List[Span]): 39 | for s in spans: 40 | s._.suggestions = [[Suggestion(text=self.gibberish, max_count=1, id=69)]] 41 | return spans 42 | 43 
| 44 | garbler = NewComponent() 45 | 46 | 47 | def test_add_pipe_first(): 48 | replaCy = ReplaceMatcher(nlp, match_dict) 49 | replaCy.add_pipe(garbler, first=True) 50 | assert replaCy.pipe_names == ["garbler", "sorter", "filter", "joiner"] 51 | 52 | 53 | def test_add_pipe_last(): 54 | replaCy = ReplaceMatcher(nlp, match_dict) 55 | replaCy.add_pipe(garbler, last=True) 56 | assert replaCy.pipe_names == ["sorter", "filter", "joiner", "garbler"] 57 | 58 | 59 | def test_add_pipe_before(): 60 | replaCy = ReplaceMatcher(nlp, match_dict) 61 | replaCy.add_pipe(garbler, before="joiner") 62 | assert replaCy.pipe_names == ["sorter", "filter", "garbler", "joiner"] 63 | 64 | 65 | def test_add_pipe_after(): 66 | replaCy = ReplaceMatcher(nlp, match_dict) 67 | replaCy.add_pipe(garbler, after="filter") 68 | assert replaCy.pipe_names == ["sorter", "filter", "garbler", "joiner"] 69 | 70 | 71 | def test_component_added_after_filter_is_called(): 72 | replaCy = ReplaceMatcher(nlp, match_dict) 73 | replaCy.add_pipe(garbler, after="filter") 74 | spans = replaCy("hyuck, that's funny") 75 | assert spans[0]._.suggestions[0] == NewComponent.gibberish 76 | 77 | 78 | def test_span_filter_component(): 79 | replaCy = ReplaceMatcher(nlp, match_dict) 80 | spans = replaCy("hyuck hyuck") 81 | assert ( 82 | len(spans) == 3 83 | ), "without span overlap filtering there are three spans (one for each hyuck, and one for both)" 84 | replaCy.add_pipe(filter_spans, before="joiner") 85 | spans = replaCy("hyuck hyuck") 86 | assert len(spans) == 1, "with span overlap filtering there is only one span" 87 | -------------------------------------------------------------------------------- /replacy/resources/test.arpa: -------------------------------------------------------------------------------- 1 | 2 | \data\ 3 | ngram 1=37 4 | ngram 2=47 5 | ngram 3=11 6 | ngram 4=6 7 | ngram 5=4 8 | 9 | \1-grams: 10 | -1.383514 , -0.30103 11 | -1.139057 . -0.845098 12 | -1.029493 13 | -99 -0.4149733 14 | -1.995635 -20 15 | -1.285941 a -0.69897 16 | -1.687872 also -0.30103 17 | -1.687872 beyond -0.30103 18 | -1.687872 biarritz -0.30103 19 | -1.687872 call -0.30103 20 | -1.687872 concerns -0.30103 21 | -1.687872 consider -0.30103 22 | -1.687872 considering -0.30103 23 | -1.687872 for -0.30103 24 | -1.509559 higher -0.30103 25 | -1.687872 however -0.30103 26 | -1.687872 i -0.30103 27 | -1.687872 immediate -0.30103 28 | -1.687872 in -0.30103 29 | -1.687872 is -0.30103 30 | -1.285941 little -0.69897 31 | -1.383514 loin -0.30103 32 | -1.687872 look -0.30103 33 | -1.285941 looking -0.4771212 34 | -1.206319 more -0.544068 35 | -1.509559 on -0.4771212 36 | -1.509559 screening -0.4771212 37 | -1.687872 small -0.30103 38 | -1.687872 the -0.30103 39 | -1.687872 to -0.30103 40 | -1.687872 watch -0.30103 41 | -1.687872 watching -0.30103 42 | -1.687872 what -0.30103 43 | -1.687872 would -0.30103 44 | -3.141592 foo 45 | -2.718281 bar 3.0 46 | -6.535897 baz -0.0 47 | 48 | \2-grams: 49 | -0.6925742 , . 50 | -0.7522095 , however 51 | -0.7522095 , is 52 | -0.0602359 . 53 | -0.4846522 looking -0.4771214 54 | -1.051485 screening 55 | -1.07153 the 56 | -1.07153 watching 57 | -1.07153 what 58 | -0.09132547 a little -0.69897 59 | -0.2922095 also call 60 | -0.2922095 beyond immediate 61 | -0.2705918 biarritz . 
62 | -0.2922095 call for 63 | -0.2922095 concerns in 64 | -0.2922095 consider watch 65 | -0.2922095 considering consider 66 | -0.2834328 for , 67 | -0.5511513 higher more 68 | -0.5845945 higher small 69 | -0.2834328 however , 70 | -0.2922095 i would 71 | -0.2922095 immediate concerns 72 | -0.2922095 in biarritz 73 | -0.2922095 is to 74 | -0.09021038 little more -0.1998621 75 | -0.7273645 loin , 76 | -0.6925742 loin . 77 | -0.6708385 loin 78 | -0.2922095 look beyond 79 | -0.4638903 looking higher 80 | -0.4638903 looking on -0.4771212 81 | -0.5136299 more . -0.4771212 82 | -0.3561665 more loin 83 | -0.1649931 on a -0.4771213 84 | -0.1649931 screening a -0.4771213 85 | -0.2705918 small . 86 | -0.287799 the screening 87 | -0.2922095 to look 88 | -0.2622373 watch 89 | -0.2922095 watching considering 90 | -0.2922095 what i 91 | -0.2922095 would also 92 | -2 also would -6 93 | -15 -2 94 | -4 however -1 95 | -6 foo bar 96 | 97 | \3-grams: 98 | -0.01916512 more . 99 | -0.0283603 on a little -0.4771212 100 | -0.0283603 screening a little -0.4771212 101 | -0.01660496 a little more -0.09409451 102 | -0.3488368 looking higher 103 | -0.3488368 looking on -0.4771212 104 | -0.1892331 little more loin 105 | -0.04835128 looking on a -0.4771212 106 | -3 also would consider -7 107 | -6 however -12 108 | -7 to look a 109 | 110 | \4-grams: 111 | -0.009249173 looking on a little -0.4771212 112 | -0.005464747 on a little more -0.4771212 113 | -0.005464747 screening a little more 114 | -0.1453306 a little more loin 115 | -0.01552657 looking on a -0.4771212 116 | -4 also would consider higher -8 117 | 118 | \5-grams: 119 | -0.003061223 looking on a little 120 | -0.001813953 looking on a little more 121 | -0.0432557 on a little more loin 122 | -5 also would consider higher looking 123 | 124 | \end\ -------------------------------------------------------------------------------- /tests/test_suggestions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | 4 | from replacy import ReplaceMatcher 5 | 6 | nlp = spacy.load("en_core_web_sm") 7 | 8 | # They read us the stories they themselves had written. 
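# The match_dict below exercises several suggestion features at once:
# - TEMPLATE_ID / FROM_TEMPLATE_ID: re-inflect the suggested token to match
#   the inflection of the matched token (past-tense "read" yields "sang"/"gave").
# - PATTERN_REF: copy the token at that index of the matched pattern
#   (negative indices count from the end of the pattern).
# - INFLECTION: force a POS or Penn tag on the suggested token (VBD below).
# - REPLACY_OP "UPPER": upper-case the copied token ("they" -> "THEY").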
9 | 10 | match_dict = { 11 | "match-1": { 12 | "patterns": [[ 13 | {"LOWER": {"IN": ["they", "she"]}}, 14 | {"LEMMA": "read", "TEMPLATE_ID": 1}, 15 | {"LOWER": "us"}, 16 | {"LOWER": "the"}, 17 | {"LEMMA": "story", "TEMPLATE_ID": 1}, 18 | {"LOWER": {"IN": ["they", "she"]}}, 19 | {"LOWER": {"IN": ["themselves", "herself"]}}, 20 | {"LEMMA": "have", "OP": "*"}, 21 | {"LEMMA": {"IN": ["write", "made"]}}, 22 | ]], 23 | "suggestions": [ 24 | [ 25 | {"PATTERN_REF": 0}, 26 | {"TEXT": {"IN": ["sing", "give"]}, "FROM_TEMPLATE_ID": 1}, 27 | {"PATTERN_REF": 2}, 28 | {"TEXT": {"IN": ["a", "the", "some"]}}, 29 | {"TEXT": "story", "INFLECTION": "NOUN"}, 30 | {"PATTERN_REF": 5, "REPLACY_OP": "UPPER"}, 31 | {"PATTERN_REF": 6}, 32 | {"TEXT": {"IN": ["write", "made", "create"]}, "INFLECTION": "VBD"}, 33 | ] 34 | ], 35 | "test": {"positive": [], "negative": []}, 36 | } 37 | } 38 | 39 | outputs = [ 40 | "They sang us a stories THEY themselves wrote", 41 | "They sang us a stories THEY themselves made", 42 | "They sang us a stories THEY themselves created", 43 | "They sang us a story THEY themselves wrote", 44 | "They sang us a story THEY themselves made", 45 | "They sang us a story THEY themselves created", 46 | "They sang us the stories THEY themselves wrote", 47 | "They sang us the stories THEY themselves made", 48 | "They sang us the stories THEY themselves created", 49 | "They sang us the story THEY themselves wrote", 50 | "They sang us the story THEY themselves made", 51 | "They sang us the story THEY themselves created", 52 | "They sang us some stories THEY themselves wrote", 53 | "They sang us some stories THEY themselves made", 54 | "They sang us some stories THEY themselves created", 55 | "They sang us some story THEY themselves wrote", 56 | "They sang us some story THEY themselves made", 57 | "They sang us some story THEY themselves created", 58 | "They gave us a stories THEY themselves wrote", 59 | "They gave us a stories THEY themselves made", 60 | "They gave us a stories THEY themselves created", 61 | "They gave us a story THEY themselves wrote", 62 | "They gave us a story THEY themselves made", 63 | "They gave us a story THEY themselves created", 64 | "They gave us the stories THEY themselves wrote", 65 | "They gave us the stories THEY themselves made", 66 | "They gave us the stories THEY themselves created", 67 | "They gave us the story THEY themselves wrote", 68 | "They gave us the story THEY themselves made", 69 | "They gave us the story THEY themselves created", 70 | "They gave us some stories THEY themselves wrote", 71 | "They gave us some stories THEY themselves made", 72 | "They gave us some stories THEY themselves created", 73 | "They gave us some story THEY themselves wrote", 74 | "They gave us some story THEY themselves made", 75 | "They gave us some story THEY themselves created", 76 | ] 77 | 78 | r_matcher = ReplaceMatcher(nlp, match_dict=match_dict) 79 | spans = r_matcher("They read us the stories they themselves had written.") 80 | suggestions = spans[0]._.suggestions 81 | 82 | 83 | def test_suggestions(): 84 | assert set(suggestions) <= set(outputs) 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | ![replaCy logo](docs/replacy_logo.png)
3 | 
4 | 
5 | # replaCy: match & replace with spaCy
6 | 
7 | We found that in multiple projects we had duplicate code for using spaCy’s blazing fast matcher to do the same thing: Match-Replace-Grammaticalize. So we wrote replaCy!
8 | 
9 | - Match - spaCy’s matcher is great, and lets you match on text, shape, POS, dependency parse, and other features. We extended this with “match hooks”: predicates that get used in the callback function to further refine a match.
10 | - Replace - Not built into spaCy’s matcher syntax, but easily added. You often want to replace a matched word with some other term.
11 | - Grammaticalize - If you match on `"LEMMA": "dance"` and replace with suggestions `["sing"]`, but the actual match is *danced*, you need to conjugate “sing” appropriately. This is the “killer feature” of replaCy.
12 | 
13 | [![spaCy](https://img.shields.io/badge/made%20with%20❤%20and-spaCy-09a3d5.svg)](https://spacy.io)
14 | [![pypi Version](https://img.shields.io/pypi/v/replacy.svg?style=flat-square&logo=pypi&logoColor=white)](https://pypi.org/project/replacy/)
15 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/ambv/black)
16 | 
17 | 
18 | ![replaCy example](docs/replacy_ex.png)
19 | 

20 | 
21 | 
22 | ## Requirements
23 | 
24 | - `spacy >= 2.0` (not installed by default, but replaCy needs to be instantiated with an `nlp` object)
25 | 
26 | ## Installation
27 | 
28 | `pip install replacy`
29 | 
30 | ## Quick start
31 | 
32 | ```python
33 | from replacy import ReplaceMatcher
34 | from replacy.db import load_json
35 | import spacy
36 | 
37 | 
38 | match_dict = load_json('/path/to/your/match/dict.json')
39 | # load the spaCy model of your choice
40 | nlp = spacy.load("en_core_web_sm")
41 | 
42 | r_matcher = ReplaceMatcher(nlp, match_dict=match_dict)
43 | 
44 | # get inflected suggestions
45 | # look up the first suggestion
46 | span = r_matcher("She extracts revenge.")[0]
47 | span._.suggestions
48 | # >>> ['exacts']
49 | ```
50 | 
51 | ## Input
52 | 
53 | `ReplaceMatcher` accepts both plain text and a spaCy `Doc`.
54 | 
55 | ```python
56 | # text is ok
57 | span = r_matcher("She extracts revenge.")[0]
58 | 
59 | # doc is ok too
60 | doc = nlp("She extracts revenge.")
61 | span = r_matcher(doc)[0]
62 | ```
63 | 
64 | ## match_dict.json format
65 | 
66 | Here is a minimal `match_dict.json`:
67 | 
68 | ```json
69 | {
70 |   "extract-revenge": {
71 |     "patterns": [
72 |       {
73 |         "LEMMA": "extract",
74 |         "TEMPLATE_ID": 1
75 |       }
76 |     ],
77 |     "suggestions": [
78 |       [
79 |         {
80 |           "TEXT": "exact",
81 |           "FROM_TEMPLATE_ID": 1
82 |         }
83 |       ]
84 |     ],
85 |     "match_hook": [
86 |       {
87 |         "name": "succeeded_by_phrase",
88 |         "args": "revenge",
89 |         "match_if_predicate_is": true
90 |       }
91 |     ],
92 |     "test": {
93 |       "positive": [
94 |         "And at the same time extract revenge on those he so despises?",
95 |         "Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team."
96 |       ],
97 |       "negative": ["Mother flavours her custards with lemon extract."]
98 |     }
99 |   }
100 | }
101 | ```
102 | 
103 | For more information on how to compose a `match_dict`, see our [wiki](https://github.com/Qordobacode/replaCy/wiki/match_dict.json-format).
104 | 
105 | # Citing
106 | 
107 | If you use replaCy in your research, please cite with the following BibTeX:
108 | 
109 | ```bibtex
110 | @misc{havens2019replacy,
111 |     title = {SpaCy match and replace, maintaining conjugation},
112 |     author = {Sam Havens, Aneta Stal, and Manhal Daaboul},
113 |     url = {https://github.com/Qordobacode/replaCy},
114 |     year = {2019}
115 | }
116 | ```
--------------------------------------------------------------------------------
/replacy/scorer.py:
--------------------------------------------------------------------------------
1 | import string
2 | import warnings
3 | from typing import List
4 | 
5 | from kenlm import Model as KenLMModel
6 | from spacy.tokens import Doc, Span, Token
7 | 
8 | from replacy.default_scorer import Scorer
9 | 
10 | 
11 | class KenLMScorer(Scorer):
12 | 
13 |     name = "kenlm"
14 | 
15 |     def __init__(self, model=None, path=None, nlp=None, lowercase=True):
16 | 
17 |         if model:
18 |             self.model = model
19 |         elif path:
20 |             self.model = KenLMModel(path)
21 | 
22 |         self._check_model()
23 | 
24 |         if nlp:
25 |             self.nlp = nlp
26 |         else:
27 |             import spacy
28 | 
29 |             self.nlp = spacy.load("en_core_web_sm")
30 | 
31 |         self.lowercase = lowercase
32 | 
33 |     def _check_model(self):
34 |         assert isinstance(self.model, KenLMModel)
35 |         assert self.model.score("testing !") < 0
36 | 
37 |     def preprocess(self, segment):
38 |         """
39 |         SpaCy tokenize + lowercase. Ignore extra whitespaces.
40 | - if Doc, Span, Token - retrieve .lower_ 41 | - if string - convert to Doc first 42 | """ 43 | if isinstance(segment, (Doc, Span, Token)): 44 | # spaCy tokenizer, ignore whitespaces 45 | tok = [token.text for token in segment if not token.is_space] 46 | if self.lowercase: 47 | tok = [token.lower() for token in tok] 48 | 49 | elif isinstance(segment, str): 50 | doc = self.nlp(segment, disable=self.nlp.pipe_names) 51 | return self.preprocess(doc) 52 | 53 | return " ".join(tok) 54 | 55 | def __call__(self, segment, score_type="perplexity"): 56 | 57 | text = self.preprocess(segment) 58 | word_count = len(text.split()) 59 | 60 | if word_count < 2: 61 | warnings.warn(f"Scorer: Received {word_count} tokens, expected >= 2.") 62 | return float("-inf") 63 | 64 | if isinstance(segment, Doc): 65 | # if doc - assume bos, eos=True 66 | bos = True 67 | eos = True 68 | 69 | if isinstance(segment, (Span, Token)): 70 | # if span - assume bos, eos=False 71 | bos = False 72 | eos = False 73 | 74 | if isinstance(segment, str): 75 | # string passed - guess: 76 | bos = text.capitalize() == text 77 | eos = text[-1] in string.punctuation 78 | 79 | # log10 prob 80 | score = self.model.score(text, bos=bos, eos=eos) 81 | 82 | if score_type == "log": 83 | return score 84 | 85 | elif score_type == "perplexity": 86 | prob = 10.0 ** (score) 87 | prob = 0.00000000001 if prob == 0 else prob 88 | return prob ** (-1 / word_count) 89 | else: 90 | raise NotImplementedError 91 | 92 | def score_suggestion(self, doc: Doc, span: Span, suggestion: List[str]) -> float: 93 | """ 94 | between spacy 2.3.2 and 2.3.5 the behavior of slicing docs changed 95 | so doc[len(doc):] now throws an exception (it just returned the empty span before) 96 | 97 | also, we use arrays of text tokens rather than t.text_with_ws_ because 98 | Ken wants space-tokenized strings 99 | """ 100 | if span.start == 0: 101 | head = [] 102 | else: 103 | head = [t.text for t in doc[: span.start]] 104 | if span.end >= len(doc): 105 | tail = [] 106 | else: 107 | tail = [t.text for t in doc[span.end :]] 108 | text = " ".join(head + suggestion + tail) 109 | return self(text) 110 | 111 | def sort_suggestions(self, spans: List[Span]) -> List[Span]: 112 | for span in spans: 113 | if len(span._.suggestions) > 1: 114 | span._.suggestions = sorted( 115 | span._.suggestions, 116 | key=lambda x: self.score_suggestion( 117 | span.doc, span, [t.text for t in x] 118 | ), 119 | ) 120 | return spans 121 | -------------------------------------------------------------------------------- /tests/test_inflector.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from replacy.inflector import Inflector 4 | 5 | xfail = pytest.mark.xfail 6 | 7 | inflector = Inflector() 8 | 9 | inflector_dataset = [ 10 | { 11 | "source": "Those are examples.", 12 | "target": "Those are rabbits.", 13 | "index": 2, 14 | "word": "rabbit", 15 | }, 16 | { 17 | "source": "Stop avoiding the question.", 18 | "target": "Stop evading the question.", 19 | "index": 1, 20 | "word": "evade", 21 | }, 22 | { 23 | "source": "She loves kittens.", 24 | "target": "She hates kittens.", 25 | "index": 1, 26 | "word": "hate", 27 | }, 28 | ] 29 | 30 | 31 | @pytest.mark.parametrize("data", inflector_dataset) 32 | def test_inflector(data): 33 | assert ( 34 | inflector.insert(data["source"], data["word"], data["index"]) == data["target"] 35 | ), "should inflect" 36 | 37 | 38 | """ 39 | Test lemmatization. 
40 | Plural and singular forms of nouns should have common (or at least intersecting) sets of lemmas.
41 | Important for max count estimation (see: suggestion.py).
42 | 
43 | Exceptions to handle separately:
44 |     {"singular": "person", "plural": "people"},
45 |     {"singular": "ox", "plural": "oxen"}
46 | 
47 | Why do we test this?
48 | ReplaCy uses the ML-based lemminflect to lemmatize.
49 | This test assures any lemminflect model upgrades do not break current behaviour.
50 | """
51 | 
52 | irregular_nouns = [
53 |     {"singular": "elf", "plural": "elves"},
54 |     {"singular": "calf", "plural": "calves"},
55 |     {"singular": "knife", "plural": "knives"},
56 |     {"singular": "loaf", "plural": "loaves"},
57 |     {"singular": "shelf", "plural": "shelves"},
58 |     {"singular": "wolf", "plural": "wolves"},
59 |     {"singular": "man", "plural": "men"},
60 |     {"singular": "mouse", "plural": "mice"},
61 |     {"singular": "child", "plural": "children"},
62 |     {"singular": "foot", "plural": "feet"},
63 |     {"singular": "goose", "plural": "geese"},
64 |     {"singular": "tooth", "plural": "teeth"},
65 |     {"singular": "louse", "plural": "lice"},
66 |     {"singular": "cactus", "plural": "cacti"},
67 |     {"singular": "appendix", "plural": "appendices"},
68 |     {"singular": "cod", "plural": "cods"},
69 |     {"singular": "shrimp", "plural": "shrimps"},
70 |     {"singular": "fish", "plural": "fishes"},
71 |     {"singular": "quail", "plural": "quails"},
72 | ]
73 | 
74 | irregular_nouns_lemma_exceptions = [
75 |     {"singular": "person", "plural": "people"},
76 |     {"singular": "ox", "plural": "oxen"},
77 | ]
78 | 
79 | 
80 | @pytest.mark.parametrize("pair", irregular_nouns)
81 | def test_lemmatization(pair):
82 |     singular_lemmas = set(inflector.get_lemmas(pair["singular"]))
83 |     plural_lemmas = set(inflector.get_lemmas(pair["plural"]))
84 | 
85 |     assert len(singular_lemmas & plural_lemmas) > 0, "lemmas are different!"
86 | 
87 | 
88 | @xfail(raises=AssertionError)
89 | @pytest.mark.parametrize("pair", irregular_nouns_lemma_exceptions)
90 | def test_lemmatization_exceptions(pair):
91 |     singular_lemmas = set(inflector.get_lemmas(pair["singular"]))
92 |     plural_lemmas = set(inflector.get_lemmas(pair["plural"]))
93 | 
94 |     assert len(singular_lemmas & plural_lemmas) > 0, "lemmas are different!"
--------------------------------------------------------------------------------
/tests/test_max_count.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import spacy
3 | 
4 | from replacy import ReplaceMatcher
5 | 
6 | nlp = spacy.load("en_core_web_sm")
7 | 
8 | # They read us the stories they themselves had written.
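# The expectations below pin down suggestion pruning (see suggestion.py):
# with filter_suggestions=True, suggestions are filtered using the kenlm
# language model in test.arpa, and default_max_count (or a per-token
# MAX_COUNT) bounds how often each option from a {"TEXT": {"IN": [...]}}
# list may appear in the kept set.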
9 | 10 | match_dict = { 11 | "match-1": { 12 | "patterns": [[ 13 | {"LOWER": {"IN": ["they", "she"]}}, 14 | {"LEMMA": "read", "TEMPLATE_ID": 1}, 15 | {"LOWER": "us"}, 16 | {"LOWER": "the"}, 17 | {"LEMMA": "story", "TEMPLATE_ID": 1}, 18 | {"LOWER": {"IN": ["they", "she"]}}, 19 | {"LOWER": {"IN": ["themselves", "herself"]}}, 20 | {"LEMMA": "have", "OP": "*"}, 21 | {"LEMMA": {"IN": ["write", "made"]}}, 22 | ]], 23 | "suggestions": [ 24 | [ 25 | {"PATTERN_REF": 0}, 26 | {"TEXT": {"IN": ["sing", "give"]}, "FROM_TEMPLATE_ID": 1}, 27 | {"PATTERN_REF": 2}, 28 | {"TEXT": {"IN": ["a", "the", "some"]}}, 29 | {"TEXT": "story", "INFLECTION": "NOUN"}, 30 | {"PATTERN_REF": 5, "REPLACY_OP": "UPPER"}, 31 | {"PATTERN_REF": 6}, 32 | {"TEXT": {"IN": ["write", "made", "create"]}, "INFLECTION": "VBD"}, 33 | ] 34 | ], 35 | "test": {"positive": [], "negative": []}, 36 | } 37 | } 38 | 39 | outputs = [ 40 | "They sang us a stories THEY themselves wrote", 41 | "They sang us a stories THEY themselves made", 42 | "They sang us a stories THEY themselves created", 43 | "They gave us a stories THEY themselves wrote", 44 | "They gave us a stories THEY themselves made", 45 | "They gave us a stories THEY themselves created", 46 | "They sang us the story THEY themselves wrote", 47 | "They sang us the story THEY themselves made", 48 | "They sang us the story THEY themselves created", 49 | "They gave us the story THEY themselves wrote", 50 | "They gave us the story THEY themselves made", 51 | "They gave us the story THEY themselves created", 52 | ] 53 | 54 | output_default_max_count_1 = [ 55 | "They sang us a stories THEY themselves wrote", 56 | "They sang us a story THEY themselves made", 57 | "They gave us a stories THEY themselves made", 58 | "They gave us a story THEY themselves wrote", 59 | "They sang us the stories THEY themselves made", 60 | "They sang us the story THEY themselves wrote", 61 | "They gave us the stories THEY themselves wrote", 62 | "They gave us the story THEY themselves made", 63 | "They sang us some stories THEY themselves created", 64 | "They gave us some story THEY themselves created", 65 | ] 66 | 67 | r_matcher1 = ReplaceMatcher( 68 | nlp, 69 | match_dict=match_dict, 70 | lm_path="./replacy/resources/test.arpa", 71 | filter_suggestions=True, 72 | ) 73 | 74 | spans = r_matcher1("They read us the stories they themselves had written.") 75 | suggestions = spans[0]._.suggestions 76 | 77 | 78 | def test_suggestions(): 79 | assert suggestions == outputs 80 | 81 | 82 | r_matcher_max_count_1 = ReplaceMatcher( 83 | nlp, 84 | match_dict=match_dict, 85 | lm_path="./replacy/resources/test.arpa", 86 | filter_suggestions=True, 87 | default_max_count=1, 88 | ) 89 | 90 | spans_max_count_1 = r_matcher_max_count_1( 91 | "They read us the stories they themselves had written." 
92 | ) 93 | suggestions_max_count_1 = spans_max_count_1[0]._.suggestions 94 | 95 | 96 | def test_default_max_count(): 97 | assert suggestions_max_count_1 == output_default_max_count_1 98 | 99 | 100 | short_match_dict_2_sugg = { 101 | "match-1": { 102 | "patterns": [[ 103 | {"LOWER": {"IN": ["they", "she"]}}, 104 | {"LEMMA": "read", "TEMPLATE_ID": 1}, 105 | ]], 106 | "suggestions": [ 107 | [ 108 | {"PATTERN_REF": 0}, 109 | {"FROM_TEMPLATE_ID": 1, "TEXT": {"IN": ["sing", "give"]}}, 110 | ], 111 | [{"PATTERN_REF": 0}, {"FROM_TEMPLATE_ID": 1, "TEXT": "dance"},], 112 | ], 113 | "test": {"negative": [], "positive": []}, 114 | } 115 | } 116 | 117 | 118 | def test_multiple_suggestions_max_count(): 119 | r_matcher = ReplaceMatcher( 120 | nlp, 121 | match_dict=short_match_dict_2_sugg, 122 | lm_path="./replacy/resources/test.arpa", 123 | filter_suggestions=True, 124 | debug=True, 125 | ) 126 | spans = r_matcher("They read us the stories they themselves had written.") 127 | assert len(spans[0]._.suggestions) == 3 128 | 129 | 130 | short_match_dict = { 131 | "match-1": { 132 | "patterns": [[ 133 | {"LOWER": {"IN": ["they", "she"]}}, 134 | {"LEMMA": "read", "TEMPLATE_ID": 1}, 135 | ]], 136 | "suggestions": [ 137 | [ 138 | {"PATTERN_REF": 0}, 139 | {"FROM_TEMPLATE_ID": 1, "TEXT": {"IN": ["sing", "give"]}}, 140 | ] 141 | ], 142 | "test": {"negative": [], "positive": []}, 143 | } 144 | } 145 | 146 | 147 | def test_manual_max_count(): 148 | # use short match dict 149 | # default_max_count=1 150 | # expect 1 suggestion 151 | 152 | r_matcher = ReplaceMatcher( 153 | nlp, 154 | match_dict=short_match_dict, 155 | lm_path="./replacy/resources/test.arpa", 156 | filter_suggestions=True, 157 | default_max_count=1, 158 | debug=True, 159 | ) 160 | spans = r_matcher("They read us the stories they themselves had written.") 161 | assert len(spans[0]._.suggestions) == 1 162 | 163 | # MAX_COUNT=2 for ['sing', 'give'] 164 | # default_max_count=1 165 | # expect 2 suggestions 166 | 167 | short_match_dict["match-1"]["suggestions"][0][1]["MAX_COUNT"] = 2 168 | 169 | r_matcher = ReplaceMatcher( 170 | nlp, 171 | match_dict=short_match_dict, 172 | lm_path="./replacy/resources/test.arpa", 173 | filter_suggestions=True, 174 | default_max_count=1, 175 | debug=True, 176 | ) 177 | spans = r_matcher("They read us the stories they themselves had written.") 178 | suggestions = spans[0]._.suggestions 179 | 180 | assert len(spans[0]._.suggestions) == 2 181 | -------------------------------------------------------------------------------- /replacy/inflector.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import lemminflect 4 | import spacy 5 | from spacy.tokens import Token 6 | 7 | from replacy.db import get_forms_lookup 8 | 9 | 10 | class Inflector: 11 | def __init__(self, nlp=None, forms_lookup=None): 12 | 13 | self.nlp = nlp 14 | if not self.nlp: 15 | self.nlp = spacy.load("en_core_web_sm") 16 | 17 | self.forms_lookup = forms_lookup 18 | if not self.forms_lookup: 19 | self.forms_lookup = get_forms_lookup() 20 | 21 | def get_dict_form(self, word, tag): 22 | for k in self.forms_lookup: 23 | if ( 24 | word in self.forms_lookup[k].values() 25 | and tag in self.forms_lookup[k].keys() 26 | ): 27 | return self.forms_lookup[k][tag] 28 | return None 29 | 30 | def auto_inflect(self, doc, suggestion, index): 31 | """ 32 | Inflect the suggestion using token at position 'index' as template. 33 | ex. (washed, eat) => ate 34 | Returns inflected suggestion as text. 
35 | If the inflection is not supported, check forms_lookup.json; 36 | if not found there - returns None. 37 | """ 38 | 39 | try: 40 | doc.text 41 | except AttributeError: 42 | doc = self.nlp(doc) 43 | 44 | sentence = doc.text 45 | 46 | token = doc[index] 47 | token_start = token.idx 48 | token_end = token_start + len(token) 49 | 50 | changed_sentence = "".join( 51 | [sentence[:token_start], suggestion, sentence[token_end:]] 52 | ) 53 | 54 | changed_doc = self.nlp(changed_sentence) 55 | changed_token = changed_doc[index] 56 | 57 | return self.inflect_or_lookup(changed_token, token.tag_) 58 | 59 | @staticmethod 60 | def tag_to_pos(tag): 61 | if tag in ["JJ", "JJR", "JJS"]: 62 | return "ADJ" 63 | elif tag in ["RB", "RBR", "RBS"]: 64 | return "ADV" 65 | elif tag in ["NN", "NNS"]: 66 | return "NOUN" 67 | elif tag in ["NNP", "NNPS"]: 68 | return "PROPN" 69 | elif tag in ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "MD"]: 70 | return "VERB" # AUX 71 | else: 72 | return tag 73 | 74 | def get_inflection_type(self, value: str): 75 | pos_values = ["ADJ", "ADV", "NOUN", "PROPN", "VERB", "AUX"] 76 | if value in pos_values: 77 | return "pos" 78 | elif Inflector.tag_to_pos(value) in pos_values: 79 | return "tag" 80 | elif value == "ALL": 81 | return "all" 82 | else: 83 | warnings.warn( 84 | f"Inflection <<{value}>> not supported, will fall back to <<ALL>>." 85 | ) 86 | return "all" 87 | 88 | def get_lemmas(self, word, tag=None, pos=None): 89 | 90 | lemmas = [] 91 | 92 | if tag: 93 | # infer pos from tag 94 | pos = Inflector.tag_to_pos(tag) 95 | 96 | if pos: 97 | lemma_dict = lemminflect.getLemma(word, upos=pos) 98 | lemmas = list(lemma_dict) 99 | else: 100 | # no pos provided, return all lemmas 101 | lemma_dict = lemminflect.getAllLemmas(word) 102 | for i in lemma_dict.values(): 103 | lemmas += list(i) 104 | 105 | return lemmas 106 | 107 | def inflect_lemma(self, lemma, tag=None, pos=None): 108 | 109 | inflections = [] 110 | # tag based 111 | if tag: 112 | inflection_tuple = lemminflect.getInflection(lemma, tag=tag) 113 | inflections = list(inflection_tuple) 114 | else: 115 | # pos based, can be None too 116 | inflection_dict = lemminflect.getAllInflections(lemma, upos=pos) 117 | for i in inflection_dict.values(): 118 | inflections += list(i) 119 | 120 | return inflections 121 | 122 | def inflect_token(self, token: Token, tag=None, pos=None): 123 | 124 | if tag: 125 | # dictionary look up 126 | # returns None if not found 127 | inflection = self.get_dict_form(token.lemma_, tag=tag) 128 | 129 | if not inflection: 130 | # tag provided, spaCy inflection (has .lemma_) 131 | inflection = token._.inflect(tag) 132 | 133 | inflections = [inflection] 134 | else: 135 | # fallback to lemminflect inflection 136 | # get all inflections 137 | inflections = self.inflect_lemma(token.lemma_, tag=tag, pos=pos) 138 | 139 | return inflections 140 | 141 | def inflect_string(self, word: str, tag=None, pos=None): 142 | 143 | inflections = [] 144 | 145 | # lemmatize 146 | lemmas = self.get_lemmas(word, tag=tag, pos=pos) 147 | for lemma in lemmas: 148 | # check dict forms first 149 | # those are potential corrections to lemminflect 150 | # returns None if not found 151 | lemma_i = [self.get_dict_form(lemma, tag=tag)] 152 | if not lemma_i[0]: 153 | lemma_i = self.inflect_lemma(lemma, tag=tag, pos=pos) 154 | inflections += lemma_i 155 | 156 | return inflections 157 | 158 | def inflect_or_lookup(self, word, tag=None, pos=None): 159 | 160 | if isinstance(word, Token): 161 | # token inflection tries spaCy ext (._.inflect) 162 | # with spaCy lemmatizer (.lemma_) 163 | return self.inflect_token(word, tag=tag, pos=pos) 164 | 165 | elif isinstance(word, str): 166 | return self.inflect_string(word, tag=tag, pos=pos) 167 |
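# Illustrative calls (a sketch only - the exact forms come from lemminflect's
# data and the forms_lookup table, so outputs are not guaranteed):
#   inflector.inflect_or_lookup("ran", tag="VBZ")   # -> ["runs"]
#   inflector.inflect_or_lookup("run", pos="NOUN")  # -> ["run", "runs"]
#   inflector.inflect_or_lookup(token, tag="VBD")   # Token input: dict lookup first, then token._.inflect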
168 | def insert(self, doc, suggestion: str, index: int): 169 | """ 170 | Returns the sentence with the inflected token inserted. 171 | If inflection is not supported - returns the original sentence. 172 | ex. She washed her eggs. -> She ate her eggs. 173 | If many inflections are returned, take the first form. 174 | """ 175 | 176 | # if a string is passed, convert it to a doc 177 | try: 178 | doc.text 179 | except AttributeError: 180 | doc = self.nlp(doc) 181 | 182 | infl_tokens = self.auto_inflect(doc, suggestion, index) 183 | 184 | # guard against an empty inflection list to avoid a NameError below 185 | infl_token = infl_tokens[0] if len(infl_tokens) else None 186 | 187 | if infl_token: 188 | token = doc[index] 189 | changed_sent = "".join( 190 | [doc.text[: token.idx], infl_token, doc.text[token.idx + len(token) :],] 191 | ) 192 | return changed_sent 193 | else: 194 | return doc.text 195 | -------------------------------------------------------------------------------- /replacy/util.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any, Callable, Dict, List, Union 3 | 4 | import spacy 5 | from functional import seq 6 | from jsonschema import validate 7 | from spacy.tokens import Doc 8 | 9 | from replacy.db import get_match_dict_schema 10 | 11 | 12 | def set_known_extensions(span_class): 13 | known_string_extensions = ["match_name"] 14 | known_list_extensions = ["suggestions"] 15 | for ext in known_list_extensions: 16 | span_class.set_extension(ext, default=[], force=True) 17 | for ext in known_string_extensions: 18 | span_class.set_extension(ext, default="", force=True) 19 | expected_properties = ( 20 | ["patterns", "match_hook", "test"] 21 | + known_list_extensions 22 | + known_string_extensions 23 | ) 24 | return expected_properties 25 | 26 | 27 | # set custom extensions for any unexpected keys found in the match_dict 28 | def get_novel_prop_defaults(match_dict, span_class, expected_properties): 29 | """ 30 | Also mutates the ~global Span~ passed `span_class` to add any needed extensions 31 | """ 32 | novel_properties = ( 33 | seq(match_dict.values()) 34 | .flat_map(lambda x: x.keys()) 35 | .distinct() 36 | .difference(expected_properties) 37 | ) 38 | novel_prop_defaults: Dict[str, Any] = {} 39 | for x in match_dict.values(): 40 | for k, v in x.items(): 41 | if k in novel_properties and k not in novel_prop_defaults.keys(): 42 | if isinstance(v, str): 43 | novel_prop_defaults[k] = "" 44 | elif isinstance(v, list): 45 | novel_prop_defaults[k] = [] 46 | elif isinstance(v, dict): 47 | novel_prop_defaults[k] = {} 48 | elif isinstance(v, bool): # check bool before int: bool is a subclass of int 49 | novel_prop_defaults[k] = False 50 | elif isinstance(v, int): 51 | novel_prop_defaults[k] = 0 52 | elif isinstance(v, float): 53 | novel_prop_defaults[k] = 0.0 54 | else: 55 | # just default to whatever value we find 56 | print(k, v) 57 | novel_prop_defaults[k] = v 58 | for prop, default in novel_prop_defaults.items(): 59 | span_class.set_extension(prop, default=default, force=True) 60 | return novel_prop_defaults 61 | 62 | 63 | def validate_match_dict(match_dict): 64 | match_dict_schema = get_match_dict_schema() 65 | validate(instance=match_dict, schema=match_dict_schema) 66 | 67 | 68 | def equal_except_nth_place(list1, list2, n): 69 | # compares two lists, skips nth place 70 | 71 | # if empty: 72 | if not len(list1) * len(list2): 73 | return False 74 | 75 | # if suggestions come from different pre-suggestions - not comparable: 76 | if list1[0].id != list2[0].id: 77 | return False 78 | 79 | # if different length - not equal 80 | if len(list1) != len(list2): 81 | return False 82 | 83 | for i in range(len(list1)): 84 | if i != n: 85 | if list1[i].text != list2[i].text: 86 | return False 87 | return True 88 | 89 |
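# A quick sketch of equal_except_nth_place's behavior (using
# replacy.suggestion.Suggestion(text, max_count, id); values are illustrative):
#   s1 = [Suggestion("a", None, 0), Suggestion("cat", None, 0)]
#   s2 = [Suggestion("an", None, 0), Suggestion("cat", None, 0)]
#   equal_except_nth_place(s1, s2, 0)  # True: the lists agree everywhere but place 0
#   equal_except_nth_place(s1, s2, 1)  # False: place 0 differs and is compared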
90 | def eliminate_options(elem, chosen, rest): 91 | # use elem to eliminate elements above the max_count limits 92 | for i, item in enumerate(elem): 93 | # item with no max count 94 | max_count = item.max_count 95 | elem_text = item.text 96 | if max_count is None: 97 | continue 98 | # item is exclusive (= max count 1) 99 | elif max_count == 1: 100 | # eliminate equal except i from rest 101 | rest = [r for r in rest if not equal_except_nth_place(elem, r, i)] 102 | # item has a custom max count 103 | else: 104 | # count how many times this item has been used so far 105 | # in this very context 106 | current_count = [r for r in chosen if equal_except_nth_place(elem, r, i)] 107 | # if this is already the max (counting elem), eliminate other options from rest 108 | if len(current_count) >= max_count: 109 | rest = [r for r in rest if not equal_except_nth_place(elem, r, i)] 110 | return rest 111 | 112 | 113 | def get_predicates( 114 | match_hooks, default_match_hooks, custom_match_hooks 115 | ) -> List[Callable]: 116 | predicates = [] 117 | for hook in match_hooks: 118 | # template - ex. succeeded_by_phrase 119 | try: 120 | template = getattr(default_match_hooks, hook["name"]) 121 | except AttributeError: 122 | # if the hook isn't in custom_match_hooks, this will still 123 | # raise an exception. I think that is the correct behavior 124 | template = getattr(custom_match_hooks, hook["name"]) 125 | 126 | # predicate - filled template ex. succeeded_by_phrase("to") 127 | # will match "in addition to..." but not "in addition, ..." 128 | args = hook.get("args", None) 129 | kwargs = hook.get("kwargs", None) 130 | if args is None: 131 | if kwargs is None: 132 | # the match_hook is nullary 133 | pred = template() 134 | else: 135 | pred = template(**kwargs) 136 | elif isinstance(args, dict): 137 | # should we force them to use kwargs? 138 | warnings.warn( 139 | f"WARNING: dict passed as sole args argument. Calling {hook['name']} " 140 | f"with single argument {args}. If you want to call with keyword arguments, use kwargs" 141 | ) 142 | pred = template(args) 143 | else: 144 | # oops, bad design; we assume non-dict args are passed directly as a single argument 145 | pred = template(args) 146 | 147 | # to confuse people for centuries to come ...
148 | # negate, since positive breaks matching 149 | # see cb in get_callback 150 | if bool(hook.get("match_if_predicate_is", False)): 151 | # neg flips the boolean value of a predicate 152 | pred = default_match_hooks.neg(pred) 153 | predicates.append(pred) 154 | return predicates 155 | 156 | 157 | def make_doc_if_not_doc(text_or_doc: Union[str, Doc], nlp) -> Doc: 158 | if hasattr(text_or_doc, "text"): 159 | doc = text_or_doc 160 | else: 161 | doc = nlp(text_or_doc) 162 | return doc 163 | 164 | 165 | def at_most_one_is_not_none(*args) -> bool: 166 | return len(list(filter(bool, [x is not None for x in args]))) <= 1 167 | 168 | 169 | def attach_debug_hook(matches: Dict[str, Dict]) -> Dict[str, Dict]: 170 | new_matches = {} 171 | for match_name, match_dict in matches.items(): 172 | new_dict = match_dict 173 | hooks = match_dict.get("match_hook", []) 174 | hooks.append( 175 | { 176 | "name": "debug_hook", 177 | "args": match_name, 178 | "match_if_predicate_is": True, 179 | } 180 | ) 181 | new_dict["match_hook"] = hooks 182 | new_matches[match_name] = new_dict 183 | return new_matches 184 | -------------------------------------------------------------------------------- /replacy/resources/match_dict_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Schema for validation ReplaCy Match/Replace format", 3 | "type": "object", 4 | "definitions": { 5 | "replacyAttributeItem": { 6 | "type": "object", 7 | "properties": { 8 | "ORTH": { 9 | "$ref": "#/definitions/spacyValue" 10 | }, 11 | "TEXT": { 12 | "$ref": "#/definitions/spacyValue" 13 | }, 14 | "LEMMA": { 15 | "$ref": "#/definitions/spacyValue" 16 | }, 17 | "LOWER": { 18 | "$ref": "#/definitions/spacyValue" 19 | }, 20 | "LENGTH": { 21 | "$ref": "#/definitions/spacyValue" 22 | }, 23 | "POS": { 24 | "$ref": "#/definitions/spacyValue" 25 | }, 26 | "TAG": { 27 | "$ref": "#/definitions/spacyValue" 28 | }, 29 | "DEP": { 30 | "$ref": "#/definitions/spacyValue" 31 | }, 32 | "SHAPE": { 33 | "$ref": "#/definitions/spacyValue" 34 | }, 35 | "ENT_TYPE": { 36 | "$ref": "#/definitions/spacyValue" 37 | }, 38 | "OP": { 39 | "enum": [ 40 | "!", 41 | "?", 42 | "*", 43 | "+" 44 | ] 45 | }, 46 | "TEMPLATE_ID": { 47 | "type": "integer" 48 | } 49 | }, 50 | "patternProperties": { 51 | "^IS_[A-Z]*$": { 52 | "type": "boolean" 53 | } 54 | } 55 | }, 56 | "replacyAttribute": { 57 | "type": "array", 58 | "items": { 59 | "$ref": "#/definitions/replacyAttributeItem" 60 | }, 61 | "minItems": 1 62 | }, 63 | "spacyOperator": { 64 | "type": "object", 65 | "additionalProperties": false, 66 | "properties": { 67 | "REGEX": { 68 | "type": "string" 69 | }, 70 | "IN": { 71 | "type": "array", 72 | "items": { 73 | "type": "string" 74 | } 75 | }, 76 | "NOT_IN": { 77 | "type": "array", 78 | "items": { 79 | "type": "string" 80 | } 81 | }, 82 | "==": { 83 | "type": "number" 84 | }, 85 | ">=": { 86 | "type": "number" 87 | }, 88 | ">": { 89 | "type": "number" 90 | }, 91 | "<=": { 92 | "type": "number" 93 | }, 94 | "<": { 95 | "type": "number" 96 | }, 97 | "OP": { 98 | "enum": [ 99 | "!", 100 | "?", 101 | "*", 102 | "+" 103 | ] 104 | } 105 | } 106 | }, 107 | "textOperator": { 108 | "type": "object", 109 | "additionalProperties": false, 110 | "properties": { 111 | "IN": { 112 | "type": "array", 113 | "items": { 114 | "type": "string" 115 | } 116 | } 117 | } 118 | }, 119 | "spacyValue": { 120 | "oneOf": [ 121 | { 122 | "type": "string" 123 | }, 124 | { 125 | "type": "integer" 126 | }, 127 | { 128 | "$ref": "#/definitions/spacyOperator" 
129 | } 130 | ] 131 | }, 132 | "textValue":{ 133 | "oneOf":[ 134 | { 135 | "type": "string" 136 | }, 137 | { 138 | "$ref": "#/definitions/textOperator" 139 | } 140 | ] 141 | }, 142 | "replacySuggestionItem": { 143 | "type": "object", 144 | "additionalProperties": false, 145 | "properties": { 146 | "TEXT": { 147 | "$ref": "#/definitions/textValue" 148 | }, 149 | "FROM_TEMPLATE_ID": { 150 | "type": "integer" 151 | }, 152 | "PATTERN_REF": { 153 | "type": "integer" 154 | }, 155 | "REPLACY_OP": { 156 | "enum": [ 157 | "LOWER", 158 | "UPPER", 159 | "TITLE" 160 | ] 161 | }, 162 | "INFLECTION": { 163 | "enum": [ 164 | "ADJ", 165 | "ADV", 166 | "PROPN", 167 | "VERB", 168 | "AUX", 169 | "JJ", 170 | "JJR", 171 | "JJS", 172 | "RB", 173 | "RBR", 174 | "RBS", 175 | "NN", 176 | "NNS", 177 | "NNP", 178 | "NNPS", 179 | "VB", 180 | "VBD", 181 | "VBG", 182 | "VBN", 183 | "VBP", 184 | "VBZ", 185 | "MD", 186 | "ALL" 187 | ] 188 | } 189 | } 190 | }, 191 | "replacySuggestion": { 192 | "type": "array", 193 | "items": { 194 | "$ref": "#/definitions/replacySuggestionItem" 195 | } 196 | }, 197 | "matchHookItem": { 198 | "type": "object", 199 | "properties": { 200 | "name": { 201 | "type": "string" 202 | }, 203 | "args": { 204 | "oneOf": [ 205 | { 206 | "type": "array" 207 | }, 208 | { 209 | "type": "string" 210 | }, 211 | { 212 | "type": "number" 213 | }, 214 | { 215 | "type": "boolean" 216 | } 217 | ] 218 | }, 219 | "kwargs": { 220 | "type": "object" 221 | }, 222 | "match_if_predicate_is": { 223 | "type": "boolean" 224 | } 225 | }, 226 | "required": [ 227 | "name", 228 | "match_if_predicate_is" 229 | ] 230 | } 231 | }, 232 | "patternProperties": { 233 | "^[a-z_-][A-Za-z0-9_-]*$": { 234 | "type": "object", 235 | "properties": { 236 | "patterns": { 237 | "type": "array", 238 | "items": { 239 | "$ref": "#/definitions/replacyAttribute" 240 | }, 241 | "minItems": 1 242 | }, 243 | "suggestions": { 244 | "type": "array", 245 | "minItems": 0, 246 | "items": { 247 | "$ref": "#/definitions/replacySuggestion" 248 | } 249 | }, 250 | "match_hook": { 251 | "type": "array", 252 | "minItems": 0, 253 | "items": { 254 | "$ref": "#/definitions/matchHookItem" 255 | } 256 | }, 257 | "test": { 258 | "type": "object", 259 | "additionalProperties": false, 260 | "properties": { 261 | "positive": { 262 | "type": "array", 263 | "items": { 264 | "type": "string" 265 | } 266 | }, 267 | "negative": { 268 | "type": "array", 269 | "items": { 270 | "type": "string" 271 | } 272 | } 273 | } 274 | }, 275 | "description": { 276 | "type": "string" 277 | }, 278 | "comment": { 279 | "type": "string" 280 | }, 281 | "category": { 282 | "type": "string" 283 | } 284 | }, 285 | "required": [ 286 | "patterns", 287 | "suggestions" 288 | ] 289 | } 290 | } 291 | } -------------------------------------------------------------------------------- /replacy/resources/match_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "extract-revenge": { 3 | "patterns": [ 4 | [ 5 | { 6 | "LEMMA": "extract", 7 | "TEMPLATE_ID": 1 8 | } 9 | ] 10 | ], 11 | "suggestions": [ 12 | [ 13 | { 14 | "TEXT": "exact", 15 | "FROM_TEMPLATE_ID": 1 16 | } 17 | ] 18 | ], 19 | "match_hook": [ 20 | { 21 | "name": "succeeded_by_phrase", 22 | "args": "revenge", 23 | "match_if_predicate_is": true 24 | } 25 | ], 26 | "test": { 27 | "positive": [ 28 | "And at the same time extract revenge on those he so despises?", 29 | "Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team." 
30 | ], 31 | "negative": [ 32 | "Mother flavours her custards with lemon extract." 33 | ] 34 | } 35 | }, 36 | "make-due": { 37 | "patterns": [ 38 | [ 39 | { 40 | "LEMMA": "make", 41 | "TEMPLATE_ID": 1 42 | }, 43 | { 44 | "LOWER": "due" 45 | } 46 | ] 47 | ], 48 | "suggestions": [ 49 | [ 50 | { 51 | "TEXT": "make", 52 | "FROM_TEMPLATE_ID": 1 53 | }, 54 | { 55 | "TEXT": "do" 56 | } 57 | ] 58 | ], 59 | "test": { 60 | "positive": [ 61 | "Viewers will have to make due with tired re-runs and second-rate movies." 62 | ], 63 | "negative": [ 64 | "The empty vessels make the greatest sound.", 65 | "I'll make do.", 66 | "She only has sons; she'll make dudes." 67 | ] 68 | }, 69 | "comment": "this is an example comment", 70 | "description": "The expression is \"make do\".", 71 | "category": "R:VERB", 72 | "unexpected": "replaCy should handle arbitrary properties here, and attach them to the relevant spans" 73 | }, 74 | "requirement": { 75 | "patterns": [ 76 | [ 77 | { 78 | "LEMMA": "requirement", 79 | "POS": "NOUN", 80 | "TEMPLATE_ID": 1 81 | } 82 | ] 83 | ], 84 | "suggestions": [ 85 | [ 86 | { 87 | "TEXT": "need", 88 | "FROM_TEMPLATE_ID": 1 89 | } 90 | ] 91 | ], 92 | "match_hook": [ 93 | { 94 | "name": "part_of_compound", 95 | "match_if_predicate_is": false 96 | }, 97 | { 98 | "name": "preceded_by_lemma", 99 | "kwargs": { 100 | "lemma": "hello", 101 | "distance": 22 102 | }, 103 | "match_if_predicate_is": false 104 | } 105 | ], 106 | "test": { 107 | "positive": [ 108 | "The system has the following requirements: blood of a virgin, suffering, and cat food.", 109 | "Our immediate requirement is extra staff." 110 | ], 111 | "negative": [ 112 | "There is a residency requirement for obtaining citizenship.", 113 | "What is the minimum entrance requirement for this course?" 114 | ] 115 | } 116 | }, 117 | "lt-example": { 118 | "patterns": [ 119 | [ 120 | { 121 | "LOWER": { 122 | "IN": [ 123 | "have", 124 | "has" 125 | ] 126 | } 127 | }, 128 | { 129 | "TAG": { 130 | "IN": [ 131 | "VBD", 132 | "VBP", 133 | "VB", 134 | "VBN" 135 | ] 136 | } 137 | }, 138 | { 139 | "TAG": { 140 | "NOT_IN": [ 141 | "VBG" 142 | ] 143 | } 144 | } 145 | ] 146 | ], 147 | "suggestions": [ 148 | [ 149 | { 150 | "PATTERN_REF": 0 151 | }, 152 | { 153 | "PATTERN_REF": 1, 154 | "INFLECTION": "VBN" 155 | }, 156 | { 157 | "PATTERN_REF": 2 158 | } 159 | ] 160 | ], 161 | "description": "Possible agreement error -- use past participle here", 162 | "test": { 163 | "positive": [ 164 | "I have eat this" 165 | ], 166 | "negative": [ 167 | "I ate this" 168 | ] 169 | } 170 | }, 171 | "assemble_attach_together": { 172 | "comment": "Match the word together if it is a modifier of any form of assemble or attach, and suggest removing it", 173 | "patterns": [ 174 | [ 175 | { 176 | "LOWER": "together" 177 | } 178 | ] 179 | ], 180 | "match_hook": [ 181 | { 182 | "name": "relative_x_is_y", 183 | "kwargs": { 184 | "children_or_ancestors": "ancestors", 185 | "pos_or_dep": "dep", 186 | "value": "ROOT" 187 | }, 188 | "match_if_predicate_is": true 189 | } 190 | ], 191 | "suggestions": [ 192 | [ 193 | { 194 | "TEXT": "" 195 | } 196 | ] 197 | ], 198 | "test": { 199 | "positive": [ 200 | "Avengers, assemble the team together!", 201 | "We assembled the furniture together." 202 | ], 203 | "negative": [ 204 | "After we assemble, we can go together", 205 | "My arm is attached to my shoulder, I like that they are together." 
206 | ] 207 | } 208 | }, 209 | "effective_in_its_ability": { 210 | "patterns": [ 211 | [ 212 | { 213 | "LEMMA": "be", 214 | "TEMPLATE_ID": 1 215 | }, 216 | { 217 | "LOWER": "effective" 218 | }, 219 | { 220 | "LOWER": "in" 221 | }, 222 | { 223 | "DEP": "poss" 224 | }, 225 | { 226 | "LOWER": "ability" 227 | }, 228 | { 229 | "LOWER": "to" 230 | }, 231 | { 232 | "POS": "VERB" 233 | } 234 | ] 235 | ], 236 | "suggestions": [ 237 | [ 238 | { 239 | "TEXT": "effectively" 240 | }, 241 | { 242 | "PATTERN_REF": 6, 243 | "FROM_TEMPLATE_ID": 1 244 | } 245 | ] 246 | ], 247 | "comment": "You can use pattern_ref and from_template_id together", 248 | "test": { 249 | "positive": [ 250 | "The pail was effective in its ability to carry water" 251 | ], 252 | "negative": [ 253 | "The pail wasn't effective in its ability to carry water" 254 | ] 255 | } 256 | }, 257 | "dupe-test": { 258 | "patterns": [ 259 | [ 260 | { 261 | "LEMMA": "make", 262 | "TEMPLATE_ID": 1 263 | } 264 | ] 265 | ], 266 | "suggestions": [ 267 | [ 268 | { 269 | "TEXT": "build", 270 | "FROM_TEMPLATE_ID": 1 271 | } 272 | ] 273 | ], 274 | "comment": "This is a bad match, it is here to demonstrate overlap behavior", 275 | "test": { 276 | "positive": [ 277 | "I will make something" 278 | ], 279 | "negative": [ 280 | "I will build something" 281 | ] 282 | } 283 | }, 284 | "all-caps": { 285 | "patterns": [ 286 | [ 287 | { 288 | "IS_UPPER": true, 289 | "TEXT": { 290 | "REGEX": "^[A-Z]{2,}$" 291 | }, 292 | "OP": "+" 293 | }, 294 | { 295 | "IS_LOWER": true, 296 | "OP": "*" 297 | } 298 | ] 299 | ], 300 | "suggestions": [ 301 | [ 302 | { 303 | "PATTERN_REF": 0, 304 | "REPLACY_OP": "LOWER" 305 | }, 306 | { 307 | "PATTERN_REF": 1, 308 | "REPLACY_OP": "UPPER" 309 | } 310 | ] 311 | ], 312 | "test": { 313 | "positive": [ 314 | "TENNIS is a lovely game.", 315 | "THIS IS SO SILLY", 316 | "THIS IS SO SILLY waay to go" 317 | ], 318 | "negative": [ 319 | "this is so silly" 320 | ] 321 | } 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /replacy/resources/patterns_test_data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "hook_name": "succeeded_by_phrase", 4 | "args": "up", 5 | "text": "Do not, for one repulse, give up the purpose that you resolved to effect.", 6 | "start": 7, 7 | "end": 8, 8 | "result": true 9 | }, 10 | { 11 | "hook_name": "succeeded_by_phrase", 12 | "args": "up", 13 | "text": "Give as good as one gets.", 14 | "start": 0, 15 | "end": 1, 16 | "result": false 17 | }, 18 | { 19 | "hook_name": "succeeded_by_phrase", 20 | "args": [ 21 | "height", 22 | "value", 23 | "size", 24 | "number" 25 | ], 26 | "text": "He was only five feet nine, the minimum height for a policeman.", 27 | "start": 8, 28 | "end": 9, 29 | "result": true 30 | }, 31 | { 32 | "hook_name": "succeeded_by_phrase", 33 | "args": [ 34 | "height", 35 | "value", 36 | "size", 37 | "number" 38 | ], 39 | "text": "Raising the minimum wage would ratchet up real incomes in general.", 40 | "start": 2, 41 | "end": 3, 42 | "result": false 43 | }, 44 | { 45 | "hook_name": "succeeded_by_phrase", 46 | "args": [ 47 | "height", 48 | "value", 49 | "size", 50 | "number" 51 | ], 52 | "text": "You must get a minimum of 60 questions right to pass the examination.", 53 | "start": 4, 54 | "end": 5, 55 | "result": false 56 | }, 57 | { 58 | "hook_name": "succeeded_by_pos", 59 | "args": "ADV", 60 | "text": "Their car was bigger and therefore more comfortable.", 61 | "start": 3, 62 | "end": 5, 63 | "result": 
true 64 | }, 65 | { 66 | "hook_name": "succeeded_by_pos", 67 | "args": [ 68 | "PRON", 69 | "DET" 70 | ], 71 | "text": "You do it.", 72 | "start": 1, 73 | "end": 2, 74 | "result": true 75 | }, 76 | { 77 | "hook_name": "succeeded_by_pos", 78 | "args": [ 79 | "PRON", 80 | "DET" 81 | ], 82 | "text": "She does a dance.", 83 | "start": 1, 84 | "end": 2, 85 | "result": true 86 | }, 87 | { 88 | "hook_name": "succeeded_by_pos", 89 | "args": [ 90 | "PRON", 91 | "DET" 92 | ], 93 | "text": "I do fun things.", 94 | "start": 1, 95 | "end": 2, 96 | "result": false 97 | }, 98 | { 99 | "hook_name": "succeeded_by_pos", 100 | "args": "ADJ", 101 | "text": "Their car was bigger and therefore more comfortable.", 102 | "start": 3, 103 | "end": 5, 104 | "result": false 105 | }, 106 | { 107 | "hook_name": "preceded_by_pos", 108 | "args": "AUX", 109 | "text": "Their car was bigger and therefore more comfortable.", 110 | "start": 3, 111 | "end": 4, 112 | "result": true 113 | }, 114 | { 115 | "hook_name": "succeeded_by_dep", 116 | "args": "conj", 117 | "text": "Their car was bigger and therefore more comfortable.", 118 | "start": 6, 119 | "end": 7, 120 | "result": true 121 | }, 122 | { 123 | "hook_name": "succeeded_by_dep", 124 | "args": "punct", 125 | "text": "Their car was bigger and therefore more comfortable.", 126 | "start": 6, 127 | "end": 7, 128 | "result": false 129 | }, 130 | { 131 | "hook_name": "preceded_by_dep", 132 | "args": "advmod", 133 | "text": "Their car was bigger and therefore more comfortable.", 134 | "start": 7, 135 | "end": 8, 136 | "result": true 137 | }, 138 | { 139 | "hook_name": "preceded_by_dep", 140 | "args": "cc", 141 | "text": "Their car was bigger and therefore more comfortable.", 142 | "start": 7, 143 | "end": 8, 144 | "result": false 145 | }, 146 | { 147 | "hook_name": "preceded_by_dep", 148 | "args": [ 149 | "ROOT", 150 | "mark" 151 | ], 152 | "text": "If Sam wants to party; let him", 153 | "start": 1, 154 | "end": 2, 155 | "result": true 156 | }, 157 | { 158 | "hook_name": "preceded_by_dep", 159 | "args": [ 160 | "ROOT", 161 | "mark" 162 | ], 163 | "text": "My name is Sam.", 164 | "start": 3, 165 | "end": 4, 166 | "result": true 167 | }, 168 | { 169 | "hook_name": "preceded_by_dep", 170 | "args": [ 171 | "ROOT", 172 | "mark" 173 | ], 174 | "text": "That's called a 'beer', Sam.", 175 | "start": 8, 176 | "end": 9, 177 | "result": false 178 | }, 179 | { 180 | "hook_name": "part_of_compound", 181 | "args": null, 182 | "text": "Our immediate requirement is extra staff.", 183 | "start": 2, 184 | "end": 3, 185 | "result": false 186 | }, 187 | { 188 | "hook_name": "part_of_compound", 189 | "args": null, 190 | "text": "There is a residency requirement for obtaining citizenship.", 191 | "start": 4, 192 | "end": 5, 193 | "result": true 194 | }, 195 | { 196 | "hook_name": "surrounded_by_phrase", 197 | "args": ",", 198 | "text": "The dog is young, well trained, and good natured.", 199 | "start": 5, 200 | "end": 7, 201 | "result": true 202 | }, 203 | { 204 | "hook_name": "surrounded_by_phrase", 205 | "args": ",", 206 | "text": "The dog is young, well trained, and good natured.", 207 | "start": 5, 208 | "end": 6, 209 | "result": false 210 | }, 211 | { 212 | "hook_name": "succeeded_by_num", 213 | "args": null, 214 | "text": "The area is approximately 100 square kilometers.", 215 | "start": 3, 216 | "end": 4, 217 | "result": true 218 | }, 219 | { 220 | "hook_name": "succeeded_by_num", 221 | "args": null, 222 | "text": "The pies have a shelf life of approximately one week.", 223 | "start": 7, 224 | 
"end": 8, 225 | "result": true 226 | }, 227 | { 228 | "hook_name": "succeeded_by_num", 229 | "args": null, 230 | "text": "The conservatory measures approximately 13ft x 16ft.", 231 | "start": 3, 232 | "end": 4, 233 | "result": true 234 | }, 235 | { 236 | "hook_name": "succeeded_by_num", 237 | "args": null, 238 | "text": "Officials at the school say they received a bomb threat at approximately 11:30 a.m. today.", 239 | "start": 11, 240 | "end": 12, 241 | "result": true 242 | }, 243 | { 244 | "hook_name": "succeeded_by_num", 245 | "args": null, 246 | "text": "One pound is approximately equal to 454 grams.", 247 | "start": 3, 248 | "end": 4, 249 | "result": false 250 | }, 251 | { 252 | "hook_name": "succeeded_by_num", 253 | "args": null, 254 | "text": "The village has approximately doubled in size since 1960.", 255 | "start": 4, 256 | "end": 5, 257 | "result": false 258 | }, 259 | { 260 | "hook_name": "succeeded_by_num", 261 | "args": null, 262 | "text": "Gain got by a lie will burn one’s fingers.", 263 | "start": 7, 264 | "end": 8, 265 | "result": false 266 | }, 267 | { 268 | "hook_name": "succeeded_by_currency", 269 | "args": null, 270 | "text": "Approximately $150 million is to be spent on improvements.", 271 | "start": 0, 272 | "end": 1, 273 | "result": true 274 | }, 275 | { 276 | "hook_name": "succeeded_by_currency", 277 | "args": null, 278 | "text": "I paid them £100 for the damage and I hope that's the last I'll hear of it.", 279 | "start": 2, 280 | "end": 3, 281 | "result": true 282 | }, 283 | { 284 | "hook_name": "succeeded_by_currency", 285 | "args": null, 286 | "text": "I have debited ~100 against your account.", 287 | "start": 2, 288 | "end": 3, 289 | "result": false 290 | }, 291 | { 292 | "hook_name": "relative_x_is_y", 293 | "kwargs": { 294 | "children_or_ancestors": "children", 295 | "pos_or_dep": "dep", 296 | "value": "csubj" 297 | }, 298 | "text": "Your condition is serious and requires surgery.", 299 | "start": 5, 300 | "end": 6, 301 | "result": false 302 | }, 303 | { 304 | "hook_name": "relative_x_is_y", 305 | "kwargs": { 306 | "children_or_ancestors": "children", 307 | "pos_or_dep": "dep", 308 | "value": "csubj" 309 | }, 310 | "text": "I require stimulants to function.", 311 | "start": 1, 312 | "end": 2, 313 | "result": false 314 | }, 315 | { 316 | "hook_name": "relative_x_is_y", 317 | "kwargs": { 318 | "children_or_ancestors": "children", 319 | "pos_or_dep": "dep", 320 | "value": "csubj" 321 | }, 322 | "text": "Deciphering the code requires an expert.", 323 | "start": 3, 324 | "end": 4, 325 | "result": true 326 | }, 327 | { 328 | "hook_name": "relative_x_is_y", 329 | "kwargs": { 330 | "children_or_ancestors": "children", 331 | "pos_or_dep": "dep", 332 | "value": "csubj" 333 | }, 334 | "text": "Making small models requires manual skill.", 335 | "start": 3, 336 | "end": 4, 337 | "result": true 338 | }, 339 | { 340 | "hook_name": "part_of_phrase", 341 | "args": "hungry for apples", 342 | "text": "he seems really hungry for apples today", 343 | "start": 5, 344 | "end": 6, 345 | "result": true 346 | }, 347 | { 348 | "hook_name": "part_of_phrase", 349 | "args": "hungry for apples", 350 | "text": "he seems really hungry for some apples today", 351 | "start": 6, 352 | "end": 7, 353 | "result": false 354 | }, 355 | { 356 | "hook_name": "part_of_phrase", 357 | "args": "hungry for apples today", 358 | "text": "he seems really hungry for apples today", 359 | "start": 5, 360 | "end": 6, 361 | "result": true 362 | }, 363 | { 364 | "hook_name": "part_of_phrase", 365 | "args": "hungry 
for apples today apples", 366 | "text": "he seems really hungry for apples today apples", 367 | "start": 5, 368 | "end": 6, 369 | "result": true 370 | }, 371 | { 372 | "hook_name": "part_of_phrase", 373 | "args": "hungry for apples today apples", 374 | "text": "he seems really hungry for apples today apples", 375 | "start": 7, 376 | "end": 8, 377 | "result": true 378 | }, 379 | { 380 | "hook_name": "sentence_has", 381 | "args": [ 382 | "rick", 383 | "morty", 384 | "jerry", 385 | "wubba lubba dub dub" 386 | ], 387 | "text": "I turned myself into a pickle. I'm pickle rick!", 388 | "start": 1, 389 | "end": 2, 390 | "result": true 391 | }, 392 | { 393 | "hook_name": "sentence_has", 394 | "args": [ 395 | "rick", 396 | "morty", 397 | "jerry", 398 | "wubba lubba dub dub" 399 | ], 400 | "text": "I turned myself into a pickle. I'm pickle rick!", 401 | "start": 7, 402 | "end": 8, 403 | "result": true 404 | }, 405 | { 406 | "hook_name": "sentence_has", 407 | "args": [ 408 | "rick", 409 | "morty", 410 | "jerry", 411 | "wubba lubba dub dub" 412 | ], 413 | "text": "I turned myself into a pickle. I'm pickle rick!", 414 | "start": 8, 415 | "end": 9, 416 | "result": true 417 | }, 418 | { 419 | "hook_name": "sentence_has", 420 | "args": [ 421 | "rick", 422 | "morty", 423 | "jerry", 424 | "wubba lubba dub dub" 425 | ], 426 | "text": "Wubba lubba dub dub means I am in great pain, please help me", 427 | "start": 6, 428 | "end": 7, 429 | "result": true 430 | } 431 | ] -------------------------------------------------------------------------------- /replacy/suggestion.py: -------------------------------------------------------------------------------- 1 | import re 2 | import warnings 3 | 4 | from functional import seq 5 | 6 | from replacy.inflector import Inflector 7 | from replacy.ref_matcher import RefMatcher 8 | 9 | 10 | class SuggestionGenerator: 11 | def __init__( 12 | self, nlp, forms_lookup=None, filter_suggestions=False, default_max_count=None 13 | ): 14 | self.forms_lookup = forms_lookup 15 | self.inflector = Inflector(nlp=nlp, forms_lookup=self.forms_lookup) 16 | self.ref_matcher = (RefMatcher()) 17 | self.filter_suggestions = filter_suggestions 18 | self.default_max_count = default_max_count 19 | 20 | @staticmethod 21 | def get_options(item, doc, start, end, pattern, pattern_ref): 22 | item_options = [] 23 | # set 24 | if "TEXT" in item: 25 | if isinstance(item["TEXT"], dict): 26 | item_options = item["TEXT"].get("IN", []) 27 | elif isinstance(item["TEXT"], str): 28 | item_options = [item["TEXT"]] 29 | # copy 30 | elif "PATTERN_REF" in item: 31 | ref = int(item["PATTERN_REF"]) 32 | if ref >= 0: 33 | try: 34 | refd_text = None 35 | if ref in pattern_ref: 36 | refd_tokens = pattern_ref[ref] 37 | if len(refd_tokens): 38 | min_i = start + min(refd_tokens) 39 | max_i = start + max(refd_tokens) 40 | refd_text = doc[min_i : max_i + 1].text 41 | except: 42 | warnings.warn( 43 | f"Ref matcher failed for span {doc[start:end]} and {pattern_ref}." 44 | ) 45 | refd_text = doc[start + ref].text 46 | else: 47 | # this is confusing. 
Example: 48 | # doc = nlp("I like apples, blood oranges, and bananas") 49 | # start = 2, end = 9 gives doc[start:end] == "apples, blood oranges, and bananas" 50 | # but doc[9] != "bananas", it is an IndexError, the last token is end-1 51 | # so, per python conventions, PATTERN_REF = -1 would mean the last matched token 52 | # so we can just add ref and end if ref is negative 53 | # to do: match again to get multi-token 54 | try: 55 | # map ref to positive 56 | ref = len(pattern_ref) + ref 57 | refd_tokens = pattern_ref[ref] 58 | if len(refd_tokens): 59 | min_i = start + min(refd_tokens) 60 | max_i = start + max(refd_tokens) 61 | refd_text = doc[min_i : max_i + 1].text 62 | else: 63 | refd_text = None 64 | except: 65 | warnings.warn( 66 | f"Ref matcher failed for span {doc[start:end]} and {pattern_ref}." 67 | ) 68 | refd_text = doc[end + ref].text 69 | 70 | if refd_text: 71 | if "REGEX" in item: 72 | regex_p = pattern[item["PATTERN_REF"]] 73 | # regex is with ignore case flag 74 | # so having this line to avoid exception when LOWER isn't in the pattern 75 | # if at any point needed to be specific or use case sensitive 76 | # we should add "REGEX_KEY" (TEXT or LOWER) in suggestions 77 | regex_pattern = ( 78 | regex_p["LOWER"]["REGEX"] 79 | if "LOWER" in regex_p 80 | else regex_p["TEXT"]["REGEX"] 81 | ) 82 | regex_replace = item["REGEX"] 83 | refd_text = re.sub( 84 | regex_pattern, regex_replace, refd_text, flags=re.IGNORECASE 85 | ) 86 | 87 | if "SUFFIX" in item: 88 | refd_text += item["SUFFIX"] 89 | 90 | item_options = [refd_text] 91 | else: 92 | item_options = [] 93 | 94 | return item_options 95 | 96 | def get_item_max_count(self, item, item_options): 97 | 98 | # max count can be hard set in match_dict 99 | max_count = item.get("MAX_COUNT", None) 100 | if max_count: 101 | return max_count 102 | 103 | # can be soft set by default 104 | # but no more than possible - ex. list len 105 | # or maximal ie. list len 106 | if self.default_max_count: 107 | max_count = min(self.default_max_count, len(item_options)) 108 | else: 109 | max_count = len(item_options) 110 | 111 | # if we don't want to guess max count 112 | # to eliminate grammatical variants 113 | # end here 114 | if not self.filter_suggestions: 115 | return max_count 116 | 117 | # if max count is not hard set 118 | # try to lower max count in special cases (A - G) 119 | # to eliminate non grammatical suggestions 120 | 121 | # A. empty 122 | # ex. [] 123 | if not len(item_options): 124 | return 1 125 | 126 | # B. contains non letters 127 | # ex. ["", ","] 128 | if not all([o.isalpha() for o in item_options]): 129 | return 1 130 | 131 | # C. is multi token 132 | # ex. ["in a", "for"] 133 | if max([len(o.split()) for o in item_options]) > 1: 134 | return 1 135 | 136 | # D. if inflection is set to tag - good 137 | # other options - will always return many 138 | if "INFLECTION" in item: 139 | inflection = item.get("INFLECTION") 140 | inflection_type = self.inflector.get_inflection_type(inflection) 141 | if inflection_type != "tag": 142 | return 1 143 | 144 | # contains many options 145 | # ex. ["eat", "walk"] 146 | if len(item_options) > 1: 147 | 148 | # E. contains words of the same lemma 149 | # ex. [slow, slowly] 150 | lemmas = set([]) 151 | for option in item_options: 152 | option_lemmas = set(self.inflector.get_lemmas(option)) 153 | if len(lemmas & option_lemmas): 154 | return 1 155 | lemmas |= option_lemmas 156 | 157 | # F. det: 158 | # ex. 
["a", "an"] 159 | if any([article in item_options for article in ["a", "an", "the"]]): 160 | return 1 161 | 162 | # G. irregular plurals - only 2 detected so hardcoded 163 | # person / people 164 | # ox / oxen 165 | if all([el in item_options for el in ["person", "people"]]) or all( 166 | [el in item_options for el in ["ox", "oxen"]] 167 | ): 168 | return 1 169 | 170 | return max_count 171 | 172 | def inflect(self, item, item_options, pattern, pattern_ref, doc, start, end): 173 | # set 174 | if "INFLECTION" in item: 175 | inflection_value = item["INFLECTION"] 176 | inflection_type = self.inflector.get_inflection_type(inflection_value) 177 | if inflection_type == "pos": 178 | # set by pos 179 | item_options = ( 180 | seq(item_options) 181 | .map( 182 | lambda x: self.inflector.inflect_or_lookup( 183 | x, pos=inflection_value 184 | ) 185 | ) 186 | .flatten() 187 | .list() 188 | ) 189 | elif inflection_type == "tag": 190 | # set by tag 191 | item_options = ( 192 | seq(item_options) 193 | .map( 194 | lambda x: self.inflector.inflect_or_lookup( 195 | x, tag=inflection_value 196 | ) 197 | ) 198 | .flatten() 199 | .list() 200 | ) 201 | else: 202 | # get all forms 203 | item_options = ( 204 | seq(item_options) 205 | .map(lambda x: self.inflector.inflect_or_lookup(x, pos=None)) 206 | .flatten() 207 | .list() 208 | ) 209 | # copy 210 | elif "FROM_TEMPLATE_ID" in item: 211 | template_id = int(item["FROM_TEMPLATE_ID"]) 212 | index = None 213 | for i, token in enumerate(pattern): 214 | if "TEMPLATE_ID" in token and token["TEMPLATE_ID"] == template_id: 215 | index = i 216 | break 217 | 218 | # use token <-> pattern mapping 219 | # given pattern index, find doc index: 220 | doc_indices = pattern_ref[index] 221 | if len(doc_indices) == 0: 222 | # fallback to direct mapping: 223 | warnings.warn( 224 | f"Ref matcher failed for span {doc[start:end]} and {pattern_ref}." 225 | ) 226 | doc_index = index 227 | elif len(doc_indices) >= 1: 228 | # == 1 good case 229 | # >1 more tokens found, fallback to the first token 230 | doc_index = doc_indices[0] 231 | 232 | if doc_index is not None: 233 | item_options = ( 234 | seq(item_options) 235 | .map( 236 | lambda x: self.inflector.auto_inflect(doc, x, start + doc_index) 237 | ) 238 | .flatten() 239 | .list() 240 | ) 241 | return item_options 242 | 243 | def case(self, item, item_options): 244 | # This should probably be a list of ops 245 | # and we should have a parser class 246 | if "REPLACY_OP" in item: 247 | op = item["REPLACY_OP"] 248 | if op == "LOWER": 249 | item_options = [t.lower() for t in item_options] 250 | if op == "TITLE": 251 | item_options = [t.title() for t in item_options] 252 | if op == "UPPER": 253 | item_options = [t.upper() for t in item_options] 254 | return item_options 255 | 256 | def __call__( 257 | self, pre_suggestion, doc, start, end, pattern, pre_suggestion_id, alignments 258 | ): 259 | """ 260 | Suggestion text: 261 | - set: "TEXT": "cat" 262 | - choose one from: "TEXT": {"IN": ["a", "b"]} 263 | - copy from pattern: "PATTERN_REF": 3 (copy from 3rd pattern match) 264 | Set suggestion text inflection: 265 | - set by tag: "INFLECTION": "VBG" (returns one) 266 | - set by pos: "INFLECTION": "NOUN" (returns many. ex. 
NNS, NN) 267 | - get all: "INFLECTION": "ALL" (returns a lot, use infrequently) 268 | - copy from pattern: "FROM_TEMPLATE_ID": 2 (copy from token with "TEMPLATE_ID": 2) 269 | Suggestions case matching: 270 | - lowercase: "REPLACY_OP": "LOWER" 271 | - title: "REPLACY_OP": "TITLE" 272 | - upper: "REPLACY_OP": "UPPER" 273 | Suggestions item max count: 274 | - set manually: "MAX_COUNT": n (int) (take best n words from options) 275 | - implied MAX_COUNT = 1 if words share the same lemma or are mutually exclusive, ex. a/an 276 | """ 277 | # get token <-> pattern correspondence 278 | pattern_obj = pattern[0] 279 | pattern_ref = self.ref_matcher(doc[start:end], pattern_obj, alignments) 280 | 281 | suggestions = [] 282 | 283 | for item in pre_suggestion: 284 | # get text 285 | item_options = SuggestionGenerator.get_options( 286 | item, doc, start, end, pattern_obj, pattern_ref 287 | ) 288 | 289 | # read the max count, or guess it 290 | max_count = self.get_item_max_count(item, item_options) 291 | 292 | # inflect 293 | inflected_options = self.inflect( 294 | item, item_options, pattern_obj, pattern_ref, doc, start, end 295 | ) 296 | 297 | # case 298 | cased_options = self.case(item, inflected_options) 299 | 300 | # if non empty (can be empty when matching with OP) 301 | if len(cased_options): 302 | suggestion_variant = SuggestionVariants( 303 | cased_options, max_count, pre_suggestion_id 304 | ) 305 | suggestions.append(suggestion_variant) 306 | 307 | return suggestions 308 | 309 | 310 | class SuggestionVariants: 311 | def __init__(self, cased_options, max_count, id): 312 | self.cased_options = cased_options 313 | self.max_count = max_count 314 | self.id = id 315 | 316 | def __len__(self): 317 | return len(self.cased_options) 318 | 319 | def __repr__(self): 320 | return f'(cased_options={",".join(self.cased_options)}, max_count={self.max_count}, id={self.id})' 321 | 322 | def __iter__(self): 323 | for option in self.cased_options: 324 | yield Suggestion(option, self.max_count, self.id) 325 | 326 | 327 | class Suggestion: 328 | def __init__(self, text, max_count, id): 329 | self.text = text 330 | self.max_count = max_count 331 | self.id = id 332 | 333 | def __repr__(self): 334 | return f"(text={self.text}, max_count={self.max_count}, id={self.id})" -------------------------------------------------------------------------------- /replacy/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import itertools 3 | import logging 4 | import warnings 5 | from types import ModuleType 6 | from typing import Callable, List, Optional, Tuple 7 | 8 | from functional import seq 9 | from spacy.matcher import Matcher 10 | from spacy.tokens import Span 11 | from spacy.tokens.underscore import get_ext_args 12 | 13 | from replacy import default_match_hooks 14 | from replacy.db import get_forms_lookup, get_match_dict, load_lm 15 | from replacy.default_scorer import Scorer 16 | from replacy.suggestion import SuggestionGenerator 17 | from replacy.suggestion_joiner import join_suggestions 18 | from replacy.util import ( 19 | at_most_one_is_not_none, 20 | attach_debug_hook, 21 | eliminate_options, 22 | get_novel_prop_defaults, 23 | get_predicates, 24 | make_doc_if_not_doc, 25 | set_known_extensions, 26 | validate_match_dict, 27 | ) 28 | from replacy.version import __version__ 29 | 30 | logging.basicConfig(level=logging.INFO) 31 | 32 | PipelineComponent = Callable[[List[Span]], List[Span]] 33 | 34 |
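# For illustration, a conforming pipeline component looks like this
# ("keep_top_3" is hypothetical and not part of replaCy):
#
#   def keep_top_3(spans: List[Span]) -> List[Span]:
#       for span in spans:
#           span._.suggestions = span._.suggestions[:3]
#       return spans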
35 | class ESpan(Span): 36 | """ 37 | dangerous version of Span class 38 | intentionally bypasses the _ attribute so that the class itself has all the properties 39 | this can result in name collisions, etc 40 | 41 | Why use it? There are cases where overlapping spans cause problems for the built in spacy.tokens.Span 42 | but for some reason this works 43 | """ 44 | 45 | def __getattribute__(self, name): 46 | """ 47 | when Python attempts to access the underscore property, don't let it - return self instead 48 | this means that: 49 | 50 | ```python 51 | >>> doc = nlp("She extracts revenge.") 52 | >>> es = ESpan(doc, 1, 2) 53 | >>> es._.comment = "yo metaprogramming" 54 | >>> es.comment 55 | 'yo metaprogramming' 56 | ``` 57 | """ 58 | if name == "_": 59 | return self 60 | return super().__getattribute__(name) 61 | 62 | @classmethod 63 | def set_extension(cls, name, **kwargs): 64 | # if we only want to allow default values, this works: 65 | default, method, getter, setter = get_ext_args(**kwargs) 66 | setattr(cls, name, default) 67 | # if we want to allow getters and setters or methods for dynamic props, we have to implement that 68 | # I think it is doable using the `property` built-in method as shown here 69 | # https://stackoverflow.com/a/1355444/3518108 70 | 71 | @classmethod 72 | def has_extension(cls, name): 73 | return hasattr(cls, name) 74 | 75 | 76 | class ReplaceMatcher: 77 | """ 78 | The main unit of functionality. Instantiate with `nlp` (a loaded spaCy pipeline) and a match dict. 79 | Usage example, including a module of custom match hooks: 80 | 81 | ```python 82 | from replacy import ReplaceMatcher 83 | from replacy.db import load_json 84 | import spacy 85 | 86 | import my.custom_hooks as ch # suppose this suggests `excepts=>accepts` under some conditions 87 | 88 | 89 | nlp = spacy.load("en_core_web_sm") 90 | rmatch_dict = load_json("./resources/match_dict.json") 91 | rmatcher = ReplaceMatcher(nlp, rmatch_dict, custom_match_hooks=ch) 92 | span = rmatcher("She excepts her fate.")[0] 93 | span._.suggestions 94 | # >>> ['accepts'] 95 | ``` 96 | """ 97 | 98 | validate_match_dict = validate_match_dict 99 | 100 | def __init__( 101 | self, 102 | nlp, 103 | match_dict=None, 104 | forms_lookup=None, 105 | custom_match_hooks: Optional[ModuleType] = None, 106 | allow_multiple_whitespaces=False, 107 | max_suggestions_count=1000, 108 | lm_path=None, 109 | filter_suggestions=False, 110 | default_max_count=None, 111 | debug=False, 112 | SpanClass=Span, 113 | ): 114 | self.debug = debug 115 | 116 | self.Span = SpanClass 117 | self.logger = logging.getLogger("replaCy") 118 | self.default_match_hooks = default_match_hooks 119 | self.custom_match_hooks = custom_match_hooks 120 | self.nlp = nlp 121 | self.match_dict = match_dict if match_dict else get_match_dict() 122 | if self.debug: 123 | self.match_dict = attach_debug_hook(self.match_dict) 124 | self.allow_multiple_whitespaces = allow_multiple_whitespaces 125 | self.matcher = Matcher(self.nlp.vocab) 126 | self.predicates = {} 127 | self._init_matcher() 128 | self.spans: List[Span] = [] 129 | self.max_suggestions_count = max_suggestions_count 130 | self.forms_lookup = forms_lookup if forms_lookup else get_forms_lookup() 131 | self.suggestion_gen = SuggestionGenerator( 132 | nlp, forms_lookup, filter_suggestions, default_max_count 133 | ) 134 | expected_properties = set_known_extensions(self.Span) 135 | self.novel_prop_defaults = get_novel_prop_defaults( 136 | self.match_dict, self.Span, expected_properties 137 | ) 138 | self._set_scorer(lm_path) 139 | # Pipeline doesn't include the matcher, since it doesn't have the signature List[Span] -> List[Span] 140 | self.pipeline: List[Tuple[str, PipelineComponent]] = [ 141 | ("sorter", self.scorer.sort_suggestions), 142 | ("filter", self.max_count_filter), 143 | ("joiner", join_suggestions), 144 | ] 145 |
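# Note: with the default pipeline above, `rmatcher.pipe_names` evaluates to
# ["sorter", "filter", "joiner"]; add_pipe/remove_pipe (below) edit this list.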
146 | @classmethod 147 | def with_espan(cls, *args, **kwargs): 148 | return cls(*args, **kwargs, SpanClass=ESpan) 149 | 150 | def _init_matcher(self): 151 | for match_name, ps in self.match_dict.items(): 152 | patterns = copy.deepcopy(ps["patterns"]) 153 | 154 | patterns = self._allow_multiple_whitespaces(patterns) 155 | patterns = self._remove_unsupported(patterns) 156 | 157 | match_hooks = ps.get("match_hook", []) 158 | self.predicates[match_name] = get_predicates( 159 | match_hooks, self.default_match_hooks, self.custom_match_hooks 160 | ) 161 | self.matcher.add(match_name, patterns) 162 | 163 | @staticmethod 164 | def _fix_alignment_multiple_whitespaces(alignments): 165 | return [int(a / 2) for a in alignments] 166 | 167 | @staticmethod 168 | def _allow_multiple_whitespaces(patterns): 169 | """ 170 | allow matching tokens separated by multiple whitespaces 171 | they may appear after normalizing nonstandard whitespaces 172 | ex. "Here␣is␣a\u180E\u200Bproblem." -> "Here␣is␣a␣␣problem." 173 | each pattern is preceded and followed by optional whitespace tokens 174 | to keep the preceded_by... and succeeded_by... match hooks working 175 | """ 176 | # interleave an optional whitespace token before, between, and after the pattern tokens 177 | white_pattern = {"IS_SPACE": True, "OP": "?"} 178 | normalized_patterns = [] 179 | for pattern in patterns: 180 | normalized_pattern = [white_pattern] 181 | for p in pattern: 182 | normalized_pattern += [p, white_pattern] 183 | normalized_patterns.append(normalized_pattern) 184 | patterns = normalized_patterns 185 | return patterns 186 |
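# e.g. [[{"LOWER": "due"}]] becomes
# [[{"IS_SPACE": True, "OP": "?"}, {"LOWER": "due"}, {"IS_SPACE": True, "OP": "?"}]],
# so original token i sits at padded index 2i+1, which is why
# _fix_alignment_multiple_whitespaces halves the alignment indices.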
187 | @staticmethod 188 | def _remove_unsupported(patterns): 189 | # remove custom attributes not supported by the spaCy Matcher 190 | for pattern in patterns: 191 | for p in pattern: 192 | if "TEMPLATE_ID" in p: 193 | del p["TEMPLATE_ID"] 194 | return patterns 195 | 196 | def _callback(self, doc, match): 197 | match_id, start, end, alignments = match 198 | alignments = ReplaceMatcher._fix_alignment_multiple_whitespaces(alignments) 199 | 200 | match_name = self.nlp.vocab[match_id].text 201 | 202 | for pred in self.predicates[match_name]: 203 | try: 204 | if pred(doc, start, end): 205 | return None 206 | except IndexError: 207 | break 208 | 209 | span = self.Span(doc, start, end) 210 | 211 | # record the match name so it can be looked up in the match_dict if needed 212 | span._.match_name = match_name 213 | 214 | pre_suggestions = self.match_dict[match_name]["suggestions"] 215 | 216 | span._.suggestions = [] 217 | 218 | for i, x in enumerate(pre_suggestions): 219 | span._.suggestions += self.process_suggestions( 220 | x, doc, start, end, match_name, i, alignments 221 | ) 222 | 223 | for novel_prop, default_value in self.novel_prop_defaults.items(): 224 | setattr( 225 | span._, 226 | novel_prop, 227 | self.match_dict[match_name].get(novel_prop, default_value), 228 | ) 229 | self.spans.append(span) 230 | 231 | def _set_scorer(self, lm_path): 232 | # The following is not ideal 233 | # We should update replaCy to accept a Scorer as a parameter 234 | if lm_path: 235 | from replacy.scorer import KenLMScorer 236 | 237 | self.scorer: Scorer = KenLMScorer(nlp=self.nlp, model=load_lm(lm_path)) 238 | else: 239 | self.scorer = Scorer() 240 | 241 | def max_count_filter(self, spans: List[Span]) -> List[Span]: 242 | # for each span, reduce the number of suggestions 243 | # based on the max_count of each suggestion text item 244 | # assumption - elements are already sorted 245 | for span in spans: 246 | suggestions = span._.suggestions 247 | if len(suggestions): 248 | rest = suggestions 249 | chosen = [] 250 | 251 | while len(rest): 252 | elem = rest[0] 253 | rest = rest[1:] 254 | 255 | # the first element in rest 256 | # was not eliminated => keep it 257 | chosen.append(elem) 258 | rest = eliminate_options(elem, chosen, rest) 259 | 260 | # log matched span and filtered out suggestions 261 | if self.debug: 262 | 263 | self.logger.info( 264 | f"{span._.match_name} matched '{span.text}' token indices {span.start}:{span.end}" 265 | ) 266 | self.logger.info(f"Accepted suggestions: {chosen}") 267 | 268 | suggestions_diff = [f for f in suggestions if f not in chosen] 269 | if len(suggestions_diff): 270 | self.logger.info(f"Ignored suggestions: {suggestions_diff}") 271 | 272 | span._.suggestions = chosen 273 | return spans 274 | 275 | def process_suggestions( 276 | self, pre_suggestion, doc, start, end, match_name, pre_suggestion_id, alignments 277 | ): 278 | # get token <-> pattern correspondence 279 | pattern = self.match_dict[match_name]["patterns"] 280 | 281 | suggestion_variants = self.suggestion_gen( 282 | pre_suggestion, doc, start, end, pattern, pre_suggestion_id, alignments 283 | ) 284 | # check there aren't more than max_suggestions_count combinations 285 | # otherwise warn and return [] 286 | suggestions_count = ( 287 | seq(suggestion_variants).map(lambda x: len(x)).reduce(lambda x, y: x * y, 1) 288 | ) 289 | 290 | if suggestions_count > self.max_suggestions_count: 291 | warnings.warn( 292 | f"Got {suggestions_count} suggestions, max is {self.max_suggestions_count}. \ 293 | Will fall back to empty suggestions." 294 | ) 295 | opt_combinations = [] 296 | else: 297 | opt_combinations = list(itertools.product(*suggestion_variants)) 298 | opt_combinations = [list(o) for o in opt_combinations] 299 | return opt_combinations 300 | 301 | @property 302 | def pipe_names(self): 303 | return [x[0] for x in self.pipeline] 304 | 305 | def add_pipe( 306 | self, 307 | component: PipelineComponent, 308 | name: str = None, 309 | before: str = None, 310 | after: str = None, 311 | first: bool = None, 312 | last: bool = None, 313 | ): 314 | """ 315 | Add a component to the pipeline 316 | A component must take one argument, a list of spans, and return the (possibly modified) list of spans 317 | 318 | Optionally, you can either specify a component to add it before or after, 319 | tell replaCy to add it first or last in the pipeline, or define a custom name. 320 | If no name is set and no name attribute is present on your component, the function/class name is used. 321 | """ 322 | if not at_most_one_is_not_none(before, after, first, last): 323 | raise ValueError("Only one of before, after, first, last can be set") 324 | if name is None: 325 | if hasattr(component, "name"): 326 | name = getattr(component, "name") 327 | else: 328 | name = component.__name__ 329 | 330 | if name in self.pipe_names: 331 | raise ValueError( 332 | f"Component {component} has name collision with existing pipeline component. \ 333 | current pipeline: {self.pipeline}" 334 | ) 335 | pipeline_step = (name, component) 336 | 337 | if last or all([before is None, after is None, first is None, last is None]): 338 | self.pipeline.append(pipeline_step) 339 | elif first: 340 | self.pipeline.insert(0, pipeline_step) 341 | elif before: 342 | if before not in self.pipe_names: 343 | raise ValueError( 344 | f"can't insert component before {before}; no component of that name in pipeline" 345 | ) 346 | reference_component_index = next( 347 | i for i, tup in enumerate(self.pipeline) if tup[0] == before 348 | ) 349 | self.pipeline.insert(reference_component_index, pipeline_step) 350 | elif after: 351 | if after == "matcher": 352 | self.pipeline.insert(0, pipeline_step) # same as "first" 353 | return # don't fall through to the name check below 354 | if after not in self.pipe_names: 355 | raise ValueError( 356 | f"can't insert component after {after}; no component of that name in pipeline" 357 | ) 358 | reference_component_index = next( 359 | i for i, tup in enumerate(self.pipeline) if tup[0] == after 360 | ) 361 | self.pipeline.insert(reference_component_index + 1, pipeline_step) 362 | else: 363 | warnings.warn( 364 | f"Weird values passed to add_pipe, appending {name} to the end of the pipeline" 365 | ) 366 | self.pipeline.append(pipeline_step) 367 |
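# Usage sketch (keep_top_3 is the hypothetical component shown earlier):
#   rmatcher.add_pipe(keep_top_3, name="top3", before="joiner")
#   rmatcher.pipe_names  # -> ["sorter", "filter", "top3", "joiner"]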
368 | def remove_pipe(self, name): 369 | new_pipeline = [] 370 | for p in self.pipeline: 371 | if p[0] == name: 372 | continue 373 | new_pipeline.append(p) 374 | self.pipeline = new_pipeline 375 | 376 | def __call__(self, sent): 377 | # self.spans is instance state - it must be cleared on every call 378 | self.spans = [] 379 | doc = make_doc_if_not_doc(sent, self.nlp) 380 | # collect raw matches; self.spans is filled by the callbacks below 381 | matches = self.matcher(doc, with_alignments=True) 382 | 383 | # run the callback here instead of passing it as an on_match callback: 384 | # here we have the alignment information to use for PATTERN_REF, 385 | # and we don't have this info in an on_match callback 386 | for match in matches: 387 | self._callback(doc, match) 388 | 389 | for _, component in self.pipeline: 390 | # the default pipeline will: 391 | # sort suggestions by lm score 392 | # filter out based on max_count 393 | # merge lists of words into phrases 394 | self.spans = component(self.spans) 395 | # this works because a component's signature is List[Span] -> List[Span] 396 | return self.spans -------------------------------------------------------------------------------- /replacy/default_match_hooks.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains predicates which influence what counts as a match 3 | If the predicate (function) returns True, the match will be ignored 4 | """ 5 | import operator 6 | import re 7 | import sys 8 | from typing import Callable, List, Union 9 | 10 | from spacy.tokens.doc import Doc 11 | 12 | SpacyMatchPredicate = Callable[[Doc, int, int], bool] 13 | 14 | 15 | def _check_args(x): 16 | """ 17 | get the calling function's name to give a nice error message 18 | """ 19 | caller = sys._getframe(1).f_code.co_name 20 | if not isinstance(x, (list, str)): 21 | raise ValueError(f"args of {caller} should be a string or list of strings") 22 | 23 | 24 | def compose(f, g): 25 | return lambda doc, start, end: f(g(doc, start, end)) 26 | 27 | 28 | def neg(f): 29 | # function negation, ex.
def succeeded_by_phrase(phrases) -> SpacyMatchPredicate:
    _check_args(phrases)
    if not isinstance(phrases, list):
        phrases = [phrases]

    def _succeeded_by_phrase(doc, start, end):
        if end >= len(doc):
            return False
        return any([doc[end:].text.lower().startswith(p.lower()) for p in phrases])

    return _succeeded_by_phrase


def preceded_by_phrase(phrases) -> SpacyMatchPredicate:
    _check_args(phrases)
    if not isinstance(phrases, list):
        phrases = [phrases]

    def _preceded_by_phrase(doc, start, end):
        if start <= 0:
            return False
        return any([doc[:start].text.lower().endswith(p.lower()) for p in phrases])

    return _preceded_by_phrase


def succeeded_by_pos(pos) -> SpacyMatchPredicate:
    _check_args(pos)
    if not isinstance(pos, list):
        pos = [pos]

    def _succeeded_by_pos(doc, start, end):
        if end >= len(doc):
            return False
        bools = [doc[end].pos_ == p for p in pos]
        return any(bools)

    return _succeeded_by_pos


def preceded_by_pos(pos) -> SpacyMatchPredicate:
    _check_args(pos)
    if not isinstance(pos, list):
        pos = [pos]

    def _preceded_by_pos(doc, start, end):
        if start <= 0:
            return False
        bools = [doc[start - 1].pos_ == p for p in pos]
        return any(bools)

    return _preceded_by_pos


def succeeded_by_lemma(lemma) -> SpacyMatchPredicate:
    _check_args(lemma)
    if not isinstance(lemma, list):
        lemma = [lemma]

    def _succeeded_by_lemma(doc, start, end):
        if end >= len(doc):
            return False
        bools = [doc[end].lemma_ == lem for lem in lemma]
        return any(bools)

    return _succeeded_by_lemma


def preceded_by_lemma(lemma, distance=1) -> SpacyMatchPredicate:
    _check_args(lemma)
    if not isinstance(lemma, list):
        lemma = [lemma]

    def _preceded_by_lemma(doc, start, end):
        if start < distance:
            return False
        bools = [doc[start - distance].lemma_ == lem for lem in lemma]
        return any(bools)

    return _preceded_by_lemma


def succeeded_by_dep(dep) -> SpacyMatchPredicate:
    _check_args(dep)
    if not isinstance(dep, list):
        dep = [dep]

    def _succeeded_by_dep(doc, start, end):
        if end >= len(doc):
            return False
        bools = [doc[end].dep_ == d for d in dep]
        return any(bools)

    return _succeeded_by_dep


def preceded_by_dep(dep) -> SpacyMatchPredicate:
    _check_args(dep)
    if not isinstance(dep, list):
        dep = [dep]

    def _preceded_by_dep(doc, start, end):
        if start <= 0:
            return False
        bools = [doc[start - 1].dep_ == d for d in dep]
        return any(bools)

    return _preceded_by_dep


def sentence_has(
    phrases: Union[str, List[str]], case_sensitive=False
) -> SpacyMatchPredicate:
    _check_args(phrases)
    if not isinstance(phrases, list):
        phrases = [phrases]

    def _sentence_has(doc, start, end):
        if case_sensitive:
            return any(p in doc.text for p in phrases)
        return any(p.lower() in doc.text.lower() for p in phrases)

    return _sentence_has
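# Hook-configuration sketch (illustrative): in a match_dict entry, the hooks
# above are referenced by name; the exact shape is governed by
# resources/match_dict_schema.json. Roughly:
#
#     "match_hook": [
#         {"name": "preceded_by_pos", "args": "AUX", "match_if_predicate_is": true}
#     ]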
def surrounded_by_phrase(phrase) -> SpacyMatchPredicate:
    def _surrounded_by_hook(doc, start, end):
        if start <= 0 or end >= len(doc):
            return False
        precedes = doc[:start].text.lower().endswith(phrase.lower())
        follows = doc[end:].text.lower().startswith(phrase.lower())
        return precedes and follows

    return _surrounded_by_hook


def part_of_compound() -> SpacyMatchPredicate:
    def _word_is_part_of_compound_hook(doc, start, end):
        head = doc[start]
        is_compound = head.dep_ == "compound"
        is_part_of_compound = any(
            [t.dep_ == "compound" and t.head == head for t in doc]
        )
        return is_compound or is_part_of_compound

    return _word_is_part_of_compound_hook


def relative_x_is_y(
    children_or_ancestors: str, pos_or_dep: str, value: Union[str, List[str]]
) -> SpacyMatchPredicate:
    """
    This hook looks at all the tokens in a matched span to determine
    whether any of the children or the first ancestor have a given .pos_,
    .dep_, or .tag_. This replaces the implementation of the Dependency
    Matcher in the previous version by looking at token.children or
    token.ancestors in the matched span.

    Example hook:
    {
        "name": "relative_x_is_y",
        "kwargs": {
            "children_or_ancestors": "children",
            "pos_or_dep": "dep",
            "value": "pobj"
        },
        "match_if_predicate_is": false
    }
    """

    if not isinstance(value, list):
        value = [value]

    if not isinstance(children_or_ancestors, str):
        raise TypeError("children_or_ancestors must be a string!")

    if not isinstance(pos_or_dep, str):
        raise TypeError("pos_or_dep must be a string!")

    if children_or_ancestors not in ["children", "ancestors"]:
        raise ValueError(
            "children_or_ancestors must be set to either `children` or `ancestors`"
        )

    if pos_or_dep not in ["pos", "dep", "tag"]:
        raise ValueError("pos_or_dep must be set to either `pos`, `dep`, or `tag`!")

    # map "pos" -> Token.pos_ etc., so the inner predicates can stay generic
    attr = {"pos": "pos_", "dep": "dep_", "tag": "tag_"}[pos_or_dep]

    def _in_children(doc, start, end):
        if end >= len(doc):
            return False
        match_span = doc[start:end]
        # check every value, not just the first: the previous version
        # returned after testing only value[0]
        return any(
            getattr(child, attr) == val
            for tok in match_span
            for child in tok.children
            for val in value
        )

    def _in_ancestors(doc, start, end):
        if end >= len(doc):
            return False
        match_span = doc[start:end]
        # likewise, test every value against the first ancestor of each token
        for t in match_span:
            ancestor = next(iter(t.ancestors), None)
            if ancestor is not None and getattr(ancestor, attr) in value:
                return True
        return False

    if children_or_ancestors == "children":
        return _in_children

    return _in_ancestors
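# Quick check of relative_x_is_y (illustrative; assumes an `nlp` pipeline with
# a dependency parser, e.g. en_core_web_sm):
#
#     pred = relative_x_is_y("children", "dep", "pobj")
#     doc = nlp("a cup of coffee")
#     pred(doc, 1, 2)  # True iff some child of a token in doc[1:2] has dep_ == "pobj"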
def part_of_phrase(phrase) -> SpacyMatchPredicate:
    def _part_of_phrase(doc, start, end):
        span = doc[start:end]
        matched = span.text.lower()
        parts = phrase.lower().split(matched)
        # for the i-th occurrence of the matched text inside the phrase, check
        # that the surrounding document text completes the rest of the phrase
        for i in range(len(parts) - 1):
            firstpart = matched.join(parts[: i + 1])
            secondpart = matched.join(parts[i + 1 :])
            precedes = doc.text.lower()[: span.start_char].endswith(firstpart)
            follows = doc.text.lower()[span.end_char :].startswith(secondpart)
            if precedes and follows:
                return True
        return False

    return _part_of_phrase


def succeeded_by_num() -> SpacyMatchPredicate:
    def _succeeded_by_num(doc, start, end):
        if end >= len(doc):
            return False
        return doc[end].like_num or doc[end].pos_ == "NUM" or doc[end].is_digit

    return _succeeded_by_num


def succeeded_by_currency() -> SpacyMatchPredicate:
    def _succeeded_by_currency(doc, start, end):
        if end >= len(doc):
            return False
        return doc[end].is_currency

    return _succeeded_by_currency


def debug_hook(match_name: str) -> SpacyMatchPredicate:
    """
    Don't use this manually.
    If debug is set (i.e. ReplaceMatcher.debug), run utils.attach_debug_hook
    on your match_dict when you load it; it returns a new match_dict with
    the debug hook attached to each match.
    """

    def _print_match(doc: Doc, start: int, end: int):
        print(
            f"DEBUG: {match_name} matched '{doc[start: end].text}' token indices {start}:{end}"
        )
        return True

    return _print_match


def preceded_by_space() -> SpacyMatchPredicate:
    def _preceded_by_space(doc, start, end):
        span = doc[start:end]
        if span.start_char == 0:
            # a match at the very start of the text has nothing before it;
            # without this guard, index -1 would wrap to the last character
            return False
        return doc.text[span.start_char - 1] == " "

    return _preceded_by_space


def preceded_by_punct() -> SpacyMatchPredicate:
    def _preceded_by_punct(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return previous_token.is_punct

    return _preceded_by_punct


def preceded_by_num() -> SpacyMatchPredicate:
    def _preceded_by_number(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return (
            previous_token.like_num
            or previous_token.pos_ == "NUM"
            or previous_token.is_digit
        )

    return _preceded_by_number


def preceded_by_currency() -> SpacyMatchPredicate:
    def _preceded_by_currency(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return previous_token.is_currency

    return _preceded_by_currency


def preceded_by_token(token) -> SpacyMatchPredicate:
    token_list = token if isinstance(token, list) else [token]

    def _preceded_by_token(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return any([previous_token.lower_ == t.lower() for t in token_list])

    return _preceded_by_token


def succeeded_by_token(token) -> SpacyMatchPredicate:
    token_list = token if isinstance(token, list) else [token]

    def _succeeded_by_token(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        return any([next_token.lower_ == t.lower() for t in token_list])

    return _succeeded_by_token
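# Token-hook sketch (illustrative):
#
#     hook = preceded_by_token(["a", "an"])
#     doc = nlp("that was an historic day")
#     hook(doc, 3, 4)  # True: the token before doc[3:4] ("historic") is "an"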
def preceded_by_tag(tag) -> SpacyMatchPredicate:
    tag_list = tag if isinstance(tag, list) else [tag]

    def _preceded_by_tag(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return any([previous_token.tag_ == t for t in tag_list])

    return _preceded_by_tag


def preceded_by_regex(regex, sensitive=False) -> SpacyMatchPredicate:
    def _preceded_by_regex(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        flags = 0 if sensitive else re.IGNORECASE
        return re.search(regex, previous_token.text, flags) is not None

    return _preceded_by_regex


def succeeded_by_tag(tag) -> SpacyMatchPredicate:
    tag_list = tag if isinstance(tag, list) else [tag]

    def _succeeded_by_tag(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        return any([next_token.tag_ == t for t in tag_list])

    return _succeeded_by_tag


def succeeded_by_regex(regex, sensitive=False) -> SpacyMatchPredicate:
    def _succeeded_by_regex(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        flags = 0 if sensitive else re.IGNORECASE
        return re.search(regex, next_token.text, flags) is not None

    return _succeeded_by_regex


def succeeded_by_same_token() -> SpacyMatchPredicate:
    def _succeeded_by_same_token(doc, start, end):
        if end == len(doc):
            return False
        token = doc[start]
        next_token = doc[end]
        return token.lower_ == next_token.lower_

    return _succeeded_by_same_token


def succeeded_by_punct() -> SpacyMatchPredicate:
    def _succeeded_by_punct(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        return next_token.is_punct

    return _succeeded_by_punct


def succeeded_by_word() -> SpacyMatchPredicate:
    def _succeeded_by_word(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        return (
            not next_token.is_punct
            and not next_token.is_digit
            and not next_token.is_space
        )

    return _succeeded_by_word


def is_start_of_sentence() -> SpacyMatchPredicate:
    return lambda doc, start, end: doc[start].is_sent_start


def is_end_of_sentence() -> SpacyMatchPredicate:
    return lambda doc, start, end: end == len(doc) or doc[end].is_sent_end


def sentence_ends_with(phrase) -> SpacyMatchPredicate:
    def _sentence_ends_with(doc, start, end):
        return doc[end:].text.lower().strip().endswith(phrase.lower())

    return _sentence_ends_with


# for compatibility with a previous version with spelling errors:
# point the incorrectly spelled names at the correct versions;
# eventually deprecate these
preceeded_by_phrase = preceded_by_phrase
preceeded_by_pos = preceded_by_pos
preceeded_by_dep = preceded_by_dep
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
1 | [[package]]
2 | name = "atomicwrites"
3 | version = "1.4.0"
4 | description = "Atomic file writes."
5 | category = "dev" 6 | optional = false 7 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 8 | 9 | [[package]] 10 | name = "attrs" 11 | version = "21.2.0" 12 | description = "Classes Without Boilerplate" 13 | category = "dev" 14 | optional = false 15 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 16 | 17 | [package.extras] 18 | dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"] 19 | docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] 20 | tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"] 21 | tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"] 22 | 23 | [[package]] 24 | name = "blis" 25 | version = "0.7.5" 26 | description = "The Blis BLAS-like linear algebra library, as a self-contained C-extension." 27 | category = "dev" 28 | optional = false 29 | python-versions = "*" 30 | 31 | [package.dependencies] 32 | numpy = ">=1.15.0" 33 | 34 | [[package]] 35 | name = "catalogue" 36 | version = "2.0.6" 37 | description = "Super lightweight function registries for your library" 38 | category = "dev" 39 | optional = false 40 | python-versions = ">=3.6" 41 | 42 | [package.dependencies] 43 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 44 | zipp = {version = ">=0.5", markers = "python_version < \"3.8\""} 45 | 46 | [[package]] 47 | name = "certifi" 48 | version = "2021.10.8" 49 | description = "Python package for providing Mozilla's CA Bundle." 50 | category = "dev" 51 | optional = false 52 | python-versions = "*" 53 | 54 | [[package]] 55 | name = "charset-normalizer" 56 | version = "2.0.8" 57 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 58 | category = "dev" 59 | optional = false 60 | python-versions = ">=3.5.0" 61 | 62 | [package.extras] 63 | unicode_backport = ["unicodedata2"] 64 | 65 | [[package]] 66 | name = "click" 67 | version = "7.1.2" 68 | description = "Composable command line interface toolkit" 69 | category = "dev" 70 | optional = false 71 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 72 | 73 | [[package]] 74 | name = "colorama" 75 | version = "0.4.4" 76 | description = "Cross-platform colored terminal text." 
77 | category = "dev" 78 | optional = false 79 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 80 | 81 | [[package]] 82 | name = "contextvars" 83 | version = "2.4" 84 | description = "PEP 567 Backport" 85 | category = "dev" 86 | optional = false 87 | python-versions = "*" 88 | 89 | [package.dependencies] 90 | immutables = ">=0.9" 91 | 92 | [[package]] 93 | name = "cymem" 94 | version = "2.0.6" 95 | description = "Manage calls to calloc/free through Cython" 96 | category = "dev" 97 | optional = false 98 | python-versions = "*" 99 | 100 | [[package]] 101 | name = "dataclasses" 102 | version = "0.8" 103 | description = "A backport of the dataclasses module for Python 3.6" 104 | category = "dev" 105 | optional = false 106 | python-versions = ">=3.6, <3.7" 107 | 108 | [[package]] 109 | name = "dill" 110 | version = "0.2.7.1" 111 | description = "serialize all of python" 112 | category = "main" 113 | optional = false 114 | python-versions = "*" 115 | 116 | [[package]] 117 | name = "en-core-web-sm" 118 | version = "3.0.0" 119 | description = "English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer." 120 | category = "dev" 121 | optional = false 122 | python-versions = "*" 123 | 124 | [package.dependencies] 125 | spacy = ">=3.0.0,<3.1.0" 126 | 127 | [package.source] 128 | type = "url" 129 | url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz" 130 | [[package]] 131 | name = "future" 132 | version = "0.18.2" 133 | description = "Clean single-source support for Python 3 and 2" 134 | category = "main" 135 | optional = false 136 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 137 | 138 | [[package]] 139 | name = "idna" 140 | version = "3.3" 141 | description = "Internationalized Domain Names in Applications (IDNA)" 142 | category = "dev" 143 | optional = false 144 | python-versions = ">=3.5" 145 | 146 | [[package]] 147 | name = "immutables" 148 | version = "0.16" 149 | description = "Immutable Collections" 150 | category = "dev" 151 | optional = false 152 | python-versions = ">=3.6" 153 | 154 | [package.dependencies] 155 | typing-extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.8\""} 156 | 157 | [package.extras] 158 | test = ["flake8 (>=3.8.4,<3.9.0)", "pycodestyle (>=2.6.0,<2.7.0)", "mypy (>=0.910)", "pytest (>=6.2.4,<6.3.0)"] 159 | 160 | [[package]] 161 | name = "importlib-metadata" 162 | version = "4.8.2" 163 | description = "Read metadata from Python packages" 164 | category = "dev" 165 | optional = false 166 | python-versions = ">=3.6" 167 | 168 | [package.dependencies] 169 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 170 | zipp = ">=0.5" 171 | 172 | [package.extras] 173 | docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] 174 | perf = ["ipython"] 175 | testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] 176 | 177 | [[package]] 178 | name = "jinja2" 179 | version = "3.0.3" 180 | description = "A very fast and expressive template engine." 
181 | category = "dev" 182 | optional = false 183 | python-versions = ">=3.6" 184 | 185 | [package.dependencies] 186 | MarkupSafe = ">=2.0" 187 | 188 | [package.extras] 189 | i18n = ["Babel (>=2.7)"] 190 | 191 | [[package]] 192 | name = "jsonschema" 193 | version = "2.6.0" 194 | description = "An implementation of JSON Schema validation for Python" 195 | category = "main" 196 | optional = false 197 | python-versions = "*" 198 | 199 | [package.extras] 200 | format = ["rfc3987", "strict-rfc3339", "webcolors"] 201 | 202 | [[package]] 203 | name = "kenlm" 204 | version = "0.0.0" 205 | description = "" 206 | category = "dev" 207 | optional = false 208 | python-versions = "*" 209 | develop = false 210 | 211 | [package.source] 212 | type = "git" 213 | url = "https://github.com/kpu/kenlm" 214 | reference = "master" 215 | resolved_reference = "f01e12d83c7fd03ebe6656e0ad6d73a3e022bd50" 216 | 217 | [[package]] 218 | name = "lemminflect" 219 | version = "0.2.1" 220 | description = "A python module for English lemmatization and inflection." 221 | category = "main" 222 | optional = false 223 | python-versions = "*" 224 | 225 | [package.dependencies] 226 | numpy = "*" 227 | 228 | [[package]] 229 | name = "markupsafe" 230 | version = "2.0.1" 231 | description = "Safely add untrusted strings to HTML/XML markup." 232 | category = "dev" 233 | optional = false 234 | python-versions = ">=3.6" 235 | 236 | [[package]] 237 | name = "more-itertools" 238 | version = "8.12.0" 239 | description = "More routines for operating on iterables, beyond itertools" 240 | category = "dev" 241 | optional = false 242 | python-versions = ">=3.5" 243 | 244 | [[package]] 245 | name = "murmurhash" 246 | version = "1.0.6" 247 | description = "Cython bindings for MurmurHash" 248 | category = "dev" 249 | optional = false 250 | python-versions = "*" 251 | 252 | [[package]] 253 | name = "numpy" 254 | version = "1.19.5" 255 | description = "NumPy is the fundamental package for array computing with Python." 
256 | category = "main" 257 | optional = false 258 | python-versions = ">=3.6" 259 | 260 | [[package]] 261 | name = "packaging" 262 | version = "21.3" 263 | description = "Core utilities for Python packages" 264 | category = "dev" 265 | optional = false 266 | python-versions = ">=3.6" 267 | 268 | [package.dependencies] 269 | pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" 270 | 271 | [[package]] 272 | name = "pathy" 273 | version = "0.6.1" 274 | description = "pathlib.Path subclasses for local and cloud bucket storage" 275 | category = "dev" 276 | optional = false 277 | python-versions = ">= 3.6" 278 | 279 | [package.dependencies] 280 | dataclasses = {version = ">=0.6,<1.0", markers = "python_version < \"3.7\""} 281 | smart-open = ">=5.0.0,<6.0.0" 282 | typer = ">=0.3.0,<1.0.0" 283 | 284 | [package.extras] 285 | all = ["google-cloud-storage (>=1.26.0,<2.0.0)", "boto3", "pytest", "pytest-coverage", "mock", "typer-cli"] 286 | gcs = ["google-cloud-storage (>=1.26.0,<2.0.0)"] 287 | s3 = ["boto3"] 288 | test = ["pytest", "pytest-coverage", "mock", "typer-cli"] 289 | 290 | [[package]] 291 | name = "pluggy" 292 | version = "0.13.1" 293 | description = "plugin and hook calling mechanisms for python" 294 | category = "dev" 295 | optional = false 296 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 297 | 298 | [package.dependencies] 299 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 300 | 301 | [package.extras] 302 | dev = ["pre-commit", "tox"] 303 | 304 | [[package]] 305 | name = "preshed" 306 | version = "3.0.6" 307 | description = "Cython hash table that trusts the keys are pre-hashed" 308 | category = "dev" 309 | optional = false 310 | python-versions = "*" 311 | 312 | [package.dependencies] 313 | cymem = ">=2.0.2,<2.1.0" 314 | murmurhash = ">=0.28.0,<1.1.0" 315 | 316 | [[package]] 317 | name = "py" 318 | version = "1.11.0" 319 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 320 | category = "dev" 321 | optional = false 322 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 323 | 324 | [[package]] 325 | name = "pydantic" 326 | version = "1.7.4" 327 | description = "Data validation and settings management using python 3.6 type hinting" 328 | category = "dev" 329 | optional = false 330 | python-versions = ">=3.6" 331 | 332 | [package.dependencies] 333 | dataclasses = {version = ">=0.6", markers = "python_version < \"3.7\""} 334 | 335 | [package.extras] 336 | dotenv = ["python-dotenv (>=0.10.4)"] 337 | email = ["email-validator (>=1.0.3)"] 338 | typing_extensions = ["typing-extensions (>=3.7.2)"] 339 | 340 | [[package]] 341 | name = "pyfunctional" 342 | version = "1.3.0" 343 | description = "Package for creating data pipelines with chain functional programming" 344 | category = "main" 345 | optional = false 346 | python-versions = "*" 347 | 348 | [package.dependencies] 349 | dill = ">=0.2.6,<=0.2.7.1" 350 | future = "<=1.0.0" 351 | six = "<=2.0.0" 352 | tabulate = "<=1.0.0" 353 | 354 | [[package]] 355 | name = "pyparsing" 356 | version = "3.0.6" 357 | description = "Python parsing module" 358 | category = "dev" 359 | optional = false 360 | python-versions = ">=3.6" 361 | 362 | [package.extras] 363 | diagrams = ["jinja2", "railroad-diagrams"] 364 | 365 | [[package]] 366 | name = "pytest" 367 | version = "5.4.3" 368 | description = "pytest: simple powerful testing with Python" 369 | category = "dev" 370 | optional = false 371 | python-versions = ">=3.5" 372 | 373 | [package.dependencies] 374 | 
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 375 | attrs = ">=17.4.0" 376 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 377 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 378 | more-itertools = ">=4.0.0" 379 | packaging = "*" 380 | pluggy = ">=0.12,<1.0" 381 | py = ">=1.5.0" 382 | wcwidth = "*" 383 | 384 | [package.extras] 385 | checkqa-mypy = ["mypy (==v0.761)"] 386 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] 387 | 388 | [[package]] 389 | name = "requests" 390 | version = "2.26.0" 391 | description = "Python HTTP for Humans." 392 | category = "dev" 393 | optional = false 394 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 395 | 396 | [package.dependencies] 397 | certifi = ">=2017.4.17" 398 | charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} 399 | idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} 400 | urllib3 = ">=1.21.1,<1.27" 401 | 402 | [package.extras] 403 | socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] 404 | use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] 405 | 406 | [[package]] 407 | name = "six" 408 | version = "1.16.0" 409 | description = "Python 2 and 3 compatibility utilities" 410 | category = "main" 411 | optional = false 412 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 413 | 414 | [[package]] 415 | name = "smart-open" 416 | version = "5.2.1" 417 | description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" 418 | category = "dev" 419 | optional = false 420 | python-versions = ">=3.6,<4.0" 421 | 422 | [package.extras] 423 | all = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests"] 424 | azure = ["azure-storage-blob", "azure-common", "azure-core"] 425 | gcs = ["google-cloud-storage"] 426 | http = ["requests"] 427 | s3 = ["boto3"] 428 | test = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests", "moto[server] (==1.3.14)", "pathlib2", "responses", "paramiko", "parameterizedtestcase", "pytest", "pytest-rerunfailures"] 429 | webhdfs = ["requests"] 430 | 431 | [[package]] 432 | name = "spacy" 433 | version = "3.0.7" 434 | description = "Industrial-strength Natural Language Processing (NLP) in Python" 435 | category = "dev" 436 | optional = false 437 | python-versions = ">=3.6" 438 | 439 | [package.dependencies] 440 | blis = ">=0.4.0,<0.8.0" 441 | catalogue = ">=2.0.4,<2.1.0" 442 | cymem = ">=2.0.2,<2.1.0" 443 | jinja2 = "*" 444 | murmurhash = ">=0.28.0,<1.1.0" 445 | numpy = ">=1.15.0" 446 | packaging = ">=20.0" 447 | pathy = ">=0.3.5" 448 | preshed = ">=3.0.2,<3.1.0" 449 | pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0" 450 | requests = ">=2.13.0,<3.0.0" 451 | spacy-legacy = ">=3.0.5,<3.1.0" 452 | srsly = ">=2.4.1,<3.0.0" 453 | thinc = ">=8.0.3,<8.1.0" 454 | tqdm = ">=4.38.0,<5.0.0" 455 | typer = ">=0.3.0,<0.4.0" 456 | typing-extensions = {version = ">=3.7.4,<4.0.0.0", markers = "python_version < \"3.8\""} 457 | wasabi = ">=0.8.1,<1.1.0" 458 | 459 | [package.extras] 460 | cuda = ["cupy (>=5.0.0b4,<10.0.0)"] 461 | cuda100 = ["cupy-cuda100 (>=5.0.0b4,<10.0.0)"] 462 | cuda101 = ["cupy-cuda101 (>=5.0.0b4,<10.0.0)"] 463 | cuda102 = ["cupy-cuda102 (>=5.0.0b4,<10.0.0)"] 464 | cuda110 = ["cupy-cuda110 (>=5.0.0b4,<10.0.0)"] 465 | cuda111 = ["cupy-cuda111 (>=5.0.0b4,<10.0.0)"] 466 | cuda112 = ["cupy-cuda112 
(>=5.0.0b4,<10.0.0)"] 467 | cuda80 = ["cupy-cuda80 (>=5.0.0b4,<10.0.0)"] 468 | cuda90 = ["cupy-cuda90 (>=5.0.0b4,<10.0.0)"] 469 | cuda91 = ["cupy-cuda91 (>=5.0.0b4,<10.0.0)"] 470 | cuda92 = ["cupy-cuda92 (>=5.0.0b4,<10.0.0)"] 471 | ja = ["sudachipy (>=0.4.9)", "sudachidict-core (>=20200330)"] 472 | ko = ["natto-py (==0.9.0)"] 473 | lookups = ["spacy-lookups-data (>=1.0.0,<1.1.0)"] 474 | ray = ["spacy-ray (>=0.1.0,<1.0.0)"] 475 | th = ["pythainlp (>=2.0)"] 476 | transformers = ["spacy-transformers (>=1.0.1,<1.1.0)"] 477 | 478 | [[package]] 479 | name = "spacy-legacy" 480 | version = "3.0.8" 481 | description = "Legacy registered functions for spaCy backwards compatibility" 482 | category = "dev" 483 | optional = false 484 | python-versions = ">=3.6" 485 | 486 | [[package]] 487 | name = "srsly" 488 | version = "2.4.2" 489 | description = "Modern high-performance serialization utilities for Python" 490 | category = "dev" 491 | optional = false 492 | python-versions = ">=3.6" 493 | 494 | [package.dependencies] 495 | catalogue = ">=2.0.3,<2.1.0" 496 | 497 | [[package]] 498 | name = "tabulate" 499 | version = "0.8.9" 500 | description = "Pretty-print tabular data" 501 | category = "main" 502 | optional = false 503 | python-versions = "*" 504 | 505 | [package.extras] 506 | widechars = ["wcwidth"] 507 | 508 | [[package]] 509 | name = "thinc" 510 | version = "8.0.13" 511 | description = "A refreshing functional take on deep learning, compatible with your favorite libraries" 512 | category = "dev" 513 | optional = false 514 | python-versions = ">=3.6" 515 | 516 | [package.dependencies] 517 | blis = ">=0.4.0,<0.8.0" 518 | catalogue = ">=2.0.4,<2.1.0" 519 | contextvars = {version = ">=2.4,<3", markers = "python_version < \"3.7\""} 520 | cymem = ">=2.0.2,<2.1.0" 521 | dataclasses = {version = ">=0.6,<1.0", markers = "python_version < \"3.7\""} 522 | murmurhash = ">=0.28.0,<1.1.0" 523 | numpy = ">=1.15.0" 524 | preshed = ">=3.0.2,<3.1.0" 525 | pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0" 526 | srsly = ">=2.4.0,<3.0.0" 527 | typing-extensions = {version = ">=3.7.4.1,<4.0.0.0", markers = "python_version < \"3.8\""} 528 | wasabi = ">=0.8.1,<1.1.0" 529 | 530 | [package.extras] 531 | cuda = ["cupy (>=5.0.0b4)"] 532 | cuda100 = ["cupy-cuda100 (>=5.0.0b4)"] 533 | cuda101 = ["cupy-cuda101 (>=5.0.0b4)"] 534 | cuda102 = ["cupy-cuda102 (>=5.0.0b4)"] 535 | cuda110 = ["cupy-cuda110 (>=5.0.0b4)"] 536 | cuda111 = ["cupy-cuda111 (>=5.0.0b4)"] 537 | cuda112 = ["cupy-cuda112 (>=5.0.0b4)"] 538 | cuda113 = ["cupy-cuda113 (>=5.0.0b4)"] 539 | cuda114 = ["cupy-cuda114 (>=5.0.0b4)"] 540 | cuda80 = ["cupy-cuda80 (>=5.0.0b4)"] 541 | cuda90 = ["cupy-cuda90 (>=5.0.0b4)"] 542 | cuda91 = ["cupy-cuda91 (>=5.0.0b4)"] 543 | cuda92 = ["cupy-cuda92 (>=5.0.0b4)"] 544 | datasets = ["ml-datasets (>=0.2.0,<0.3.0)"] 545 | mxnet = ["mxnet (>=1.5.1,<1.6.0)"] 546 | tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"] 547 | torch = ["torch (>=1.5.0)"] 548 | 549 | [[package]] 550 | name = "tqdm" 551 | version = "4.62.3" 552 | description = "Fast, Extensible Progress Meter" 553 | category = "dev" 554 | optional = false 555 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" 556 | 557 | [package.dependencies] 558 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 559 | 560 | [package.extras] 561 | dev = ["py-make (>=0.1.0)", "twine", "wheel"] 562 | notebook = ["ipywidgets (>=6)"] 563 | telegram = ["requests"] 564 | 565 | [[package]] 566 | name = "typer" 567 | version = "0.3.2" 568 | description = "Typer, build 
great CLIs. Easy to code. Based on Python type hints." 569 | category = "dev" 570 | optional = false 571 | python-versions = ">=3.6" 572 | 573 | [package.dependencies] 574 | click = ">=7.1.1,<7.2.0" 575 | 576 | [package.extras] 577 | test = ["pytest-xdist (>=1.32.0,<2.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "mypy (==0.782)", "black (>=19.10b0,<20.0b0)", "isort (>=5.0.6,<6.0.0)", "shellingham (>=1.3.0,<2.0.0)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "coverage (>=5.2,<6.0)"] 578 | all = ["colorama (>=0.4.3,<0.5.0)", "shellingham (>=1.3.0,<2.0.0)"] 579 | dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)"] 580 | doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=5.4.0,<6.0.0)", "markdown-include (>=0.5.1,<0.6.0)"] 581 | 582 | [[package]] 583 | name = "typing-extensions" 584 | version = "3.10.0.2" 585 | description = "Backported and Experimental Type Hints for Python 3.5+" 586 | category = "dev" 587 | optional = false 588 | python-versions = "*" 589 | 590 | [[package]] 591 | name = "urllib3" 592 | version = "1.26.7" 593 | description = "HTTP library with thread-safe connection pooling, file post, and more." 594 | category = "dev" 595 | optional = false 596 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" 597 | 598 | [package.extras] 599 | brotli = ["brotlipy (>=0.6.0)"] 600 | secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] 601 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 602 | 603 | [[package]] 604 | name = "wasabi" 605 | version = "0.8.2" 606 | description = "A lightweight console printing and formatting toolkit" 607 | category = "dev" 608 | optional = false 609 | python-versions = "*" 610 | 611 | [[package]] 612 | name = "wcwidth" 613 | version = "0.2.5" 614 | description = "Measures the displayed width of unicode strings in a terminal" 615 | category = "dev" 616 | optional = false 617 | python-versions = "*" 618 | 619 | [[package]] 620 | name = "zipp" 621 | version = "3.6.0" 622 | description = "Backport of pathlib-compatible object wrapper for zip files" 623 | category = "dev" 624 | optional = false 625 | python-versions = ">=3.6" 626 | 627 | [package.extras] 628 | docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] 629 | testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"] 630 | 631 | [metadata] 632 | lock-version = "1.1" 633 | python-versions = "^3.6" 634 | content-hash = "066671bb2c96e224b9da938c40dd81e89847a824ce08e82991b295f82528e12e" 635 | 636 | [metadata.files] 637 | atomicwrites = [ 638 | {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, 639 | {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, 640 | ] 641 | attrs = [ 642 | {file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"}, 643 | {file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"}, 644 | ] 645 | blis = [ 646 | {file = "blis-0.7.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5812a7c04561ae7332cf730f57d9f82cbd12c5f86a5bfad66ee244e51d06266d"}, 647 | {file = "blis-0.7.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9eecfce3d8fce61dede7b0ae0dffa461c22072437b6cde85587db0c1aa75b450"}, 648 | {file = "blis-0.7.5-cp310-cp310-win_amd64.whl", hash = "sha256:0e476931f0d5703a21c77e7f69b8ebdeeea493fc7858a86f627ac2b376a12c8d"}, 649 | {file = "blis-0.7.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5966ddf3bce84aa7bb09ce4ca059309602fa63280a5d5e5365bb2a294bd5a138"}, 650 | {file = "blis-0.7.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9034dabce4e42e3a1a7b99cc6de430484c8c369e51556ee8d47a53c085de681"}, 651 | {file = "blis-0.7.5-cp36-cp36m-win_amd64.whl", hash = "sha256:730952f74adb0fa7dde9f1bc11249d5a64f3a3a9cf7dfa23b189a4b767bdf2d0"}, 652 | {file = "blis-0.7.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2339cb19594134775bda8b86f23a893828fc7e8d63f09ba9a15f30b2b16c966c"}, 653 | {file = "blis-0.7.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5023781272e0b2868be2f92017aa6836557990f1ca5ba2af5e9f5a0acf04fd8a"}, 654 | {file = "blis-0.7.5-cp37-cp37m-win_amd64.whl", hash = "sha256:65ba723821cc57eb4227eb8dd05c57fff23d97f826d4325b316cd8a63aac8d6a"}, 655 | {file = "blis-0.7.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad4af690c37a5953d3aea660ad89b636bfbb80ca1470995554670ca2143f0cb2"}, 656 | {file = "blis-0.7.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf11c233ea5c2d30683e7c9641c5dc4cd76ed0f64755ba3321dfb8db39feb316"}, 657 | {file = "blis-0.7.5-cp38-cp38-win_amd64.whl", hash = "sha256:31401da283ed42905f0fbf2f8b88ea424c6a911482426f84b5b88c54d382e4d1"}, 658 | {file = "blis-0.7.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0c185979f8f528d634f5548b8cd84ab0366d340c27c039ad3937fab186c1c252"}, 659 | {file = "blis-0.7.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8345bd04777557ef385e2f2d1f14a19d53b2ea9ca5fe107a2cdc50d7bafb8eb2"}, 660 | {file = "blis-0.7.5-cp39-cp39-win_amd64.whl", hash = "sha256:66204a19e38986645940c887498c7b5520efb5bbc6526bf1b8a58f7d3eb37da0"}, 661 | {file = "blis-0.7.5.tar.gz", hash = "sha256:833e01e9eaff4c01aa6e049bbc1e6acb9eca6ee513d7b35b5bf135d49705ad33"}, 662 | ] 663 | catalogue = [ 664 | {file = "catalogue-2.0.6-py3-none-any.whl", hash = "sha256:34ebb5cd2b98f7fa7421fa0eead3b84e577243532509b3fa8cd04abcc9f61d3c"}, 665 | {file = "catalogue-2.0.6.tar.gz", hash = "sha256:336a35603f447167042ef504114d6befa46688f03f4c14dabdc633a44587b245"}, 666 | ] 667 | certifi = [ 668 | {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, 669 | {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, 670 | ] 671 | charset-normalizer = [ 672 | {file = "charset-normalizer-2.0.8.tar.gz", hash = "sha256:735e240d9a8506778cd7a453d97e817e536bb1fc29f4f6961ce297b9c7a917b0"}, 673 | {file = "charset_normalizer-2.0.8-py3-none-any.whl", hash = "sha256:83fcdeb225499d6344c8f7f34684c2981270beacc32ede2e669e94f7fa544405"}, 674 | ] 675 | click = [ 676 | {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, 677 | {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, 678 | ] 679 | colorama = [ 680 | {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, 681 | {file = "colorama-0.4.4.tar.gz", hash = 
"sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, 682 | ] 683 | contextvars = [ 684 | {file = "contextvars-2.4.tar.gz", hash = "sha256:f38c908aaa59c14335eeea12abea5f443646216c4e29380d7bf34d2018e2c39e"}, 685 | ] 686 | cymem = [ 687 | {file = "cymem-2.0.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2b4e27e739f09f16c7c0190f962ffe60dab39cb6a229d5c13e274d16f46a17e8"}, 688 | {file = "cymem-2.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:971cf0a8437dfb4185c3049c086e463612fe849efadc0f5cc153fc81c501da7d"}, 689 | {file = "cymem-2.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:6b0d1a6b0a1296f31fa9e4b7ae5ea49394084ecc883b1ae6fec4844403c43468"}, 690 | {file = "cymem-2.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b8e1c18bb00800425576710468299153caad20c64ddb6819d40a6a34e21ee21c"}, 691 | {file = "cymem-2.0.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:492084aef23ac2ff3da3729e9d36340bc91a96c2dc8c3a82a1926e384ab52412"}, 692 | {file = "cymem-2.0.6-cp36-cp36m-win_amd64.whl", hash = "sha256:af3c01e6b20f9e6c07c7d7cdb7f710e49889d3906c9a3e039546ee6636a34b9a"}, 693 | {file = "cymem-2.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d7a59cef8f2fa25d12e2c30138f8623acbd43ad2715e730a709e49c5eef8e1b0"}, 694 | {file = "cymem-2.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd52d8a81881804625df88453611175ab7e0099b34f52204da1f6940cf2e83c9"}, 695 | {file = "cymem-2.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:4749f220e4c06ec44eb10de13794ff0508cdc4f8eff656cf49cab2cdb3122c0c"}, 696 | {file = "cymem-2.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2aa3fa467d906cd2c27fa0a2e2952dd7925f5fcc7973fab6d815ef6acb25aad8"}, 697 | {file = "cymem-2.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea535f74ab6024e7416f93de564e5c81fb7c0964b96280de66f60aeb05f0cf53"}, 698 | {file = "cymem-2.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:4f87fe087f2ae36c3e20e2b1a29d7f76a28c035372d0a97655f26223d975235a"}, 699 | {file = "cymem-2.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a93fba62fe79dbf6fc4d5b6d804a6e114b44af3ff3d40a28833ee39f21bd336b"}, 700 | {file = "cymem-2.0.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04676d696596b0db3f3c5a3936bab12fb6f24278921a6622bb185e61765b2b4d"}, 701 | {file = "cymem-2.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:c59293b232b53ebb47427f16cf648e937022f489cff36c11d1d8a1f0075b6609"}, 702 | {file = "cymem-2.0.6.tar.gz", hash = "sha256:169725b5816959d34de2545b33fee6a8021a6e08818794a426c5a4f981f17e5e"}, 703 | ] 704 | dataclasses = [ 705 | {file = "dataclasses-0.8-py3-none-any.whl", hash = "sha256:0201d89fa866f68c8ebd9d08ee6ff50c0b255f8ec63a71c16fda7af82bb887bf"}, 706 | {file = "dataclasses-0.8.tar.gz", hash = "sha256:8479067f342acf957dc82ec415d355ab5edb7e7646b90dc6e2fd1d96ad084c97"}, 707 | ] 708 | dill = [ 709 | {file = "dill-0.2.7.1.tar.gz", hash = "sha256:97fd758f5fe742d42b11ec8318ecfcff8776bccacbfcec05dfd6276f5d450f73"}, 710 | ] 711 | en-core-web-sm = [] 712 | future = [ 713 | {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, 714 | ] 715 | idna = [ 716 | {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, 717 | {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, 718 | ] 719 | immutables = [ 720 | {file = 
"immutables-0.16-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:acbfa79d44228d96296279068441f980dc63dbed52522d9227ff9f4d96c6627e"}, 721 | {file = "immutables-0.16-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c9ed003eacb92e630ef200e31f47236c2139b39476894f7963b32bd39bafa3"}, 722 | {file = "immutables-0.16-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a396314b9024fa55bf83a27813fd76cf9f27dce51f53b0f19b51de035146251"}, 723 | {file = "immutables-0.16-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:4a2a71678348fb95b13ca108d447f559a754c41b47bd1e7e4fb23974e735682d"}, 724 | {file = "immutables-0.16-cp36-cp36m-win32.whl", hash = "sha256:064001638ab5d36f6aa05b6101446f4a5793fb71e522bc81b8fc65a1894266ff"}, 725 | {file = "immutables-0.16-cp36-cp36m-win_amd64.whl", hash = "sha256:1de393f1b188740ca7b38f946f2bbc7edf3910d2048f03bbb8d01f17a038d67c"}, 726 | {file = "immutables-0.16-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fcf678a3074613119385a02a07c469ec5130559f5ea843c85a0840c80b5b71c6"}, 727 | {file = "immutables-0.16-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a307eb0984eb43e815dcacea3ac50c11d00a936ecf694c46991cd5a23bcb0ec0"}, 728 | {file = "immutables-0.16-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7a58825ff2254e2612c5a932174398a4ea8fbddd8a64a02c880cc32ee28b8820"}, 729 | {file = "immutables-0.16-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:798b095381eb42cf40db6876339e7bed84093e5868018a9e73d8e1f7ab4bb21e"}, 730 | {file = "immutables-0.16-cp37-cp37m-win32.whl", hash = "sha256:19bdede174847c2ef1292df0f23868ab3918b560febb09fcac6eec621bd4812b"}, 731 | {file = "immutables-0.16-cp37-cp37m-win_amd64.whl", hash = "sha256:9ccf4c0e3e2e3237012b516c74c49de8872ccdf9129739f7a0b9d7444a8c4862"}, 732 | {file = "immutables-0.16-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d59beef203a3765db72b1d0943547425c8318ecf7d64c451fd1e130b653c2fbb"}, 733 | {file = "immutables-0.16-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0020aaa4010b136056c20a46ce53204e1407a9e4464246cb2cf95b90808d9161"}, 734 | {file = "immutables-0.16-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edd9f67671555af1eb99ad3c7550238487dd7ac0ac5205b40204ed61c9a922ac"}, 735 | {file = "immutables-0.16-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:298a301f85f307b4c056a0825eb30f060e64d73605e783289f3df37dd762bab8"}, 736 | {file = "immutables-0.16-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b779617f5b94486bfd0f22162cd72eb5f2beb0214a14b75fdafb7b2c908ed0cb"}, 737 | {file = "immutables-0.16-cp38-cp38-win32.whl", hash = "sha256:511c93d8b1bbbf103ff3f1f120c5a68a9866ce03dea6ac406537f93ca9b19139"}, 738 | {file = "immutables-0.16-cp38-cp38-win_amd64.whl", hash = "sha256:b651b61c1af6cda2ee201450f2ffe048a5959bc88e43e6c312f4c93e69c9e929"}, 739 | {file = "immutables-0.16-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:aa7bf572ae1e006104c584be70dc634849cf0dc62f42f4ee194774f97e7fd17d"}, 740 | {file = "immutables-0.16-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:50793a44ba0d228ed8cad4d0925e00dfd62ea32f44ddee8854f8066447272d05"}, 741 | {file = "immutables-0.16-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:799621dcdcdcbb2516546a40123b87bf88de75fe7459f7bd8144f079ace6ec3e"}, 742 | {file = "immutables-0.16-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:7bcf52aeb983bd803b7c6106eae1b2d9a0c7ab1241bc6b45e2174ba2b7283031"}, 743 | {file = "immutables-0.16-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:734c269e82e5f307fb6e17945953b67659d1731e65309787b8f7ba267d1468f2"}, 744 | {file = "immutables-0.16-cp39-cp39-win32.whl", hash = "sha256:a454d5d3fee4b7cc627345791eb2ca4b27fa3bbb062ccf362ecaaa51679a07ed"}, 745 | {file = "immutables-0.16-cp39-cp39-win_amd64.whl", hash = "sha256:2505d93395d3f8ae4223e21465994c3bc6952015a38dc4f03cb3e07a2b8d8325"}, 746 | {file = "immutables-0.16.tar.gz", hash = "sha256:d67e86859598eed0d926562da33325dac7767b7b1eff84e232c22abea19f4360"}, 747 | ] 748 | importlib-metadata = [ 749 | {file = "importlib_metadata-4.8.2-py3-none-any.whl", hash = "sha256:53ccfd5c134223e497627b9815d5030edf77d2ed573922f7a0b8f8bb81a1c100"}, 750 | {file = "importlib_metadata-4.8.2.tar.gz", hash = "sha256:75bdec14c397f528724c1bfd9709d660b33a4d2e77387a3358f20b848bb5e5fb"}, 751 | ] 752 | jinja2 = [ 753 | {file = "Jinja2-3.0.3-py3-none-any.whl", hash = "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8"}, 754 | {file = "Jinja2-3.0.3.tar.gz", hash = "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"}, 755 | ] 756 | jsonschema = [ 757 | {file = "jsonschema-2.6.0-py2.py3-none-any.whl", hash = "sha256:000e68abd33c972a5248544925a0cae7d1125f9bf6c58280d37546b946769a08"}, 758 | {file = "jsonschema-2.6.0.tar.gz", hash = "sha256:6ff5f3180870836cae40f06fa10419f557208175f13ad7bc26caa77beb1f6e02"}, 759 | ] 760 | kenlm = [] 761 | lemminflect = [ 762 | {file = "lemminflect-0.2.1-py3-none-any.whl", hash = "sha256:96dc0cf32aa1973a00deb369a413d032cf005ac9872a249283264d70b85a1da5"}, 763 | {file = "lemminflect-0.2.1.tar.gz", hash = "sha256:46f439d8e8237efb429173c9f83d00038e9a4db3c668b436034c9ca783c35a53"}, 764 | ] 765 | markupsafe = [ 766 | {file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"}, 767 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"}, 768 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b"}, 769 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94"}, 770 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"}, 771 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f"}, 772 | {file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"}, 773 | {file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"}, 774 | {file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"}, 775 | {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18"}, 776 | {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f"}, 777 | {file = 
"MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f"}, 778 | {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2"}, 779 | {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d"}, 780 | {file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"}, 781 | {file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"}, 782 | {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066"}, 783 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35"}, 784 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b"}, 785 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298"}, 786 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75"}, 787 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb"}, 788 | {file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"}, 789 | {file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"}, 790 | {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"}, 791 | {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7"}, 792 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8"}, 793 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5"}, 794 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135"}, 795 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902"}, 796 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509"}, 797 | {file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"}, 798 | {file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"}, 799 | {file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"}, 800 | ] 801 | more-itertools = [ 802 | {file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"}, 803 | {file = 
"more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"}, 804 | ] 805 | murmurhash = [ 806 | {file = "murmurhash-1.0.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a814d559afe2a97ad40accf21ce96e8b04a3ff5a08f80c02b7acd427dbb7d567"}, 807 | {file = "murmurhash-1.0.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c7b8cc4a8db1c821b80f8ca70a25c3166b14d68ecef8693a117c6a0b1d74ace"}, 808 | {file = "murmurhash-1.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:e40790fdaf65213d70da4ed9229f16f6d6376310dc8fc23eacc98e6151c6ae7e"}, 809 | {file = "murmurhash-1.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a78d53f047c3410ce4c589d9b47090f628f844ed5694418144e63cfe7f3da7e9"}, 810 | {file = "murmurhash-1.0.6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d69cc0ffc0ef6d37399b8a0484a44f9877e531ebc164e55105e89738ed52089"}, 811 | {file = "murmurhash-1.0.6-cp36-cp36m-win_amd64.whl", hash = "sha256:8de08d145c85bb7ba89cb1b591742e3ef54cede73e35f62752af687a4a1859f7"}, 812 | {file = "murmurhash-1.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7dc5a79346afa07f14384926c335c0c455226d687d1305b9378264875b450e51"}, 813 | {file = "murmurhash-1.0.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab326b172dc470331490bda516d4d6d7578c91445ad83a2a3418ac1b9c5f9f55"}, 814 | {file = "murmurhash-1.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:2911bc3e8040dfaac536b141539b0351915f1439953f0aa9e957f082cff035a6"}, 815 | {file = "murmurhash-1.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:de267459d040c96727ba141075d5bc983ec69c6f75b6df1b703e3b5cd7090382"}, 816 | {file = "murmurhash-1.0.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90a8e06872015d6f9f66a42669e003a1df8be229defef69cd98546f4cb25546d"}, 817 | {file = "murmurhash-1.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:773411eba268bf524c012e781f4405aacb9ef4edc063d1f6b38bbf06358b988e"}, 818 | {file = "murmurhash-1.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f4ef3b26229ff192032a12653d637313e1231d23e788b83a2f4a3d8e2bf2d031"}, 819 | {file = "murmurhash-1.0.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92cd7196974307143ce8e9e9b6e22e0a57abf30bdd5a1effe696b4825677e616"}, 820 | {file = "murmurhash-1.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:cdd1036688341413e5adef32b3fd58e8b44f24405f394f90129f39ed879e4f24"}, 821 | {file = "murmurhash-1.0.6.tar.gz", hash = "sha256:00a5252b569d3f914b5bd0bce72d2efe9c0fb91a9703556ea1b608b141c68f2d"}, 822 | ] 823 | numpy = [ 824 | {file = "numpy-1.19.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cc6bd4fd593cb261332568485e20a0712883cf631f6f5e8e86a52caa8b2b50ff"}, 825 | {file = "numpy-1.19.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:aeb9ed923be74e659984e321f609b9ba54a48354bfd168d21a2b072ed1e833ea"}, 826 | {file = "numpy-1.19.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:8b5e972b43c8fc27d56550b4120fe6257fdc15f9301914380b27f74856299fea"}, 827 | {file = "numpy-1.19.5-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:43d4c81d5ffdff6bae58d66a3cd7f54a7acd9a0e7b18d97abb255defc09e3140"}, 828 | {file = "numpy-1.19.5-cp36-cp36m-manylinux2010_x86_64.whl", hash = 
"sha256:a4646724fba402aa7504cd48b4b50e783296b5e10a524c7a6da62e4a8ac9698d"}, 829 | {file = "numpy-1.19.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:2e55195bc1c6b705bfd8ad6f288b38b11b1af32f3c8289d6c50d47f950c12e76"}, 830 | {file = "numpy-1.19.5-cp36-cp36m-win32.whl", hash = "sha256:39b70c19ec771805081578cc936bbe95336798b7edf4732ed102e7a43ec5c07a"}, 831 | {file = "numpy-1.19.5-cp36-cp36m-win_amd64.whl", hash = "sha256:dbd18bcf4889b720ba13a27ec2f2aac1981bd41203b3a3b27ba7a33f88ae4827"}, 832 | {file = "numpy-1.19.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:603aa0706be710eea8884af807b1b3bc9fb2e49b9f4da439e76000f3b3c6ff0f"}, 833 | {file = "numpy-1.19.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:cae865b1cae1ec2663d8ea56ef6ff185bad091a5e33ebbadd98de2cfa3fa668f"}, 834 | {file = "numpy-1.19.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:36674959eed6957e61f11c912f71e78857a8d0604171dfd9ce9ad5cbf41c511c"}, 835 | {file = "numpy-1.19.5-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:06fab248a088e439402141ea04f0fffb203723148f6ee791e9c75b3e9e82f080"}, 836 | {file = "numpy-1.19.5-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6149a185cece5ee78d1d196938b2a8f9d09f5a5ebfbba66969302a778d5ddd1d"}, 837 | {file = "numpy-1.19.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:50a4a0ad0111cc1b71fa32dedd05fa239f7fb5a43a40663269bb5dc7877cfd28"}, 838 | {file = "numpy-1.19.5-cp37-cp37m-win32.whl", hash = "sha256:d051ec1c64b85ecc69531e1137bb9751c6830772ee5c1c426dbcfe98ef5788d7"}, 839 | {file = "numpy-1.19.5-cp37-cp37m-win_amd64.whl", hash = "sha256:a12ff4c8ddfee61f90a1633a4c4afd3f7bcb32b11c52026c92a12e1325922d0d"}, 840 | {file = "numpy-1.19.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cf2402002d3d9f91c8b01e66fbb436a4ed01c6498fffed0e4c7566da1d40ee1e"}, 841 | {file = "numpy-1.19.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1ded4fce9cfaaf24e7a0ab51b7a87be9038ea1ace7f34b841fe3b6894c721d1c"}, 842 | {file = "numpy-1.19.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:012426a41bc9ab63bb158635aecccc7610e3eff5d31d1eb43bc099debc979d94"}, 843 | {file = "numpy-1.19.5-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:759e4095edc3c1b3ac031f34d9459fa781777a93ccc633a472a5468587a190ff"}, 844 | {file = "numpy-1.19.5-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:a9d17f2be3b427fbb2bce61e596cf555d6f8a56c222bd2ca148baeeb5e5c783c"}, 845 | {file = "numpy-1.19.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:99abf4f353c3d1a0c7a5f27699482c987cf663b1eac20db59b8c7b061eabd7fc"}, 846 | {file = "numpy-1.19.5-cp38-cp38-win32.whl", hash = "sha256:384ec0463d1c2671170901994aeb6dce126de0a95ccc3976c43b0038a37329c2"}, 847 | {file = "numpy-1.19.5-cp38-cp38-win_amd64.whl", hash = "sha256:811daee36a58dc79cf3d8bdd4a490e4277d0e4b7d103a001a4e73ddb48e7e6aa"}, 848 | {file = "numpy-1.19.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c843b3f50d1ab7361ca4f0b3639bf691569493a56808a0b0c54a051d260b7dbd"}, 849 | {file = "numpy-1.19.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:d6631f2e867676b13026e2846180e2c13c1e11289d67da08d71cacb2cd93d4aa"}, 850 | {file = "numpy-1.19.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7fb43004bce0ca31d8f13a6eb5e943fa73371381e53f7074ed21a4cb786c32f8"}, 851 | {file = "numpy-1.19.5-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2ea52bd92ab9f768cc64a4c3ef8f4b2580a17af0a5436f6126b08efbd1838371"}, 852 | {file = "numpy-1.19.5-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:400580cbd3cff6ffa6293df2278c75aef2d58d8d93d3c5614cd67981dae68ceb"}, 853 | {file = 
"numpy-1.19.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:df609c82f18c5b9f6cb97271f03315ff0dbe481a2a02e56aeb1b1a985ce38e60"}, 854 | {file = "numpy-1.19.5-cp39-cp39-win32.whl", hash = "sha256:ab83f24d5c52d60dbc8cd0528759532736b56db58adaa7b5f1f76ad551416a1e"}, 855 | {file = "numpy-1.19.5-cp39-cp39-win_amd64.whl", hash = "sha256:0eef32ca3132a48e43f6a0f5a82cb508f22ce5a3d6f67a8329c81c8e226d3f6e"}, 856 | {file = "numpy-1.19.5-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:a0d53e51a6cb6f0d9082decb7a4cb6dfb33055308c4c44f53103c073f649af73"}, 857 | {file = "numpy-1.19.5.zip", hash = "sha256:a76f502430dd98d7546e1ea2250a7360c065a5fdea52b2dffe8ae7180909b6f4"}, 858 | ] 859 | packaging = [ 860 | {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, 861 | {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, 862 | ] 863 | pathy = [ 864 | {file = "pathy-0.6.1-py3-none-any.whl", hash = "sha256:25fd04cec6393661113086730ce69c789d121bea83ab1aa18452e8fd42faf29a"}, 865 | {file = "pathy-0.6.1.tar.gz", hash = "sha256:838624441f799a06b446a657e4ecc9ebc3fdd05234397e044a7c87e8f6e76b1c"}, 866 | ] 867 | pluggy = [ 868 | {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, 869 | {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, 870 | ] 871 | preshed = [ 872 | {file = "preshed-3.0.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a9683730127658b531120b4ed5cff1f2a567318ab75e9ab0f22cc84ae1486c23"}, 873 | {file = "preshed-3.0.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c98f725d8478f3ade4ab1ea00f50a92d2d9406d37276bc46fd8bab1d47452c4"}, 874 | {file = "preshed-3.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:ea8aa9610837e907e8442e79300df0a861bfdb4dcaf026a5d9642a688ad04815"}, 875 | {file = "preshed-3.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e03ae3eee961106a517fcd827b5a7c51f7317236b3e665c989054ab8dc381d28"}, 876 | {file = "preshed-3.0.6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58661bea8d0d63a648588511407285e43d43627e27f836e30819801fb3c75d70"}, 877 | {file = "preshed-3.0.6-cp36-cp36m-win_amd64.whl", hash = "sha256:5f99837e7353ce1fa81f0074d4b15f36e0af5af60a2a54d4d11e13cb09768a9e"}, 878 | {file = "preshed-3.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8c60a400babfc5b25ba371fda7041be227f7c625e1fb7a43329c2c08fe00a53b"}, 879 | {file = "preshed-3.0.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61b2ea656cb1c38d544cc774f1c2ad1cdab23167b46b35310a7e211d4ba9c6d0"}, 880 | {file = "preshed-3.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:87e1add41b7f6236a3ccc34788f47ab8682bc28e8a2d369089062e274494c1a0"}, 881 | {file = "preshed-3.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a279c138ad1d5be02547b1545254929588414b01571fe637016367f6a1aa11de"}, 882 | {file = "preshed-3.0.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3af09f4cfcdaca085fd87dac8107617c4e2bb0ad1458f953841b71e9728287f5"}, 883 | {file = "preshed-3.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:f92e752a868ea2690e1b38c4b775251a145e0fce36b9bdd972539e8271b7a23a"}, 884 | {file = 
"preshed-3.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eaffbc71fdb8625f9aac4fe7e19e20bf318d1421ea05903bebe3e6ffef27b587"}, 885 | {file = "preshed-3.0.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfe1495fcfc7f479de840ddc4f426dbb55351e218ae5c8712c1269183a4d0060"}, 886 | {file = "preshed-3.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:92a8f49d17a63537a8beed48a049b62ef168ca07e0042a5b2bcdf178a1fb5d48"}, 887 | {file = "preshed-3.0.6.tar.gz", hash = "sha256:fb3b7588a3a0f2f2f1bf3fe403361b2b031212b73a37025aea1df7215af3772a"}, 888 | ] 889 | py = [ 890 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, 891 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 892 | ] 893 | pydantic = [ 894 | {file = "pydantic-1.7.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3c60039e84552442defbcb5d56711ef0e057028ca7bfc559374917408a88d84e"}, 895 | {file = "pydantic-1.7.4-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:6e7e314acb170e143c6f3912f93f2ec80a96aa2009ee681356b7ce20d57e5c62"}, 896 | {file = "pydantic-1.7.4-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:8ef77cd17b73b5ba46788d040c0e820e49a2d80cfcd66fda3ba8be31094fd146"}, 897 | {file = "pydantic-1.7.4-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:115d8aa6f257a1d469c66b6bfc7aaf04cd87c25095f24542065c68ebcb42fe63"}, 898 | {file = "pydantic-1.7.4-cp36-cp36m-win_amd64.whl", hash = "sha256:66757d4e1eab69a3cfd3114480cc1d72b6dd847c4d30e676ae838c6740fdd146"}, 899 | {file = "pydantic-1.7.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4c92863263e4bd89e4f9cf1ab70d918170c51bd96305fe7b00853d80660acb26"}, 900 | {file = "pydantic-1.7.4-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:3b8154babf30a5e0fa3aa91f188356763749d9b30f7f211fafb247d4256d7877"}, 901 | {file = "pydantic-1.7.4-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:80cc46378505f7ff202879dcffe4bfbf776c15675028f6e08d1d10bdfbb168ac"}, 902 | {file = "pydantic-1.7.4-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:dda60d7878a5af2d8560c55c7c47a8908344aa78d32ec1c02d742ede09c534df"}, 903 | {file = "pydantic-1.7.4-cp37-cp37m-win_amd64.whl", hash = "sha256:4c1979d5cc3e14b35f0825caddea5a243dd6085e2a7539c006bc46997ef7a61a"}, 904 | {file = "pydantic-1.7.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8857576600c32aa488f18d30833aa833b54a48e3bab3adb6de97e463af71f8f8"}, 905 | {file = "pydantic-1.7.4-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1f86d4da363badb39426a0ff494bf1d8510cd2f7274f460eee37bdbf2fd495ec"}, 906 | {file = "pydantic-1.7.4-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:3ea1256a9e782149381e8200119f3e2edea7cd6b123f1c79ab4bbefe4d9ba2c9"}, 907 | {file = "pydantic-1.7.4-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:e28455b42a0465a7bf2cde5eab530389226ce7dc779de28d17b8377245982b1e"}, 908 | {file = "pydantic-1.7.4-cp38-cp38-win_amd64.whl", hash = "sha256:47c5b1d44934375a3311891cabd450c150a31cf5c22e84aa172967bf186718be"}, 909 | {file = "pydantic-1.7.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:00250e5123dd0b123ff72be0e1b69140e0b0b9e404d15be3846b77c6f1b1e387"}, 910 | {file = "pydantic-1.7.4-cp39-cp39-manylinux1_i686.whl", hash = "sha256:d24aa3f7f791a023888976b600f2f389d3713e4f23b7a4c88217d3fce61cdffc"}, 911 | {file = "pydantic-1.7.4-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:2c44a9afd4c4c850885436a4209376857989aaf0853c7b118bb2e628d4b78c4e"}, 912 | {file = 
"pydantic-1.7.4-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:e87edd753da0ca1d44e308a1b1034859ffeab1f4a4492276bff9e1c3230db4fe"}, 913 | {file = "pydantic-1.7.4-cp39-cp39-win_amd64.whl", hash = "sha256:a3026ee105b5360855e500b4abf1a1d0b034d88e75a2d0d66a4c35e60858e15b"}, 914 | {file = "pydantic-1.7.4-py3-none-any.whl", hash = "sha256:a82385c6d5a77e3387e94612e3e34b77e13c39ff1295c26e3ba664e7b98073e2"}, 915 | {file = "pydantic-1.7.4.tar.gz", hash = "sha256:0a1abcbd525fbb52da58c813d54c2ec706c31a91afdb75411a73dd1dec036595"}, 916 | ] 917 | pyfunctional = [ 918 | {file = "PyFunctional-1.3.0-py2-none-any.whl", hash = "sha256:23ef891a3bd34e5e3fb7ccfccf22ddd68309f53367997d9acd61c8b153b99d11"}, 919 | {file = "PyFunctional-1.3.0-py3-none-any.whl", hash = "sha256:e157b6a387523c64bfcca0e6e823c5c66fc3f9cad458b3cd9ec8be32a7d45cf2"}, 920 | {file = "PyFunctional-1.3.0.tar.gz", hash = "sha256:d2b735c5bfb3b4d7977734e5e92d03f53389de6dc539c609f48c748b93e94fe0"}, 921 | ] 922 | pyparsing = [ 923 | {file = "pyparsing-3.0.6-py3-none-any.whl", hash = "sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4"}, 924 | {file = "pyparsing-3.0.6.tar.gz", hash = "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81"}, 925 | ] 926 | pytest = [ 927 | {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, 928 | {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, 929 | ] 930 | requests = [ 931 | {file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"}, 932 | {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, 933 | ] 934 | six = [ 935 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 936 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 937 | ] 938 | smart-open = [ 939 | {file = "smart_open-5.2.1-py3-none-any.whl", hash = "sha256:71d14489da58b60ce12fc3ecb823facc59a8b23cd1b58edb97175640350d3a62"}, 940 | {file = "smart_open-5.2.1.tar.gz", hash = "sha256:75abf758717a92a8f53aa96953f0c245c8cedf8e1e4184903db3659b419d4c17"}, 941 | ] 942 | spacy = [ 943 | {file = "spacy-3.0.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:bd0ecec5a9c86c9b8c24f82d2e71cd7d0d5bc71e4aa79f945e1e6e6860e28b85"}, 944 | {file = "spacy-3.0.7-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58588a5f4afb49d42843ef7c6a357227ea9f6f8af6330f4e9e9a6cfa0ea65493"}, 945 | {file = "spacy-3.0.7-cp36-cp36m-win_amd64.whl", hash = "sha256:70053c65f36c89ea367b3f43df5d04540c1cbe54ba5d36e384b43a01b371aa87"}, 946 | {file = "spacy-3.0.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c441e924b9fae7db6dfdf75547c8ac0b8a91ad89dd9911f2b3a55bfa9cd45fcd"}, 947 | {file = "spacy-3.0.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e68ebe122452c49e500c5d3d3fd2a6e68c0bb97e309fda685f06535c934843"}, 948 | {file = "spacy-3.0.7-cp37-cp37m-win_amd64.whl", hash = "sha256:1771bf6fa93d505a763b314a70f1fe7ea21070d29097ae9afb0ee82e7fd84a23"}, 949 | {file = "spacy-3.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2a8724a6b64f05fcb39dda0567b6bfe0925e62fe13f0fe23df0cf0559d818b72"}, 950 | {file = "spacy-3.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:249df343923a8751a138ef0954dd4544be1c00eb5e0d551d72e3a8aa37e5c39d"}, 951 | {file = "spacy-3.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:77399e1db1fc7cffdf0e7384011efdb10d799663fcdbb32ee63b9113ad93041d"}, 952 | {file = "spacy-3.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:84af984d2f66a2c87a0475657929fd442a25b12e439a18a81a09facba0f0d2d6"}, 953 | {file = "spacy-3.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebe4f65e161e1445ec7a9209172868bb613214ae8476d53802aaf8ffd5bf8fda"}, 954 | {file = "spacy-3.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:3ae599df111c91a609e79483fc15fd2561c045ff67d5e637ce93951430337abf"}, 955 | {file = "spacy-3.0.7.tar.gz", hash = "sha256:f49c903d4a04598c080bc0b31e666522d9ba340d67ca8ce0ab96f4578afd597f"}, 956 | ] 957 | spacy-legacy = [ 958 | {file = "spacy-legacy-3.0.8.tar.gz", hash = "sha256:b4725c5c161f0685ab4fce3fc912bc68aefdb7e102ba9848e852bb5842256c2f"}, 959 | {file = "spacy_legacy-3.0.8-py2.py3-none-any.whl", hash = "sha256:eb37a3540bb461b5fe9348d4976784f18a0e345982e41e2c5c7cd8229889e825"}, 960 | ] 961 | srsly = [ 962 | {file = "srsly-2.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5e22bbc1a20abf749fa53adf101c36bc369ec63f496c7a44bf4f5f287d724900"}, 963 | {file = "srsly-2.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004d29a5abc0fe632434359c0be170490a69c4dce2c3de8a769944c37da7bb4b"}, 964 | {file = "srsly-2.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7ced7ec4993b4d4ad73cc442f8f7a518368348054d510864b1aa149e8d71654d"}, 965 | {file = "srsly-2.4.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:801c7e6e32c6a4721ab78ab7dafd01074fdb144f4876c09b25305c98f95c470f"}, 966 | {file = "srsly-2.4.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff042c5c3cc1eecd7cbb0a218975a7fd7f331a7f0a3f2e19eb0d6192a98bfdf7"}, 967 | {file = "srsly-2.4.2-cp36-cp36m-win_amd64.whl", hash = "sha256:11b99f16a95fac43905bc31a4705b80ca8a23f201a5cb611a278e3b2d83c6175"}, 968 | {file = "srsly-2.4.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0df68c021ed3f481a5b2e408b57dc40caac66d36b17ef5235b14e9e6a2e24d68"}, 969 | {file = "srsly-2.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d590856db1e639b92c1a78b0cc1fe0d9436dd49037c9961bce959af5d7f66755"}, 970 | {file = "srsly-2.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:589118f912125742414125b7d671610bf2fe11382e79f1df8ec9324a915a3a18"}, 971 | {file = "srsly-2.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cadf13096c7157212c53c0a1af868eececf54e86ffb4e0429dff05d1b9bc423a"}, 972 | {file = "srsly-2.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6259e9904ceb4802bcd4ce1114958ebdc30b756a87b94b0949a57ffd4f63421b"}, 973 | {file = "srsly-2.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:a2e8ee5f3a2a3a816b1d3d989d1b343d77900fa6b84e11c9fc1ac202d1a5dd17"}, 974 | {file = "srsly-2.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7e16f2a34d2d8ac6c6e1691f54ce27a5b4feb923207a9e294496458b98b0510"}, 975 | {file = "srsly-2.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4bc36962208810d29c72156e0573dcbabd9914f42fede42217ccfcadd96beb6"}, 976 | {file = "srsly-2.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:090072830cf2d5bd6765705a02463f586db8a586805d1c31a72080f971d311b5"}, 977 | {file = "srsly-2.4.2.tar.gz", hash = "sha256:2aba252292767875086adf4e4380e27b024d73655456f796f8e07eb3a4dfacc0"}, 978 | ] 979 | tabulate = [ 980 | {file = 
"tabulate-0.8.9-py3-none-any.whl", hash = "sha256:d7c013fe7abbc5e491394e10fa845f8f32fe54f8dc60c6622c6cf482d25d47e4"}, 981 | {file = "tabulate-0.8.9.tar.gz", hash = "sha256:eb1d13f25760052e8931f2ef80aaf6045a6cceb47514db8beab24cded16f13a7"}, 982 | ] 983 | thinc = [ 984 | {file = "thinc-8.0.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f818b9f012169a11beb3561c43dc52080588e50cf495733e492efab8b9b4135e"}, 985 | {file = "thinc-8.0.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f520daf45b7f42a04363852df43be1b423ae42d9327709d74f6c3279b3f73778"}, 986 | {file = "thinc-8.0.13-cp310-cp310-win_amd64.whl", hash = "sha256:2b217059c9e126220b77e7d6c9da56912c4e1eb4e8a11af14f17752e198e88cc"}, 987 | {file = "thinc-8.0.13-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0f956c693d180209075703072fd226a24408cbe80eb67bd3b6eea407f61cb283"}, 988 | {file = "thinc-8.0.13-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17d87469082b82c27b7d40dd86c793fc34c60f734209ee056cb02d7609f255b"}, 989 | {file = "thinc-8.0.13-cp36-cp36m-win_amd64.whl", hash = "sha256:27ea64843d6af0f3de8c788ec2a00598a1e5b4d57aadb52845fa42e95e4038c2"}, 990 | {file = "thinc-8.0.13-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f274bcaa781aaf1dba5eac7da7d88d9b0cb8c2fd7477647f0ca9d3221dfb958"}, 991 | {file = "thinc-8.0.13-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d52a5621e1784af5c64af4cfa9b2924358ca07aafd99014c57a736cf032e42f7"}, 992 | {file = "thinc-8.0.13-cp37-cp37m-win_amd64.whl", hash = "sha256:753f65e07860553551ed8806b934a74f26a4a50985d556ecd5c4ab50c29b3222"}, 993 | {file = "thinc-8.0.13-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ffe0a4d74f2ba2819193a5d9179156256f44c69255d7ae286ce1861efcefbc64"}, 994 | {file = "thinc-8.0.13-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b61f78f6f116d23438b034c3552804c9767c4165960b1d7e48f07b2e9a95afb0"}, 995 | {file = "thinc-8.0.13-cp38-cp38-win_amd64.whl", hash = "sha256:ba576af211ad2b00af78ab3e24e689289b29af8a9e51619ad55fab86871d8652"}, 996 | {file = "thinc-8.0.13-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:def8e96eddb5a098d07dcf8752266095e14a6cf5d056ff766e2cdc542eb63f02"}, 997 | {file = "thinc-8.0.13-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce322b66053819654d0444877154a08ed01cf5b45c6b3c9763e59b78af4f6039"}, 998 | {file = "thinc-8.0.13-cp39-cp39-win_amd64.whl", hash = "sha256:b3ae088f60d3dfe6a88c6be37548aae40023e46a718cffe3e43953b4f0ffc340"}, 999 | {file = "thinc-8.0.13.tar.gz", hash = "sha256:47662a3ae33d445a77b6ea7b772444805c7bba8991f122e350daf72dedc8171a"}, 1000 | ] 1001 | tqdm = [ 1002 | {file = "tqdm-4.62.3-py2.py3-none-any.whl", hash = "sha256:8dd278a422499cd6b727e6ae4061c40b48fce8b76d1ccbf5d34fca9b7f925b0c"}, 1003 | {file = "tqdm-4.62.3.tar.gz", hash = "sha256:d359de7217506c9851b7869f3708d8ee53ed70a1b8edbba4dbcb47442592920d"}, 1004 | ] 1005 | typer = [ 1006 | {file = "typer-0.3.2-py3-none-any.whl", hash = "sha256:ba58b920ce851b12a2d790143009fa00ac1d05b3ff3257061ff69dbdfc3d161b"}, 1007 | {file = "typer-0.3.2.tar.gz", hash = "sha256:5455d750122cff96745b0dec87368f56d023725a7ebc9d2e54dd23dc86816303"}, 1008 | ] 1009 | typing-extensions = [ 1010 | {file = "typing_extensions-3.10.0.2-py2-none-any.whl", hash = "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7"}, 1011 | {file = "typing_extensions-3.10.0.2-py3-none-any.whl", hash = "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"}, 1012 | 
{file = "typing_extensions-3.10.0.2.tar.gz", hash = "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e"}, 1013 | ] 1014 | urllib3 = [ 1015 | {file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"}, 1016 | {file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"}, 1017 | ] 1018 | wasabi = [ 1019 | {file = "wasabi-0.8.2-py3-none-any.whl", hash = "sha256:a493e09d86109ec6d9e70d040472f9facc44634d4ae6327182f94091ca73a490"}, 1020 | {file = "wasabi-0.8.2.tar.gz", hash = "sha256:b4a36aaa9ca3a151f0c558f269d442afbb3526f0160fd541acd8a0d5e5712054"}, 1021 | ] 1022 | wcwidth = [ 1023 | {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, 1024 | {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, 1025 | ] 1026 | zipp = [ 1027 | {file = "zipp-3.6.0-py3-none-any.whl", hash = "sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"}, 1028 | {file = "zipp-3.6.0.tar.gz", hash = "sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832"}, 1029 | ] 1030 | --------------------------------------------------------------------------------