├── VERSION
├── MANIFEST.in
├── docs
│   ├── replacy_ex.png
│   └── replacy_logo.png
├── replacy
│   ├── version.py
│   ├── resources
│   │   ├── forms_lookup.json
│   │   ├── test.arpa
│   │   ├── match_dict_schema.json
│   │   ├── match_dict.json
│   │   └── patterns_test_data.json
│   ├── filter_spans_by_cat.py
│   ├── ref_matcher.py
│   ├── default_scorer.py
│   ├── suggestion_joiner.py
│   ├── filter_0distance.py
│   ├── test_helper.py
│   ├── db.py
│   ├── scorer.py
│   ├── inflector.py
│   ├── util.py
│   ├── suggestion.py
│   ├── __init__.py
│   └── default_match_hooks.py
├── pytest.ini
├── tests
│   ├── test_replacy.py
│   ├── resources_test.py
│   ├── test_hooks.py
│   ├── test_scorer.py
│   ├── test_multiple_whitespaces.py
│   ├── test_ref_matcher.py
│   ├── test_custom_props.py
│   ├── test_pipeline.py
│   ├── test_suggestions.py
│   ├── test_inflector.py
│   └── test_max_count.py
├── test.py
├── .github
│   ├── workflows
│   │   ├── pub.yaml
│   │   └── main.yml
│   └── pull_request_template.md
├── pyproject.toml
├── LICENSE.md
├── CHANGELOG.md
├── .gitignore
├── setup.py
├── README.md
└── poetry.lock

--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 3.1.0
2 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include VERSION
2 | recursive-include replacy/resources *
--------------------------------------------------------------------------------
/docs/replacy_ex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writer/replaCy/HEAD/docs/replacy_ex.png
--------------------------------------------------------------------------------
/docs/replacy_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/writer/replaCy/HEAD/docs/replacy_logo.png
--------------------------------------------------------------------------------
/replacy/version.py:
--------------------------------------------------------------------------------
1 | # CHANGES HERE HAVE NO EFFECT: ../VERSION is the source of truth
2 | __version__ = "3.1.0"
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | filterwarnings =
3 |     error
4 |     ignore::DeprecationWarning
5 |     ignore::ImportWarning
--------------------------------------------------------------------------------
/tests/test_replacy.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | 
3 | # importing the TestCase lets unittest.main() discover its tests
4 | from replacy.test_helper import MatchDictTestHelper
5 | 
6 | if __name__ == "__main__":
7 |     unittest.main()
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | """
2 | Because of the automatic file discovery that happens in QAI,
3 | PYTHONPATH is wrong if you `python -m pytest`
4 | so run `python test.py` with test.py in the root and then it works
5 | """
6 | 
7 | import pytest
8 | 
9 | pytest.main()
--------------------------------------------------------------------------------
/replacy/resources/forms_lookup.json:
--------------------------------------------------------------------------------
1 | {
2 |     "exact": {
3 |         "VB": "exact",
4 |         "VBP": "exact",
5 |         "VBD": "exacted",
6 |         "VBN": "exacted",
7 |         "VBG": "exacting",
8 |         "VBZ": "exacts"
9 |     }
10 | }
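Note: `forms_lookup.json` holds hand-curated inflection overrides that `replacy/inflector.py` consults before falling back to lemminflect. A minimal sketch of that lookup logic, mirroring `Inflector.get_dict_form` later in this repo (the standalone helper and assertion values below are illustrative, not part of the package):

```python
# Sketch: resolving an override inflection from a forms lookup.
forms_lookup = {
    "exact": {
        "VB": "exact", "VBP": "exact", "VBD": "exacted",
        "VBN": "exacted", "VBG": "exacting", "VBZ": "exacts",
    }
}

def get_dict_form(word: str, tag: str):
    # If `word` is any known form of an entry, return that entry's form
    # for the requested Penn tag; otherwise signal "not found" with None.
    for forms in forms_lookup.values():
        if word in forms.values() and tag in forms:
            return forms[tag]
    return None

assert get_dict_form("exacting", "VBZ") == "exacts"
assert get_dict_form("dance", "VBZ") is None  # not in the lookup
```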
-------------------------------------------------------------------------------- /.github/workflows/pub.yaml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPi 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | publish: 8 | name: Build and publish to PyPi 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Build and publish to pypi 13 | uses: JRubics/poetry-publish@v1.9 14 | with: 15 | python_version: "3.8" 16 | pypi_token: ${{ secrets.PYPI_TOKEN }} 17 | -------------------------------------------------------------------------------- /tests/resources_test.py: -------------------------------------------------------------------------------- 1 | import json 2 | from replacy import ReplaceMatcher 3 | from replacy.db import get_match_dict 4 | 5 | with open("replacy/resources/match_dict.json", "r") as f: 6 | rules = json.load(f) 7 | 8 | 9 | def test_file_exists(): 10 | assert rules is not None 11 | 12 | 13 | # spacy 3 requires a new schema 14 | # def test_valid_format(): 15 | # match_dict = get_match_dict() 16 | # ReplaceMatcher.validate_match_dict(match_dict) 17 | -------------------------------------------------------------------------------- /replacy/filter_spans_by_cat.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from replacy import ESpan 4 | from spacy.util import filter_spans 5 | 6 | 7 | def filter_spans_by_cat(spans: List[ESpan]) -> List[ESpan]: 8 | if len(spans): 9 | subcats = set(map(lambda c: c.subcategory, spans)) 10 | grouped_spans = [[y for y in spans if y.subcategory == c] for c in subcats] 11 | filtered_spans = [] 12 | for group in grouped_spans: 13 | filtered_spans += filter_spans(group) 14 | return filtered_spans 15 | return spans 16 | -------------------------------------------------------------------------------- /replacy/ref_matcher.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from spacy.matcher import Matcher 4 | 5 | 6 | class RefMatcher: 7 | def __call__(self, span, orig_pattern, alignments): 8 | # not all parameters are needed, adding it to have same signature as RefMatcher 9 | pattern_indexes = set(alignments) 10 | return { 11 | pattern_idx: [ 12 | span_token_idx 13 | for span_token_idx, pattern_index in enumerate(alignments) 14 | if pattern_index == pattern_idx 15 | ] 16 | for pattern_idx in pattern_indexes 17 | } 18 | -------------------------------------------------------------------------------- /replacy/default_scorer.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from spacy.tokens import Span 4 | 5 | 6 | class Scorer: 7 | def __init__(self): 8 | pass 9 | 10 | def __call__(self, text): 11 | """Please override this""" 12 | return 0.5 13 | 14 | def score_suggestion(self, doc, span, suggestion): 15 | """Please override this""" 16 | text = " ".join([doc[: span.start].text] + suggestion + [doc[span.end :].text]) 17 | return self(text) 18 | 19 | def sort_suggestions(self, spans: List[Span]) -> List[Span]: 20 | """Please override this""" 21 | return spans 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "replaCy" 3 | version = "3.4.0" 4 | description = "ReplaCy = spaCy Matcher + pyInflect. 
Create rules, correct sentences." 5 | authors = [ 6 | "melisa-writer ", 7 | "sam-writer ", 8 | "manhal-daaboul " 9 | ] 10 | readme = "README.md" 11 | license = "MIT" 12 | 13 | [tool.poetry.dependencies] 14 | python = "^3.6" 15 | jsonschema = "^2.6.0" 16 | lemminflect = "0.2.1" 17 | pyfunctional = "^1.2.0" 18 | 19 | [tool.poetry.dev-dependencies] 20 | pytest = "^5.3.2" 21 | spacy= "^3.0.6" 22 | en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz" } 23 | kenlm = { git = "https://github.com/kpu/kenlm", rev = "master" } -------------------------------------------------------------------------------- /replacy/suggestion_joiner.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from spacy.tokens import Span 4 | 5 | 6 | def join_suggestions(spans: List[Span]) -> List[Span]: 7 | for span in spans: 8 | suggestions_separator = span.suggestions_separator if span.has_extension('suggestions_separator') else " " 9 | suggestions: List[str] = [] 10 | for s in span._.suggestions: 11 | # in case of two exactly overlapping spans 12 | # some of suggestions could be already processed 13 | # this could cause problems 14 | # this should be handled by early span filtering 15 | try: 16 | suggestions += [suggestions_separator.join([t.text for t in s])] 17 | except AttributeError: 18 | suggestions.append(s) 19 | 20 | span._.suggestions = suggestions 21 | return spans 22 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | on: push 3 | jobs: 4 | lint: 5 | name: Lint with Black 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: "lgeiger/black-action@master" 9 | with: 10 | args: ". 
--check" 11 | pytest: 12 | name: pytest 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: [3.8] 17 | steps: 18 | - uses: actions/checkout@master 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: abatilo/actions-poetry@v1.5.0 21 | with: 22 | python_version: ${{ matrix.python-version }} 23 | poetry_version: 1.1.8 24 | args: install 25 | - name: Run pytest 26 | uses: abatilo/actions-poetry@v1.5.0 27 | with: 28 | python_version: ${{ matrix.python-version }} 29 | poetry_version: 1.1.8 30 | args: run python -m pytest -------------------------------------------------------------------------------- /tests/test_hooks.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | 4 | from replacy import default_match_hooks 5 | from replacy.db import get_patterns_test_data 6 | 7 | nlp = spacy.load("en_core_web_sm") 8 | 9 | examples_list = get_patterns_test_data() 10 | 11 | 12 | @pytest.mark.parametrize("example", examples_list) 13 | def test_custom_patterns(example): 14 | 15 | hook_name = example["hook_name"] 16 | 17 | if example.get("args", False): 18 | hook = getattr(default_match_hooks, hook_name)(example["args"]) 19 | elif example.get("kwargs", False): 20 | hook = getattr(default_match_hooks, hook_name)(**example["kwargs"]) 21 | else: 22 | hook = getattr(default_match_hooks, hook_name)() 23 | 24 | doc = nlp(example["text"]) 25 | start = example["start"] 26 | end = example["end"] 27 | 28 | assert hook(doc, start, end) == example["result"], f"{hook_name} should work" + str(example["result"]) 29 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # PR for replaCy 2 | 3 | ## PR Author 4 | 5 | ### Type of change 6 | 7 | 8 | - [ ] Bug fix (non-breaking change which fixes an issue) 9 | - [ ] New feature (non-breaking change which adds functionality) 10 | - [ ] Breaking change (fix or feature that would cause existing functionality to change) 11 | 12 | ### Reminders 13 | 14 | - [ ] I incremented the version appropriately (now in `pyproject.toml`). 15 | - [ ] I added tests to cover my changes. 16 | - [ ] I tested my changes with a replaCy-based service to confirm my changes don't break it. 17 | - [ ] If my changes require documentation updates, I updated the documentation 18 | 19 | ---- 20 | 21 | ## PR Reviewer 22 | 23 | Confirm that they indeed did everything above in the `reminders` section! Especially important is that they checked this with downstream services and updated the documentation. 24 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 Qordoba, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /replacy/filter_0distance.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from replacy import ESpan 4 | 5 | 6 | def filter_0distance(spans: List[ESpan]) -> List[ESpan]: 7 | filtered_spans = [] 8 | for span in spans: 9 | if len(span.suggestions): 10 | suggestions = [] 11 | for suggestion in span.suggestions: 12 | if (span.doc[span.start:span.end].text) == suggestion: 13 | continue 14 | suggestions.append(suggestion) 15 | 16 | if len(suggestions): 17 | span.suggestions = suggestions 18 | filtered_spans.append(span) 19 | else: 20 | filtered_spans.append(span) 21 | return filtered_spans 22 | 23 | 24 | def filter_0distance_with_line_break(spans: List[ESpan]) -> List[ESpan]: 25 | filtered_spans = [] 26 | for span in spans: 27 | if len(span.suggestions): 28 | span_text = span.doc[span.start:span.end].text.rstrip(" \r\n") 29 | suggestions = [] 30 | for suggestion in span.suggestions: 31 | if span_text == suggestion: 32 | continue 33 | suggestions.append(suggestion) 34 | 35 | if len(suggestions): 36 | span.suggestions = suggestions 37 | filtered_spans.append(span) 38 | else: 39 | filtered_spans.append(span) 40 | return filtered_spans -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (sort of. It's early days, and there may be some breaking changes released under a minor version increment). 7 | 8 | ## [0.35.0] - 2020-07-02 9 | 10 | ### Fixed 11 | 12 | - Only import kenlm if asked 13 | 14 | ## [0.31.0] - 2020-06-23 15 | 16 | - Oops forgot to update this for quite a while. See the README for these changes. Will try to add this updating to the CI/CD... one day. 17 | 18 | ## [0.5.0] - 2020-01-02 19 | 20 | ### Changed 21 | 22 | - updated `requirements-dev.txt` to have all needed requirements for development 23 | 24 | - `replacy/db.py:get_forms_lookup` and `replacy/db.py:get_match_dict` now each accept one parameter - the path to the resource they will load. The default value of this parameter is the value that was previously hardcoded. 25 | 26 | - `replacy/__init__.py:ReplaceMatcher.__init__` now does not require a `match_dict` to be passed in as the second parameter. If no `match_dict` is passed, it will load one by calling `replacy/db.py:get_match_dict()` (with no parameter, so it will look in the default location). 
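For illustration, the two initialization styles described above (a sketch; assumes spaCy and `en_core_web_sm` are installed):

```python
import spacy
from replacy import ReplaceMatcher
from replacy.db import get_match_dict

nlp = spacy.load("en_core_web_sm")

# explicit path, resolved relative to the replacy package directory
r_explicit = ReplaceMatcher(nlp, get_match_dict("resources/match_dict.json"))

# no match_dict: falls back to get_match_dict() and the default location
r_default = ReplaceMatcher(nlp)
```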
27 | 28 | ## [0.4.0] - 2019-12-UNK 29 | 30 | ### UNK 31 | 32 | ## [0.1.0 - 0.3.0] - 2019-12-18 33 | 34 | ### First 35 | 36 | - first pypi release 37 | -------------------------------------------------------------------------------- /tests/test_scorer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | 4 | from replacy import ReplaceMatcher 5 | from replacy.db import get_match_dict 6 | 7 | nlp = spacy.load("en_core_web_sm") 8 | lm_path = "replacy/resources/test.arpa" 9 | 10 | match_dict = get_match_dict() 11 | r_matcher = ReplaceMatcher(nlp, match_dict, lm_path=lm_path) 12 | 13 | dumb_matcher = ReplaceMatcher(nlp, match_dict, lm_path=None) 14 | 15 | test_examples = [ 16 | { 17 | "sent": "This x a sentence.", 18 | "span_start": 1, 19 | "span_end": 2, 20 | "suggestions": ["are", "were", "is"], 21 | "best_suggestion": "is", 22 | }, 23 | { 24 | "sent": "This is x sentence.", 25 | "span_start": 2, 26 | "span_end": 3, 27 | "suggestions": ["two", "a", "cat"], 28 | "best_suggestion": "a", 29 | }, 30 | { 31 | "sent": "This is a sentences.", 32 | "span_start": 3, 33 | "span_end": 4, 34 | "suggestions": ["sentence", "sentences", "dogs"], 35 | "best_suggestion": "sentence", 36 | }, 37 | ] 38 | 39 | 40 | @pytest.mark.parametrize("example", test_examples) 41 | def test_scorer(example): 42 | doc = nlp(example["sent"]) 43 | span = doc[example["span_start"] : example["span_end"]] 44 | span._.suggestions = example["suggestions"] 45 | 46 | sorted_suggestions = sorted( 47 | span._.suggestions, 48 | key=lambda x: r_matcher.scorer.score_suggestion(doc, span, [x]), 49 | ) 50 | best_suggestion = sorted_suggestions[0] 51 | assert example["best_suggestion"] == best_suggestion 52 | -------------------------------------------------------------------------------- /tests/test_multiple_whitespaces.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | from replacy import ReplaceMatcher 4 | 5 | nlp = spacy.load("en_core_web_sm") 6 | 7 | # minimal match dict with many whitespaces 8 | match_dict = { 9 | "extract-revenge": { 10 | "patterns": [[{"LEMMA": "extract", "TEMPLATE_ID": 1}]], 11 | "suggestions": [[{"TEXT": "exact", "FROM_TEMPLATE_ID": 1}]], 12 | "match_hook": [ 13 | { 14 | "name": "succeeded_by_phrase", 15 | "args": "revenge", 16 | "match_if_predicate_is": True, 17 | } 18 | ], 19 | "test": { 20 | "positive": [ 21 | "And at the same time extract revenge on those he so despises?", # 0 22 | "Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team.", # 1 23 | "In fact, the farmer was so mean to this young man he determined to extract revenge.", # 2 24 | "And at the same time extract revenge on the whites he so despises?", # 10 sic 25 | ], 26 | "negative": ["Mother flavours her custards with lemon extract."], 27 | }, 28 | } 29 | } 30 | 31 | r_matcher = ReplaceMatcher(nlp, match_dict, allow_multiple_whitespaces=True) 32 | 33 | 34 | def test_multiple_whites(): 35 | sents = match_dict["extract-revenge"]["test"]["positive"] 36 | for sent in sents: 37 | assert len(r_matcher(sent)), "Should correct with multiple whitespaces" 38 | 39 | suggestion = r_matcher(sent)[0].text.strip() 40 | assert "extract" in suggestion, "Should correct with multiple whitespaces" 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | 
.DS_Store 4 | 5 | # Checkpoints and models: 6 | *.pt 7 | *.bin 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | conf/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | env.bak/ 100 | venv.bak/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | -------------------------------------------------------------------------------- /replacy/test_helper.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from typing import Any, Dict, List, Tuple 3 | 4 | import spacy 5 | 6 | from replacy import ReplaceMatcher 7 | from replacy.db import get_match_dict 8 | 9 | 10 | class MatchDictTestHelper(unittest.TestCase): 11 | 12 | @staticmethod 13 | def generate_cases(match_dict: Dict[str, Any]) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]: 14 | positives: List[Tuple[str, str]] = [] 15 | negatives: List[Tuple[str, str]] = [] 16 | for rule_name in match_dict: 17 | test_set = match_dict[rule_name]["test"] 18 | positive_cases = test_set["positive"] 19 | negative_cases = test_set["negative"] 20 | for positive_sent in positive_cases: 21 | positives.append((rule_name, positive_sent)) 22 | for negative_sent in negative_cases: 23 | negatives.append((rule_name, negative_sent)) 24 | return positives, negatives 25 | 26 | @classmethod 27 | def setUpClass(cls): 28 | nlp = spacy.load("en_core_web_sm") 29 | match_dict = get_match_dict() 30 | cls.r_matcher = ReplaceMatcher(nlp, match_dict) 31 | cls.positive_cases, cls.negative_cases = MatchDictTestHelper.generate_cases(match_dict) 32 | 33 | def test_positive(self): 34 | for (match_name, positive_sent) in self.positive_cases: 35 | spans = self.r_matcher(positive_sent) 36 | spans_from_this_rule = list(filter(lambda s: s._.match_name == match_name, spans)) 37 | print(match_name, positive_sent) 38 | assert len(spans_from_this_rule) > 0, "Positive case should trigger rule" 39 | 40 | def test_negative(self): 41 | for (match_name, negative_sent) in self.negative_cases: 42 | spans = 
self.r_matcher(negative_sent) 43 | spans_from_this_rule = list(filter(lambda s: s._.match_name == match_name, spans)) 44 | print(match_name, negative_sent) 45 | assert len(spans_from_this_rule) == 0, "Negative case should NOT trigger rule" 46 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | from setuptools import setup, find_packages 5 | from setuptools.command.install import install 6 | 7 | here = os.path.abspath(os.path.dirname(__file__)) 8 | 9 | with open(os.path.join(here, "README.md"), encoding="utf-8") as f: 10 | long_description = f.read() 11 | 12 | with open(os.path.join(here, "VERSION"), encoding="utf-8") as f: 13 | __version__ = f.read().strip() 14 | with open(os.path.join(here, "replacy", "version.py"), "w+", encoding="utf-8") as v: 15 | v.write("# CHANGES HERE HAVE NO EFFECT: ../VERSION is the source of truth\n") 16 | v.write(f'__version__ = "{__version__}"') 17 | """ 18 | requirementPath = os.path.abspath("./requirements.txt") 19 | install_requires: List[str] = [] 20 | if os.path.isfile(requirementPath): 21 | with open(requirementPath) as f: 22 | install_requires = f.read().splitlines() 23 | """ 24 | setup( 25 | name="replacy", 26 | description="ReplaCy = spaCy Matcher + pyInflect. Create rules, correct sentences.", 27 | packages=find_packages(), 28 | package_data={"replacy": ["resources/*"]}, 29 | include_package_data=True, 30 | author="Qordoba", 31 | author_email="Sam Havens , Melisa Stal ", 32 | url="https://github.com/Qordobacode/replaCy", 33 | version=__version__, 34 | license="MIT", 35 | long_description=long_description, 36 | long_description_content_type="text/markdown", 37 | install_requires=["pyfunctional>=1.2.0", "jsonschema>=2.6.0", "lemminflect==0.2.1"], 38 | python_requires=">=3.5", 39 | classifiers=[ 40 | "Development Status :: 3 - Alpha", 41 | "Intended Audience :: Developers", 42 | "Intended Audience :: Science/Research", 43 | "License :: OSI Approved :: MIT License", 44 | "Natural Language :: English", 45 | "Programming Language :: Python :: 3.5", 46 | "Programming Language :: Python :: 3.6", 47 | "Programming Language :: Python :: 3.7", 48 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 49 | "Topic :: Text Processing :: Linguistic", 50 | "Typing :: Typed", 51 | ], 52 | ) 53 | -------------------------------------------------------------------------------- /replacy/db.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import Any, Dict, List, Union 4 | 5 | here = os.path.abspath(os.path.dirname(__file__)) 6 | 7 | 8 | def _load_list(paths: List[str]) -> dict: 9 | content: Dict[str, Any] = {} 10 | for p in paths: 11 | with open(p) as h: 12 | t = json.load(h) 13 | content.update(t) 14 | return content 15 | 16 | 17 | def load_json(path_or_dir: Union[str, List[str]]) -> dict: 18 | path_error = ( 19 | "replacy.db.load_json expects a valid path to a json file, " 20 | "a list of (valid) paths to json files, " 21 | "or the (valid) path to a directory with json files" 22 | f", but received {path_or_dir}" 23 | ) 24 | if type(path_or_dir) == str: 25 | json_path = str(path_or_dir) # make mypy happy 26 | if ( 27 | os.path.exists(json_path) 28 | and os.path.isfile(json_path) 29 | and json_path[-5:] == ".json" 30 | ): 31 | with open(json_path) as h: 32 | content = json.load(h) 33 | elif 
os.path.isdir(json_path): 34 | paths = [ 35 | os.path.join(json_path, f) 36 | for f in os.listdir(json_path) 37 | if f.endswith(".json") 38 | ] 39 | content = _load_list(paths) 40 | else: 41 | raise ValueError(path_error) 42 | elif type(path_or_dir) == list: 43 | paths = list(path_or_dir) # for mypy 44 | content = _load_list(paths) 45 | else: 46 | raise TypeError(path_error) 47 | return content 48 | 49 | 50 | def get_forms_lookup(forms_path="resources/forms_lookup.json"): 51 | matches_path = os.path.join(here, forms_path) 52 | return load_json(matches_path) 53 | 54 | 55 | def get_match_dict(match_path="resources/match_dict.json"): 56 | matches_path = os.path.join(here, match_path) 57 | return load_json(matches_path) 58 | 59 | 60 | def get_match_dict_schema(schema_path="resources/match_dict_schema.json"): 61 | full_schema_path = os.path.join(here, schema_path) 62 | return load_json(full_schema_path) 63 | 64 | 65 | def get_patterns_test_data(data_path="resources/patterns_test_data.json"): 66 | test_data_path = os.path.join(here, data_path) 67 | return load_json(test_data_path) 68 | 69 | 70 | def load_lm(model_path): 71 | import kenlm 72 | return kenlm.Model(model_path) 73 | -------------------------------------------------------------------------------- /tests/test_ref_matcher.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | 4 | from replacy import ReplaceMatcher 5 | 6 | nlp = spacy.load("en_core_web_sm") 7 | 8 | match_dict = { 9 | "match-1": { 10 | "patterns": [[ 11 | {"POS": {"NOT_IN": ["ADJ"]}, "OP": "*"}, 12 | {"POS": "ADJ", "OP": "*"}, 13 | {"POS": "NOUN"}, 14 | {"LEMMA": "be", "TEMPLATE_ID": 1}, 15 | {"LEMMA": "deliver"}, 16 | {"IS_PUNCT": False, "OP": "*"}, 17 | {"IS_PUNCT": True}, 18 | ]], 19 | "suggestions": [ 20 | [ 21 | {"TEXT": "A"}, 22 | {"TEXT": "delivery"}, 23 | {"TEXT": "of"}, 24 | {"PATTERN_REF": 1}, 25 | {"PATTERN_REF": 2}, 26 | {"TEXT": "be", "FROM_TEMPLATE_ID": 1}, 27 | {"TEXT": "made"}, 28 | {"PATTERN_REF": -2}, 29 | {"PATTERN_REF": -1}, 30 | ] 31 | ], 32 | "test": {"positive": [], "negative": []}, 33 | }, 34 | "match-2": { 35 | "patterns": [[ 36 | {"TEXT": "I"}, 37 | {"POS": "VERB",}, 38 | {"POS": "DET", "OP": "?"}, 39 | {"TEXT": "dog"}, 40 | {"POS": "DET"}, 41 | {"POS": "ADJ", "OP": "*"}, 42 | {"POS": "NOUN"}, 43 | ]], 44 | "suggestions": [ 45 | [ 46 | {"PATTERN_REF": 0}, 47 | {"PATTERN_REF": 1}, 48 | {"PATTERN_REF": 4}, 49 | {"PATTERN_REF": 5}, 50 | {"PATTERN_REF": 6}, 51 | {"TEXT": "to"}, 52 | {"PATTERN_REF": 2}, 53 | {"PATTERN_REF": 3}, 54 | ] 55 | ], 56 | "test": {"positive": [], "negative": []}, 57 | }, 58 | } 59 | 60 | r_matcher = ReplaceMatcher(nlp, match_dict) 61 | 62 | sents = [ 63 | "The fresh juicy sandwiches were delivered to everyone at the shop before lunchtime.", 64 | "Looks like I fed the dog some popcorn.", 65 | ] 66 | 67 | suggestions = [ 68 | "A delivery of fresh juicy sandwiches was made to everyone at the shop before lunchtime .", 69 | "I fed some popcorn to the dog", 70 | ] 71 | 72 | 73 | def test_refs(): 74 | for sent, sugg in zip(sents, suggestions): 75 | span = r_matcher(sent) 76 | print(span[0]) 77 | print(span[0]._.suggestions[0]) 78 | assert span[0]._.suggestions[0] == sugg 79 | -------------------------------------------------------------------------------- /tests/test_custom_props.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | import spacy 5 | from replacy import ReplaceMatcher 6 | from 
replacy.db import get_match_dict 7 | 8 | nlp = spacy.load("en_core_web_sm") 9 | 10 | with open("replacy/resources/match_dict.json", "r") as md: 11 | match_dict = json.load(md) 12 | r_matcher = ReplaceMatcher(nlp, match_dict) 13 | 14 | r_matcher.match_dict.update( 15 | { 16 | "sometest": { 17 | "patterns": [[{"LOWER": "sometest"}]], 18 | "suggestions": [[{"TEXT": "this part isn't the point"}]], 19 | "test": {"positive": ["positive test"], "negative": ["negative test"]}, 20 | "comment": "this is an example comment", 21 | "description": 'The expression is "make do".', 22 | "category": "R:VERB", 23 | "yo": "yoyo", 24 | "whoa": ["it's", "a", "list"], 25 | "damn": {"a dict": "too?"}, 26 | "nice": 420, 27 | "also_nice": 42.0, 28 | "meh": True, 29 | } 30 | } 31 | ) 32 | new_matcher = ReplaceMatcher(nlp, r_matcher.match_dict) 33 | # This matches the new entry above 34 | matched_span = new_matcher("sometest")[0] 35 | 36 | # This matches a "normal" replaCy match example, so uses defaults 37 | no_match_span = new_matcher("I will extract revenge")[0] 38 | 39 | 40 | def test_custom_properties_string(): 41 | assert no_match_span._.yo == "", "automatically infers string types" 42 | assert matched_span._.yo == "yoyo", "picks up custom string types" 43 | 44 | 45 | def test_custom_properties_list(): 46 | assert no_match_span._.whoa == [], "automatically infers list types" 47 | assert matched_span._.whoa == ["it's", "a", "list"], "picks up custom list types" 48 | 49 | 50 | def test_custom_properties_dict(): 51 | assert no_match_span._.damn == {}, "automatically infers dict types" 52 | assert matched_span._.damn == {"a dict": "too?"}, "picks up custom dict types" 53 | 54 | 55 | def test_custom_properties_int(): 56 | assert no_match_span._.nice == 0, "automatically infers int types" 57 | assert matched_span._.nice == 420, "picks up custom int types" 58 | 59 | 60 | def test_custom_properties_float(): 61 | assert no_match_span._.also_nice == 0.0, "automatically infers float types" 62 | assert matched_span._.also_nice == 42.0, "picks up custom float types" 63 | 64 | 65 | def test_custom_properties_bool(): 66 | assert no_match_span._.meh == False, "automatically infers bool types" 67 | assert matched_span._.meh == True, "picks up custom bool types" 68 | -------------------------------------------------------------------------------- /tests/test_pipeline.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pytest 4 | import spacy 5 | from spacy.tokens import Span 6 | from spacy.util import filter_spans 7 | 8 | from replacy import ReplaceMatcher 9 | from replacy.suggestion import Suggestion 10 | 11 | nlp = spacy.load("en_core_web_sm") 12 | 13 | match_dict = { 14 | "hyuck": { 15 | "patterns": [[{"LOWER": "hyuck"}]], 16 | "suggestions": [[{"TEXT": "ha"}]], 17 | "test": {"positive": [], "negative": []}, 18 | }, 19 | "hyuck-hyuck": { 20 | "patterns": [[{"LOWER": "hyuck"}, {"LOWER": "hyuck"}]], 21 | "suggestions": [[{"TEXT": "haha"}]], 22 | "test": {"positive": [], "negative": []}, 23 | }, 24 | } 25 | 26 | 27 | def test_default_pipe(): 28 | replaCy = ReplaceMatcher(nlp, match_dict) 29 | assert replaCy.pipe_names == ["sorter", "filter", "joiner"] 30 | 31 | 32 | class NewComponent: 33 | gibberish = "jknasdkjna" 34 | 35 | def __init__(self, name="garbler"): 36 | self.name = name 37 | 38 | def __call__(self, spans: List[Span]): 39 | for s in spans: 40 | s._.suggestions = [[Suggestion(text=self.gibberish, max_count=1, id=69)]] 41 | return spans 42 | 43 
| 44 | garbler = NewComponent() 45 | 46 | 47 | def test_add_pipe_first(): 48 | replaCy = ReplaceMatcher(nlp, match_dict) 49 | replaCy.add_pipe(garbler, first=True) 50 | assert replaCy.pipe_names == ["garbler", "sorter", "filter", "joiner"] 51 | 52 | 53 | def test_add_pipe_last(): 54 | replaCy = ReplaceMatcher(nlp, match_dict) 55 | replaCy.add_pipe(garbler, last=True) 56 | assert replaCy.pipe_names == ["sorter", "filter", "joiner", "garbler"] 57 | 58 | 59 | def test_add_pipe_before(): 60 | replaCy = ReplaceMatcher(nlp, match_dict) 61 | replaCy.add_pipe(garbler, before="joiner") 62 | assert replaCy.pipe_names == ["sorter", "filter", "garbler", "joiner"] 63 | 64 | 65 | def test_add_pipe_after(): 66 | replaCy = ReplaceMatcher(nlp, match_dict) 67 | replaCy.add_pipe(garbler, after="filter") 68 | assert replaCy.pipe_names == ["sorter", "filter", "garbler", "joiner"] 69 | 70 | 71 | def test_component_added_after_filter_is_called(): 72 | replaCy = ReplaceMatcher(nlp, match_dict) 73 | replaCy.add_pipe(garbler, after="filter") 74 | spans = replaCy("hyuck, that's funny") 75 | assert spans[0]._.suggestions[0] == NewComponent.gibberish 76 | 77 | 78 | def test_span_filter_component(): 79 | replaCy = ReplaceMatcher(nlp, match_dict) 80 | spans = replaCy("hyuck hyuck") 81 | assert ( 82 | len(spans) == 3 83 | ), "without span overlap filtering there are three spans (one for each hyuck, and one for both)" 84 | replaCy.add_pipe(filter_spans, before="joiner") 85 | spans = replaCy("hyuck hyuck") 86 | assert len(spans) == 1, "with span overlap filtering there is only one span" 87 | -------------------------------------------------------------------------------- /replacy/resources/test.arpa: -------------------------------------------------------------------------------- 1 | 2 | \data\ 3 | ngram 1=37 4 | ngram 2=47 5 | ngram 3=11 6 | ngram 4=6 7 | ngram 5=4 8 | 9 | \1-grams: 10 | -1.383514 , -0.30103 11 | -1.139057 . -0.845098 12 | -1.029493 13 | -99 -0.4149733 14 | -1.995635 -20 15 | -1.285941 a -0.69897 16 | -1.687872 also -0.30103 17 | -1.687872 beyond -0.30103 18 | -1.687872 biarritz -0.30103 19 | -1.687872 call -0.30103 20 | -1.687872 concerns -0.30103 21 | -1.687872 consider -0.30103 22 | -1.687872 considering -0.30103 23 | -1.687872 for -0.30103 24 | -1.509559 higher -0.30103 25 | -1.687872 however -0.30103 26 | -1.687872 i -0.30103 27 | -1.687872 immediate -0.30103 28 | -1.687872 in -0.30103 29 | -1.687872 is -0.30103 30 | -1.285941 little -0.69897 31 | -1.383514 loin -0.30103 32 | -1.687872 look -0.30103 33 | -1.285941 looking -0.4771212 34 | -1.206319 more -0.544068 35 | -1.509559 on -0.4771212 36 | -1.509559 screening -0.4771212 37 | -1.687872 small -0.30103 38 | -1.687872 the -0.30103 39 | -1.687872 to -0.30103 40 | -1.687872 watch -0.30103 41 | -1.687872 watching -0.30103 42 | -1.687872 what -0.30103 43 | -1.687872 would -0.30103 44 | -3.141592 foo 45 | -2.718281 bar 3.0 46 | -6.535897 baz -0.0 47 | 48 | \2-grams: 49 | -0.6925742 , . 50 | -0.7522095 , however 51 | -0.7522095 , is 52 | -0.0602359 . 53 | -0.4846522 looking -0.4771214 54 | -1.051485 screening 55 | -1.07153 the 56 | -1.07153 watching 57 | -1.07153 what 58 | -0.09132547 a little -0.69897 59 | -0.2922095 also call 60 | -0.2922095 beyond immediate 61 | -0.2705918 biarritz . 
62 | -0.2922095 call for 63 | -0.2922095 concerns in 64 | -0.2922095 consider watch 65 | -0.2922095 considering consider 66 | -0.2834328 for , 67 | -0.5511513 higher more 68 | -0.5845945 higher small 69 | -0.2834328 however , 70 | -0.2922095 i would 71 | -0.2922095 immediate concerns 72 | -0.2922095 in biarritz 73 | -0.2922095 is to 74 | -0.09021038 little more -0.1998621 75 | -0.7273645 loin , 76 | -0.6925742 loin . 77 | -0.6708385 loin 78 | -0.2922095 look beyond 79 | -0.4638903 looking higher 80 | -0.4638903 looking on -0.4771212 81 | -0.5136299 more . -0.4771212 82 | -0.3561665 more loin 83 | -0.1649931 on a -0.4771213 84 | -0.1649931 screening a -0.4771213 85 | -0.2705918 small . 86 | -0.287799 the screening 87 | -0.2922095 to look 88 | -0.2622373 watch 89 | -0.2922095 watching considering 90 | -0.2922095 what i 91 | -0.2922095 would also 92 | -2 also would -6 93 | -15 -2 94 | -4 however -1 95 | -6 foo bar 96 | 97 | \3-grams: 98 | -0.01916512 more . 99 | -0.0283603 on a little -0.4771212 100 | -0.0283603 screening a little -0.4771212 101 | -0.01660496 a little more -0.09409451 102 | -0.3488368 looking higher 103 | -0.3488368 looking on -0.4771212 104 | -0.1892331 little more loin 105 | -0.04835128 looking on a -0.4771212 106 | -3 also would consider -7 107 | -6 however -12 108 | -7 to look a 109 | 110 | \4-grams: 111 | -0.009249173 looking on a little -0.4771212 112 | -0.005464747 on a little more -0.4771212 113 | -0.005464747 screening a little more 114 | -0.1453306 a little more loin 115 | -0.01552657 looking on a -0.4771212 116 | -4 also would consider higher -8 117 | 118 | \5-grams: 119 | -0.003061223 looking on a little 120 | -0.001813953 looking on a little more 121 | -0.0432557 on a little more loin 122 | -5 also would consider higher looking 123 | 124 | \end\ -------------------------------------------------------------------------------- /tests/test_suggestions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import spacy 3 | 4 | from replacy import ReplaceMatcher 5 | 6 | nlp = spacy.load("en_core_web_sm") 7 | 8 | # They read us the stories they themselves had written. 
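# The match_dict below exercises several suggestion features at once:
# - TEMPLATE_ID / FROM_TEMPLATE_ID: re-inflect the suggested token to match
#   the inflection of the matched token (past-tense "read" yields "sang"/"gave").
# - PATTERN_REF: copy the token at that index of the matched pattern
#   (negative indices count from the end of the pattern).
# - INFLECTION: force a POS or Penn tag on the suggested token (VBD below).
# - REPLACY_OP "UPPER": upper-case the copied token ("they" -> "THEY").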
9 | 10 | match_dict = { 11 | "match-1": { 12 | "patterns": [[ 13 | {"LOWER": {"IN": ["they", "she"]}}, 14 | {"LEMMA": "read", "TEMPLATE_ID": 1}, 15 | {"LOWER": "us"}, 16 | {"LOWER": "the"}, 17 | {"LEMMA": "story", "TEMPLATE_ID": 1}, 18 | {"LOWER": {"IN": ["they", "she"]}}, 19 | {"LOWER": {"IN": ["themselves", "herself"]}}, 20 | {"LEMMA": "have", "OP": "*"}, 21 | {"LEMMA": {"IN": ["write", "made"]}}, 22 | ]], 23 | "suggestions": [ 24 | [ 25 | {"PATTERN_REF": 0}, 26 | {"TEXT": {"IN": ["sing", "give"]}, "FROM_TEMPLATE_ID": 1}, 27 | {"PATTERN_REF": 2}, 28 | {"TEXT": {"IN": ["a", "the", "some"]}}, 29 | {"TEXT": "story", "INFLECTION": "NOUN"}, 30 | {"PATTERN_REF": 5, "REPLACY_OP": "UPPER"}, 31 | {"PATTERN_REF": 6}, 32 | {"TEXT": {"IN": ["write", "made", "create"]}, "INFLECTION": "VBD"}, 33 | ] 34 | ], 35 | "test": {"positive": [], "negative": []}, 36 | } 37 | } 38 | 39 | outputs = [ 40 | "They sang us a stories THEY themselves wrote", 41 | "They sang us a stories THEY themselves made", 42 | "They sang us a stories THEY themselves created", 43 | "They sang us a story THEY themselves wrote", 44 | "They sang us a story THEY themselves made", 45 | "They sang us a story THEY themselves created", 46 | "They sang us the stories THEY themselves wrote", 47 | "They sang us the stories THEY themselves made", 48 | "They sang us the stories THEY themselves created", 49 | "They sang us the story THEY themselves wrote", 50 | "They sang us the story THEY themselves made", 51 | "They sang us the story THEY themselves created", 52 | "They sang us some stories THEY themselves wrote", 53 | "They sang us some stories THEY themselves made", 54 | "They sang us some stories THEY themselves created", 55 | "They sang us some story THEY themselves wrote", 56 | "They sang us some story THEY themselves made", 57 | "They sang us some story THEY themselves created", 58 | "They gave us a stories THEY themselves wrote", 59 | "They gave us a stories THEY themselves made", 60 | "They gave us a stories THEY themselves created", 61 | "They gave us a story THEY themselves wrote", 62 | "They gave us a story THEY themselves made", 63 | "They gave us a story THEY themselves created", 64 | "They gave us the stories THEY themselves wrote", 65 | "They gave us the stories THEY themselves made", 66 | "They gave us the stories THEY themselves created", 67 | "They gave us the story THEY themselves wrote", 68 | "They gave us the story THEY themselves made", 69 | "They gave us the story THEY themselves created", 70 | "They gave us some stories THEY themselves wrote", 71 | "They gave us some stories THEY themselves made", 72 | "They gave us some stories THEY themselves created", 73 | "They gave us some story THEY themselves wrote", 74 | "They gave us some story THEY themselves made", 75 | "They gave us some story THEY themselves created", 76 | ] 77 | 78 | r_matcher = ReplaceMatcher(nlp, match_dict=match_dict) 79 | spans = r_matcher("They read us the stories they themselves had written.") 80 | suggestions = spans[0]._.suggestions 81 | 82 | 83 | def test_suggestions(): 84 | assert set(suggestions) <= set(outputs) 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | ![replaCy logo](docs/replacy_logo.png)
3 | 
4 | 
5 | # replaCy: match & replace with spaCy
6 | 
7 | We found that in multiple projects we had duplicate code for using spaCy’s blazing fast matcher to do the same thing: Match-Replace-Grammaticalize. So we wrote replaCy!
8 | 
9 | - Match - spaCy’s matcher is great, and lets you match on text, shape, POS, dependency parse, and other features. We extended this with “match hooks”: predicates that get used in the callback function to further refine a match.
10 | - Replace - Not built into spaCy’s matcher syntax, but easily added. You often want to replace a matched word with some other term.
11 | - Grammaticalize - If you match on `"LEMMA": "dance"` and replace with suggestions `["sing"]`, but the actual match is *danced*, you need to conjugate “sing” appropriately. This is the “killer feature” of replaCy.
12 | 
13 | [![spaCy](https://img.shields.io/badge/made%20with%20❤%20and-spaCy-09a3d5.svg)](https://spacy.io)
14 | [![pypi Version](https://img.shields.io/pypi/v/replacy.svg?style=flat-square&logo=pypi&logoColor=white)](https://pypi.org/project/replacy/)
15 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?style=flat-square)](https://github.com/ambv/black)
16 | 
17 | 
18 | ![replaCy example](docs/replacy_ex.png)
19 | 

20 | 
21 | 
22 | ## Requirements
23 | 
24 | - `spacy >= 2.0` (not installed by default, but replaCy needs to be instantiated with an `nlp` object)
25 | 
26 | ## Installation
27 | 
28 | `pip install replacy`
29 | 
30 | ## Quick start
31 | 
32 | ```python
33 | from replacy import ReplaceMatcher
34 | from replacy.db import load_json
35 | import spacy
36 | 
37 | 
38 | match_dict = load_json('/path/to/your/match/dict.json')
39 | # load the spaCy model of your choice
40 | nlp = spacy.load("en_core_web_sm")
41 | 
42 | r_matcher = ReplaceMatcher(nlp, match_dict=match_dict)
43 | 
44 | # get inflected suggestions
45 | # look up the first suggestion
46 | span = r_matcher("She extracts revenge.")[0]
47 | span._.suggestions
48 | # >>> ['exacts']
49 | ```
50 | 
51 | ## Input
52 | 
53 | `ReplaceMatcher` accepts both plain text and a spaCy `Doc`.
54 | 
55 | ```python
56 | # text is ok
57 | span = r_matcher("She extracts revenge.")[0]
58 | 
59 | # doc is ok too
60 | doc = nlp("She extracts revenge.")
61 | span = r_matcher(doc)[0]
62 | ```
63 | 
64 | ## match_dict.json format
65 | 
66 | Here is a minimal `match_dict.json`:
67 | 
68 | ```json
69 | {
70 |   "extract-revenge": {
71 |     "patterns": [
72 |       {
73 |         "LEMMA": "extract",
74 |         "TEMPLATE_ID": 1
75 |       }
76 |     ],
77 |     "suggestions": [
78 |       [
79 |         {
80 |           "TEXT": "exact",
81 |           "FROM_TEMPLATE_ID": 1
82 |         }
83 |       ]
84 |     ],
85 |     "match_hook": [
86 |       {
87 |         "name": "succeeded_by_phrase",
88 |         "args": "revenge",
89 |         "match_if_predicate_is": true
90 |       }
91 |     ],
92 |     "test": {
93 |       "positive": [
94 |         "And at the same time extract revenge on those he so despises?",
95 |         "Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team."
96 |       ],
97 |       "negative": ["Mother flavours her custards with lemon extract."]
98 |     }
99 |   }
100 | }
101 | ```
102 | 
103 | For more information on how to compose a `match_dict`, see our [wiki](https://github.com/Qordobacode/replaCy/wiki/match_dict.json-format).
104 | 
105 | # Citing
106 | 
107 | If you use replaCy in your research, please cite with the following BibTeX:
108 | 
109 | ```bibtex
110 | @misc{havens2019replacy,
111 |     title = {SpaCy match and replace, maintaining conjugation},
112 |     author = {Sam Havens, Aneta Stal, and Manhal Daaboul},
113 |     url = {https://github.com/Qordobacode/replaCy},
114 |     year = {2019}
115 | }
116 | ```
--------------------------------------------------------------------------------
/replacy/scorer.py:
--------------------------------------------------------------------------------
1 | import string
2 | import warnings
3 | from typing import List
4 | 
5 | from kenlm import Model as KenLMModel
6 | from spacy.tokens import Doc, Span, Token
7 | 
8 | from replacy.default_scorer import Scorer
9 | 
10 | 
11 | class KenLMScorer(Scorer):
12 | 
13 |     name = "kenlm"
14 | 
15 |     def __init__(self, model=None, path=None, nlp=None, lowercase=True):
16 | 
17 |         if model:
18 |             self.model = model
19 |         elif path:
20 |             self.model = KenLMModel(path)
21 | 
22 |         self._check_model()
23 | 
24 |         if nlp:
25 |             self.nlp = nlp
26 |         else:
27 |             import spacy
28 | 
29 |             self.nlp = spacy.load("en_core_web_sm")
30 | 
31 |         self.lowercase = lowercase
32 | 
33 |     def _check_model(self):
34 |         assert isinstance(self.model, KenLMModel)
35 |         assert self.model.score("testing !") < 0
36 | 
37 |     def preprocess(self, segment):
38 |         """
39 |         SpaCy tokenize + lowercase. Ignore extra whitespaces.
40 | - if Doc, Span, Token - retrieve .lower_ 41 | - if string - convert to Doc first 42 | """ 43 | if isinstance(segment, (Doc, Span, Token)): 44 | # spaCy tokenizer, ignore whitespaces 45 | tok = [token.text for token in segment if not token.is_space] 46 | if self.lowercase: 47 | tok = [token.lower() for token in tok] 48 | 49 | elif isinstance(segment, str): 50 | doc = self.nlp(segment, disable=self.nlp.pipe_names) 51 | return self.preprocess(doc) 52 | 53 | return " ".join(tok) 54 | 55 | def __call__(self, segment, score_type="perplexity"): 56 | 57 | text = self.preprocess(segment) 58 | word_count = len(text.split()) 59 | 60 | if word_count < 2: 61 | warnings.warn(f"Scorer: Received {word_count} tokens, expected >= 2.") 62 | return float("-inf") 63 | 64 | if isinstance(segment, Doc): 65 | # if doc - assume bos, eos=True 66 | bos = True 67 | eos = True 68 | 69 | if isinstance(segment, (Span, Token)): 70 | # if span - assume bos, eos=False 71 | bos = False 72 | eos = False 73 | 74 | if isinstance(segment, str): 75 | # string passed - guess: 76 | bos = text.capitalize() == text 77 | eos = text[-1] in string.punctuation 78 | 79 | # log10 prob 80 | score = self.model.score(text, bos=bos, eos=eos) 81 | 82 | if score_type == "log": 83 | return score 84 | 85 | elif score_type == "perplexity": 86 | prob = 10.0 ** (score) 87 | prob = 0.00000000001 if prob == 0 else prob 88 | return prob ** (-1 / word_count) 89 | else: 90 | raise NotImplementedError 91 | 92 | def score_suggestion(self, doc: Doc, span: Span, suggestion: List[str]) -> float: 93 | """ 94 | between spacy 2.3.2 and 2.3.5 the behavior of slicing docs changed 95 | so doc[len(doc):] now throws an exception (it just returned the empty span before) 96 | 97 | also, we use arrays of text tokens rather than t.text_with_ws_ because 98 | Ken wants space-tokenized strings 99 | """ 100 | if span.start == 0: 101 | head = [] 102 | else: 103 | head = [t.text for t in doc[: span.start]] 104 | if span.end >= len(doc): 105 | tail = [] 106 | else: 107 | tail = [t.text for t in doc[span.end :]] 108 | text = " ".join(head + suggestion + tail) 109 | return self(text) 110 | 111 | def sort_suggestions(self, spans: List[Span]) -> List[Span]: 112 | for span in spans: 113 | if len(span._.suggestions) > 1: 114 | span._.suggestions = sorted( 115 | span._.suggestions, 116 | key=lambda x: self.score_suggestion( 117 | span.doc, span, [t.text for t in x] 118 | ), 119 | ) 120 | return spans 121 | -------------------------------------------------------------------------------- /tests/test_inflector.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from replacy.inflector import Inflector 4 | 5 | xfail = pytest.mark.xfail 6 | 7 | inflector = Inflector() 8 | 9 | inflector_dataset = [ 10 | { 11 | "source": "Those are examples.", 12 | "target": "Those are rabbits.", 13 | "index": 2, 14 | "word": "rabbit", 15 | }, 16 | { 17 | "source": "Stop avoiding the question.", 18 | "target": "Stop evading the question.", 19 | "index": 1, 20 | "word": "evade", 21 | }, 22 | { 23 | "source": "She loves kittens.", 24 | "target": "She hates kittens.", 25 | "index": 1, 26 | "word": "hate", 27 | }, 28 | ] 29 | 30 | 31 | @pytest.mark.parametrize("data", inflector_dataset) 32 | def test_inflector(data): 33 | assert ( 34 | inflector.insert(data["source"], data["word"], data["index"]) == data["target"] 35 | ), "should inflect" 36 | 37 | 38 | """ 39 | Test lemmatization. 
40 | Plural and singular forms of nouns should have common (or at least intersecting) sets of lemmas.
41 | Important for max count estimation (see: suggestion.py).
42 | 
43 | Exceptions to handle separately:
44 |     {"singular": "person", "plural": "people"},
45 |     {"singular": "ox", "plural": "oxen"}
46 | 
47 | Why do we test this?
48 | ReplaCy uses the ML-based lemminflect to lemmatize.
49 | This test assures any lemminflect model upgrades do not break current behaviour.
50 | """
51 | 
52 | irregular_nouns = [
53 |     {"singular": "elf", "plural": "elves"},
54 |     {"singular": "calf", "plural": "calves"},
55 |     {"singular": "knife", "plural": "knives"},
56 |     {"singular": "loaf", "plural": "loaves"},
57 |     {"singular": "shelf", "plural": "shelves"},
58 |     {"singular": "wolf", "plural": "wolves"},
59 |     {"singular": "man", "plural": "men"},
60 |     {"singular": "mouse", "plural": "mice"},
61 |     {"singular": "child", "plural": "children"},
62 |     {"singular": "foot", "plural": "feet"},
63 |     {"singular": "goose", "plural": "geese"},
64 |     {"singular": "tooth", "plural": "teeth"},
65 |     {"singular": "louse", "plural": "lice"},
66 |     {"singular": "cactus", "plural": "cacti"},
67 |     {"singular": "appendix", "plural": "appendices"},
68 |     {"singular": "cod", "plural": "cods"},
69 |     {"singular": "shrimp", "plural": "shrimps"},
70 |     {"singular": "fish", "plural": "fishes"},
71 |     {"singular": "quail", "plural": "quails"},
72 | ]
73 | 
74 | irregular_nouns_lemma_exceptions = [
75 |     {"singular": "person", "plural": "people"},
76 |     {"singular": "ox", "plural": "oxen"},
77 | ]
78 | 
79 | 
80 | @pytest.mark.parametrize("pair", irregular_nouns)
81 | def test_lemmatization(pair):
82 |     singular_lemmas = set(inflector.get_lemmas(pair["singular"]))
83 |     plural_lemmas = set(inflector.get_lemmas(pair["plural"]))
84 | 
85 |     assert len(singular_lemmas & plural_lemmas) > 0, "lemmas are different!"
86 | 
87 | 
88 | @xfail(raises=AssertionError)
89 | @pytest.mark.parametrize("pair", irregular_nouns_lemma_exceptions)
90 | def test_lemmatization_exceptions(pair):
91 |     singular_lemmas = set(inflector.get_lemmas(pair["singular"]))
92 |     plural_lemmas = set(inflector.get_lemmas(pair["plural"]))
93 | 
94 |     assert len(singular_lemmas & plural_lemmas) > 0, "lemmas are different!"
--------------------------------------------------------------------------------
/tests/test_max_count.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import spacy
3 | 
4 | from replacy import ReplaceMatcher
5 | 
6 | nlp = spacy.load("en_core_web_sm")
7 | 
8 | # They read us the stories they themselves had written.
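# The expectations below pin down suggestion pruning (see suggestion.py):
# with filter_suggestions=True, suggestions are filtered using the kenlm
# language model in test.arpa, and default_max_count (or a per-token
# MAX_COUNT) bounds how often each option from a {"TEXT": {"IN": [...]}}
# list may appear in the kept set.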
9 | 10 | match_dict = { 11 | "match-1": { 12 | "patterns": [[ 13 | {"LOWER": {"IN": ["they", "she"]}}, 14 | {"LEMMA": "read", "TEMPLATE_ID": 1}, 15 | {"LOWER": "us"}, 16 | {"LOWER": "the"}, 17 | {"LEMMA": "story", "TEMPLATE_ID": 1}, 18 | {"LOWER": {"IN": ["they", "she"]}}, 19 | {"LOWER": {"IN": ["themselves", "herself"]}}, 20 | {"LEMMA": "have", "OP": "*"}, 21 | {"LEMMA": {"IN": ["write", "made"]}}, 22 | ]], 23 | "suggestions": [ 24 | [ 25 | {"PATTERN_REF": 0}, 26 | {"TEXT": {"IN": ["sing", "give"]}, "FROM_TEMPLATE_ID": 1}, 27 | {"PATTERN_REF": 2}, 28 | {"TEXT": {"IN": ["a", "the", "some"]}}, 29 | {"TEXT": "story", "INFLECTION": "NOUN"}, 30 | {"PATTERN_REF": 5, "REPLACY_OP": "UPPER"}, 31 | {"PATTERN_REF": 6}, 32 | {"TEXT": {"IN": ["write", "made", "create"]}, "INFLECTION": "VBD"}, 33 | ] 34 | ], 35 | "test": {"positive": [], "negative": []}, 36 | } 37 | } 38 | 39 | outputs = [ 40 | "They sang us a stories THEY themselves wrote", 41 | "They sang us a stories THEY themselves made", 42 | "They sang us a stories THEY themselves created", 43 | "They gave us a stories THEY themselves wrote", 44 | "They gave us a stories THEY themselves made", 45 | "They gave us a stories THEY themselves created", 46 | "They sang us the story THEY themselves wrote", 47 | "They sang us the story THEY themselves made", 48 | "They sang us the story THEY themselves created", 49 | "They gave us the story THEY themselves wrote", 50 | "They gave us the story THEY themselves made", 51 | "They gave us the story THEY themselves created", 52 | ] 53 | 54 | output_default_max_count_1 = [ 55 | "They sang us a stories THEY themselves wrote", 56 | "They sang us a story THEY themselves made", 57 | "They gave us a stories THEY themselves made", 58 | "They gave us a story THEY themselves wrote", 59 | "They sang us the stories THEY themselves made", 60 | "They sang us the story THEY themselves wrote", 61 | "They gave us the stories THEY themselves wrote", 62 | "They gave us the story THEY themselves made", 63 | "They sang us some stories THEY themselves created", 64 | "They gave us some story THEY themselves created", 65 | ] 66 | 67 | r_matcher1 = ReplaceMatcher( 68 | nlp, 69 | match_dict=match_dict, 70 | lm_path="./replacy/resources/test.arpa", 71 | filter_suggestions=True, 72 | ) 73 | 74 | spans = r_matcher1("They read us the stories they themselves had written.") 75 | suggestions = spans[0]._.suggestions 76 | 77 | 78 | def test_suggestions(): 79 | assert suggestions == outputs 80 | 81 | 82 | r_matcher_max_count_1 = ReplaceMatcher( 83 | nlp, 84 | match_dict=match_dict, 85 | lm_path="./replacy/resources/test.arpa", 86 | filter_suggestions=True, 87 | default_max_count=1, 88 | ) 89 | 90 | spans_max_count_1 = r_matcher_max_count_1( 91 | "They read us the stories they themselves had written." 
92 | ) 93 | suggestions_max_count_1 = spans_max_count_1[0]._.suggestions 94 | 95 | 96 | def test_default_max_count(): 97 | assert suggestions_max_count_1 == output_default_max_count_1 98 | 99 | 100 | short_match_dict_2_sugg = { 101 | "match-1": { 102 | "patterns": [[ 103 | {"LOWER": {"IN": ["they", "she"]}}, 104 | {"LEMMA": "read", "TEMPLATE_ID": 1}, 105 | ]], 106 | "suggestions": [ 107 | [ 108 | {"PATTERN_REF": 0}, 109 | {"FROM_TEMPLATE_ID": 1, "TEXT": {"IN": ["sing", "give"]}}, 110 | ], 111 | [{"PATTERN_REF": 0}, {"FROM_TEMPLATE_ID": 1, "TEXT": "dance"},], 112 | ], 113 | "test": {"negative": [], "positive": []}, 114 | } 115 | } 116 | 117 | 118 | def test_multiple_suggestions_max_count(): 119 | r_matcher = ReplaceMatcher( 120 | nlp, 121 | match_dict=short_match_dict_2_sugg, 122 | lm_path="./replacy/resources/test.arpa", 123 | filter_suggestions=True, 124 | debug=True, 125 | ) 126 | spans = r_matcher("They read us the stories they themselves had written.") 127 | assert len(spans[0]._.suggestions) == 3 128 | 129 | 130 | short_match_dict = { 131 | "match-1": { 132 | "patterns": [[ 133 | {"LOWER": {"IN": ["they", "she"]}}, 134 | {"LEMMA": "read", "TEMPLATE_ID": 1}, 135 | ]], 136 | "suggestions": [ 137 | [ 138 | {"PATTERN_REF": 0}, 139 | {"FROM_TEMPLATE_ID": 1, "TEXT": {"IN": ["sing", "give"]}}, 140 | ] 141 | ], 142 | "test": {"negative": [], "positive": []}, 143 | } 144 | } 145 | 146 | 147 | def test_manual_max_count(): 148 | # use short match dict 149 | # default_max_count=1 150 | # expect 1 suggestion 151 | 152 | r_matcher = ReplaceMatcher( 153 | nlp, 154 | match_dict=short_match_dict, 155 | lm_path="./replacy/resources/test.arpa", 156 | filter_suggestions=True, 157 | default_max_count=1, 158 | debug=True, 159 | ) 160 | spans = r_matcher("They read us the stories they themselves had written.") 161 | assert len(spans[0]._.suggestions) == 1 162 | 163 | # MAX_COUNT=2 for ['sing', 'give'] 164 | # default_max_count=1 165 | # expect 2 suggestions 166 | 167 | short_match_dict["match-1"]["suggestions"][0][1]["MAX_COUNT"] = 2 168 | 169 | r_matcher = ReplaceMatcher( 170 | nlp, 171 | match_dict=short_match_dict, 172 | lm_path="./replacy/resources/test.arpa", 173 | filter_suggestions=True, 174 | default_max_count=1, 175 | debug=True, 176 | ) 177 | spans = r_matcher("They read us the stories they themselves had written.") 178 | suggestions = spans[0]._.suggestions 179 | 180 | assert len(spans[0]._.suggestions) == 2 181 | -------------------------------------------------------------------------------- /replacy/inflector.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import lemminflect 4 | import spacy 5 | from spacy.tokens import Token 6 | 7 | from replacy.db import get_forms_lookup 8 | 9 | 10 | class Inflector: 11 | def __init__(self, nlp=None, forms_lookup=None): 12 | 13 | self.nlp = nlp 14 | if not self.nlp: 15 | self.nlp = spacy.load("en_core_web_sm") 16 | 17 | self.forms_lookup = forms_lookup 18 | if not self.forms_lookup: 19 | self.forms_lookup = get_forms_lookup() 20 | 21 | def get_dict_form(self, word, tag): 22 | for k in self.forms_lookup: 23 | if ( 24 | word in self.forms_lookup[k].values() 25 | and tag in self.forms_lookup[k].keys() 26 | ): 27 | return self.forms_lookup[k][tag] 28 | return None 29 | 30 | def auto_inflect(self, doc, suggestion, index): 31 | """ 32 | Inflect the suggestion using token at position 'index' as template. 33 | ex. (washed, eat) => ate 34 | Returns inflected suggestion as text. 
35 | If the inflection is not supported, check forms_lookup.json; 36 | if not found there - returns None. 37 | """ 38 | 39 | try: 40 | doc.text 41 | except AttributeError: 42 | doc = self.nlp(doc) 43 | 44 | sentence = doc.text 45 | 46 | token = doc[index] 47 | token_start = token.idx 48 | token_end = token_start + len(token) 49 | 50 | changed_sentence = "".join( 51 | [sentence[:token_start], suggestion, sentence[token_end:]] 52 | ) 53 | 54 | changed_doc = self.nlp(changed_sentence) 55 | changed_token = changed_doc[index] 56 | 57 | return self.inflect_or_lookup(changed_token, token.tag_) 58 | 59 | @staticmethod 60 | def tag_to_pos(tag): 61 | if tag in ["JJ", "JJR", "JJS"]: 62 | return "ADJ" 63 | elif tag in ["RB", "RBR", "RBS"]: 64 | return "ADV" 65 | elif tag in ["NN", "NNS"]: 66 | return "NOUN" 67 | elif tag in ["NNP", "NNPS"]: 68 | return "PROPN" 69 | elif tag in ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "MD"]: 70 | return "VERB" # AUX 71 | else: 72 | return tag 73 | 74 | def get_inflection_type(self, value: str): 75 | pos_values = ["ADJ", "ADV", "NOUN", "PROPN", "VERB", "AUX"] 76 | if value in pos_values: 77 | return "pos" 78 | elif Inflector.tag_to_pos(value) in pos_values: 79 | return "tag" 80 | elif value == "ALL": 81 | return "all" 82 | else: 83 | warnings.warn( 84 | f"Inflection <<{value}>> not supported, will fall back to <<ALL>>." 85 | ) 86 | return "all" 87 | 88 | def get_lemmas(self, word, tag=None, pos=None): 89 | 90 | lemmas = [] 91 | 92 | if tag: 93 | # infer pos from tag 94 | pos = Inflector.tag_to_pos(tag) 95 | 96 | if pos: 97 | lemma_dict = lemminflect.getLemma(word, upos=pos) 98 | lemmas = list(lemma_dict) 99 | else: 100 | # no pos provided, return all lemmas 101 | lemma_dict = lemminflect.getAllLemmas(word) 102 | for i in lemma_dict.values(): 103 | lemmas += list(i) 104 | 105 | return lemmas 106 | 107 | def inflect_lemma(self, lemma, tag=None, pos=None): 108 | 109 | inflections = [] 110 | # tag based 111 | if tag: 112 | inflection_tuple = lemminflect.getInflection(lemma, tag=tag) 113 | inflections = list(inflection_tuple) 114 | else: 115 | # pos based, can be None too 116 | inflection_dict = lemminflect.getAllInflections(lemma, upos=pos) 117 | for i in inflection_dict.values(): 118 | inflections += list(i) 119 | 120 | return inflections 121 | 122 | def inflect_token(self, token: Token, tag=None, pos=None): 123 | 124 | if tag: 125 | # dictionary look up 126 | # returns None if not found 127 | inflection = self.get_dict_form(token.lemma_, tag=tag) 128 | 129 | if not inflection: 130 | # tag provided, spaCy inflection (has .lemma_) 131 | inflection = token._.inflect(tag) 132 | 133 | inflections = [inflection] 134 | else: 135 | # fallback to lemminflect inflection 136 | # get all inflections 137 | inflections = self.inflect_lemma(token.lemma_, tag=tag, pos=pos) 138 | 139 | return inflections 140 | 141 | def inflect_string(self, word: str, tag=None, pos=None): 142 | 143 | inflections = [] 144 | 145 | # lemmatize 146 | lemmas = self.get_lemmas(word, tag=tag, pos=pos) 147 | for lemma in lemmas: 148 | # check dict forms first 149 | # those are potential corrections to lemminflect 150 | # returns None if not found 151 | lemma_i = [self.get_dict_form(lemma, tag=tag)] 152 | if not lemma_i[0]: 153 | lemma_i = self.inflect_lemma(lemma, tag=tag, pos=pos) 154 | inflections += lemma_i 155 | 156 | return inflections 157 | 158 | def inflect_or_lookup(self, word, tag=None, pos=None): 159 | 160 | if isinstance(word, Token): 161 | # token inflection tries spaCy ext (._.inflect) 162 | # with spaCy lemmatizer (.lemma_) 163 | return self.inflect_token(word, tag=tag, pos=pos) 164 | 165 | elif isinstance(word, str): 166 | return self.inflect_string(word, tag=tag, pos=pos) 167 |
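# Illustrative calls (a sketch only - the exact forms come from lemminflect's
# data and the forms_lookup table, so outputs are not guaranteed):
#   inflector.inflect_or_lookup("ran", tag="VBZ")   # -> ["runs"]
#   inflector.inflect_or_lookup("run", pos="NOUN")  # -> ["run", "runs"]
#   inflector.inflect_or_lookup(token, tag="VBD")   # Token input: dict lookup first, then token._.inflect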
168 | def insert(self, doc, suggestion: str, index: int): 169 | """ 170 | Returns the sentence with the inflected token inserted. 171 | If inflection is not supported - returns the original sentence. 172 | ex. She washed her eggs. -> She ate her eggs. 173 | If many inflections are returned, take the first form. 174 | """ 175 | 176 | # if a string is passed, convert it to a doc 177 | try: 178 | doc.text 179 | except AttributeError: 180 | doc = self.nlp(doc) 181 | 182 | infl_tokens = self.auto_inflect(doc, suggestion, index) 183 | 184 | # guard against an empty inflection list to avoid a NameError below 185 | infl_token = infl_tokens[0] if len(infl_tokens) else None 186 | 187 | if infl_token: 188 | token = doc[index] 189 | changed_sent = "".join( 190 | [doc.text[: token.idx], infl_token, doc.text[token.idx + len(token) :],] 191 | ) 192 | return changed_sent 193 | else: 194 | return doc.text 195 | -------------------------------------------------------------------------------- /replacy/util.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Any, Callable, Dict, List, Union 3 | 4 | import spacy 5 | from functional import seq 6 | from jsonschema import validate 7 | from spacy.tokens import Doc 8 | 9 | from replacy.db import get_match_dict_schema 10 | 11 | 12 | def set_known_extensions(span_class): 13 | known_string_extensions = ["match_name"] 14 | known_list_extensions = ["suggestions"] 15 | for ext in known_list_extensions: 16 | span_class.set_extension(ext, default=[], force=True) 17 | for ext in known_string_extensions: 18 | span_class.set_extension(ext, default="", force=True) 19 | expected_properties = ( 20 | ["patterns", "match_hook", "test"] 21 | + known_list_extensions 22 | + known_string_extensions 23 | ) 24 | return expected_properties 25 | 26 | 27 | # set custom extensions for any unexpected keys found in the match_dict 28 | def get_novel_prop_defaults(match_dict, span_class, expected_properties): 29 | """ 30 | Also mutates the ~global Span~ passed `span_class` to add any needed extensions 31 | """ 32 | novel_properties = ( 33 | seq(match_dict.values()) 34 | .flat_map(lambda x: x.keys()) 35 | .distinct() 36 | .difference(expected_properties) 37 | ) 38 | novel_prop_defaults: Dict[str, Any] = {} 39 | for x in match_dict.values(): 40 | for k, v in x.items(): 41 | if k in novel_properties and k not in novel_prop_defaults.keys(): 42 | if isinstance(v, str): 43 | novel_prop_defaults[k] = "" 44 | elif isinstance(v, list): 45 | novel_prop_defaults[k] = [] 46 | elif isinstance(v, dict): 47 | novel_prop_defaults[k] = {} 48 | elif isinstance(v, bool): # check bool before int: bool is a subclass of int 49 | novel_prop_defaults[k] = False 50 | elif isinstance(v, int): 51 | novel_prop_defaults[k] = 0 52 | elif isinstance(v, float): 53 | novel_prop_defaults[k] = 0.0 54 | else: 55 | # just default to whatever value we find 56 | print(k, v) 57 | novel_prop_defaults[k] = v 58 | for prop, default in novel_prop_defaults.items(): 59 | span_class.set_extension(prop, default=default, force=True) 60 | return novel_prop_defaults 61 | 62 | 63 | def validate_match_dict(match_dict): 64 | match_dict_schema = get_match_dict_schema() 65 | validate(instance=match_dict, schema=match_dict_schema) 66 | 67 | 68 | def equal_except_nth_place(list1, list2, n): 69 | # compares two lists, skips nth place 70 | 71 | # if empty: 72 | if not len(list1) * len(list2): 73 | return False 74 | 75 | # if suggestions come from different pre-suggestions - not comparable: 76 | if list1[0].id != list2[0].id: 77 | return False 78 | 79 | # if different length - not equal 80 | if len(list1) != len(list2): 81 | return False 82 | 83 | for i in range(len(list1)): 84 | if i != n: 85 | if list1[i].text != list2[i].text: 86 | return False 87 | return True 88 | 89 |
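# A quick sketch of equal_except_nth_place's behavior (using
# replacy.suggestion.Suggestion(text, max_count, id); values are illustrative):
#   s1 = [Suggestion("a", None, 0), Suggestion("cat", None, 0)]
#   s2 = [Suggestion("an", None, 0), Suggestion("cat", None, 0)]
#   equal_except_nth_place(s1, s2, 0)  # True: the lists agree everywhere but place 0
#   equal_except_nth_place(s1, s2, 1)  # False: place 0 differs and is compared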
90 | def eliminate_options(elem, chosen, rest): 91 | # use elem to eliminate elements above the max_count limits 92 | for i, item in enumerate(elem): 93 | # item with no max count 94 | max_count = item.max_count 95 | elem_text = item.text 96 | if max_count is None: 97 | continue 98 | # item is exclusive (= max count 1) 99 | elif max_count == 1: 100 | # eliminate equal except i from rest 101 | rest = [r for r in rest if not equal_except_nth_place(elem, r, i)] 102 | # item has a custom max count 103 | else: 104 | # count how many times this item has been used so far 105 | # in this very context 106 | current_count = [r for r in chosen if equal_except_nth_place(elem, r, i)] 107 | # if this is already the max (counting elem), eliminate other options from rest 108 | if len(current_count) >= max_count: 109 | rest = [r for r in rest if not equal_except_nth_place(elem, r, i)] 110 | return rest 111 | 112 | 113 | def get_predicates( 114 | match_hooks, default_match_hooks, custom_match_hooks 115 | ) -> List[Callable]: 116 | predicates = [] 117 | for hook in match_hooks: 118 | # template - ex. succeeded_by_phrase 119 | try: 120 | template = getattr(default_match_hooks, hook["name"]) 121 | except AttributeError: 122 | # if the hook isn't in custom_match_hooks, this will still 123 | # raise an exception. I think that is the correct behavior 124 | template = getattr(custom_match_hooks, hook["name"]) 125 | 126 | # predicate - filled template ex. succeeded_by_phrase("to") 127 | # will match "in addition to..." but not "in addition, ..." 128 | args = hook.get("args", None) 129 | kwargs = hook.get("kwargs", None) 130 | if args is None: 131 | if kwargs is None: 132 | # the match_hook is nullary 133 | pred = template() 134 | else: 135 | pred = template(**kwargs) 136 | elif isinstance(args, dict): 137 | # should we force them to use kwargs? 138 | warnings.warn( 139 | f"WARNING: dict passed as sole args argument. Calling {hook['name']} " 140 | f"with single argument {args}. If you want to call with keyword arguments, use kwargs" 141 | ) 142 | pred = template(args) 143 | else: 144 | # oops, bad design; we assume non-dict args are passed directly as a single argument 145 | pred = template(args) 146 | 147 | # to confuse people for centuries to come ...
148 | # negate, since positive breaks matching 149 | # see cb in get_callback 150 | if bool(hook.get("match_if_predicate_is", False)): 151 | # neg flips the boolean value of a predicate 152 | pred = default_match_hooks.neg(pred) 153 | predicates.append(pred) 154 | return predicates 155 | 156 | 157 | def make_doc_if_not_doc(text_or_doc: Union[str, Doc], nlp) -> Doc: 158 | if hasattr(text_or_doc, "text"): 159 | doc = text_or_doc 160 | else: 161 | doc = nlp(text_or_doc) 162 | return doc 163 | 164 | 165 | def at_most_one_is_not_none(*args) -> bool: 166 | return len(list(filter(bool, [x is not None for x in args]))) <= 1 167 | 168 | 169 | def attach_debug_hook(matches: Dict[str, Dict]) -> Dict[str, Dict]: 170 | new_matches = {} 171 | for match_name, match_dict in matches.items(): 172 | new_dict = match_dict 173 | hooks = match_dict.get("match_hook", []) 174 | hooks.append( 175 | { 176 | "name": "debug_hook", 177 | "args": match_name, 178 | "match_if_predicate_is": True, 179 | } 180 | ) 181 | new_dict["match_hook"] = hooks 182 | new_matches[match_name] = new_dict 183 | return new_matches 184 | -------------------------------------------------------------------------------- /replacy/resources/match_dict_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Schema for validation ReplaCy Match/Replace format", 3 | "type": "object", 4 | "definitions": { 5 | "replacyAttributeItem": { 6 | "type": "object", 7 | "properties": { 8 | "ORTH": { 9 | "$ref": "#/definitions/spacyValue" 10 | }, 11 | "TEXT": { 12 | "$ref": "#/definitions/spacyValue" 13 | }, 14 | "LEMMA": { 15 | "$ref": "#/definitions/spacyValue" 16 | }, 17 | "LOWER": { 18 | "$ref": "#/definitions/spacyValue" 19 | }, 20 | "LENGTH": { 21 | "$ref": "#/definitions/spacyValue" 22 | }, 23 | "POS": { 24 | "$ref": "#/definitions/spacyValue" 25 | }, 26 | "TAG": { 27 | "$ref": "#/definitions/spacyValue" 28 | }, 29 | "DEP": { 30 | "$ref": "#/definitions/spacyValue" 31 | }, 32 | "SHAPE": { 33 | "$ref": "#/definitions/spacyValue" 34 | }, 35 | "ENT_TYPE": { 36 | "$ref": "#/definitions/spacyValue" 37 | }, 38 | "OP": { 39 | "enum": [ 40 | "!", 41 | "?", 42 | "*", 43 | "+" 44 | ] 45 | }, 46 | "TEMPLATE_ID": { 47 | "type": "integer" 48 | } 49 | }, 50 | "patternProperties": { 51 | "^IS_[A-Z]*$": { 52 | "type": "boolean" 53 | } 54 | } 55 | }, 56 | "replacyAttribute": { 57 | "type": "array", 58 | "items": { 59 | "$ref": "#/definitions/replacyAttributeItem" 60 | }, 61 | "minItems": 1 62 | }, 63 | "spacyOperator": { 64 | "type": "object", 65 | "additionalProperties": false, 66 | "properties": { 67 | "REGEX": { 68 | "type": "string" 69 | }, 70 | "IN": { 71 | "type": "array", 72 | "items": { 73 | "type": "string" 74 | } 75 | }, 76 | "NOT_IN": { 77 | "type": "array", 78 | "items": { 79 | "type": "string" 80 | } 81 | }, 82 | "==": { 83 | "type": "number" 84 | }, 85 | ">=": { 86 | "type": "number" 87 | }, 88 | ">": { 89 | "type": "number" 90 | }, 91 | "<=": { 92 | "type": "number" 93 | }, 94 | "<": { 95 | "type": "number" 96 | }, 97 | "OP": { 98 | "enum": [ 99 | "!", 100 | "?", 101 | "*", 102 | "+" 103 | ] 104 | } 105 | } 106 | }, 107 | "textOperator": { 108 | "type": "object", 109 | "additionalProperties": false, 110 | "properties": { 111 | "IN": { 112 | "type": "array", 113 | "items": { 114 | "type": "string" 115 | } 116 | } 117 | } 118 | }, 119 | "spacyValue": { 120 | "oneOf": [ 121 | { 122 | "type": "string" 123 | }, 124 | { 125 | "type": "integer" 126 | }, 127 | { 128 | "$ref": "#/definitions/spacyOperator" 
129 | } 130 | ] 131 | }, 132 | "textValue":{ 133 | "oneOf":[ 134 | { 135 | "type": "string" 136 | }, 137 | { 138 | "$ref": "#/definitions/textOperator" 139 | } 140 | ] 141 | }, 142 | "replacySuggestionItem": { 143 | "type": "object", 144 | "additionalProperties": false, 145 | "properties": { 146 | "TEXT": { 147 | "$ref": "#/definitions/textValue" 148 | }, 149 | "FROM_TEMPLATE_ID": { 150 | "type": "integer" 151 | }, 152 | "PATTERN_REF": { 153 | "type": "integer" 154 | }, 155 | "REPLACY_OP": { 156 | "enum": [ 157 | "LOWER", 158 | "UPPER", 159 | "TITLE" 160 | ] 161 | }, 162 | "INFLECTION": { 163 | "enum": [ 164 | "ADJ", 165 | "ADV", 166 | "PROPN", 167 | "VERB", 168 | "AUX", 169 | "JJ", 170 | "JJR", 171 | "JJS", 172 | "RB", 173 | "RBR", 174 | "RBS", 175 | "NN", 176 | "NNS", 177 | "NNP", 178 | "NNPS", 179 | "VB", 180 | "VBD", 181 | "VBG", 182 | "VBN", 183 | "VBP", 184 | "VBZ", 185 | "MD", 186 | "ALL" 187 | ] 188 | } 189 | } 190 | }, 191 | "replacySuggestion": { 192 | "type": "array", 193 | "items": { 194 | "$ref": "#/definitions/replacySuggestionItem" 195 | } 196 | }, 197 | "matchHookItem": { 198 | "type": "object", 199 | "properties": { 200 | "name": { 201 | "type": "string" 202 | }, 203 | "args": { 204 | "oneOf": [ 205 | { 206 | "type": "array" 207 | }, 208 | { 209 | "type": "string" 210 | }, 211 | { 212 | "type": "number" 213 | }, 214 | { 215 | "type": "boolean" 216 | } 217 | ] 218 | }, 219 | "kwargs": { 220 | "type": "object" 221 | }, 222 | "match_if_predicate_is": { 223 | "type": "boolean" 224 | } 225 | }, 226 | "required": [ 227 | "name", 228 | "match_if_predicate_is" 229 | ] 230 | } 231 | }, 232 | "patternProperties": { 233 | "^[a-z_-][A-Za-z0-9_-]*$": { 234 | "type": "object", 235 | "properties": { 236 | "patterns": { 237 | "type": "array", 238 | "items": { 239 | "$ref": "#/definitions/replacyAttribute" 240 | }, 241 | "minItems": 1 242 | }, 243 | "suggestions": { 244 | "type": "array", 245 | "minItems": 0, 246 | "items": { 247 | "$ref": "#/definitions/replacySuggestion" 248 | } 249 | }, 250 | "match_hook": { 251 | "type": "array", 252 | "minItems": 0, 253 | "items": { 254 | "$ref": "#/definitions/matchHookItem" 255 | } 256 | }, 257 | "test": { 258 | "type": "object", 259 | "additionalProperties": false, 260 | "properties": { 261 | "positive": { 262 | "type": "array", 263 | "items": { 264 | "type": "string" 265 | } 266 | }, 267 | "negative": { 268 | "type": "array", 269 | "items": { 270 | "type": "string" 271 | } 272 | } 273 | } 274 | }, 275 | "description": { 276 | "type": "string" 277 | }, 278 | "comment": { 279 | "type": "string" 280 | }, 281 | "category": { 282 | "type": "string" 283 | } 284 | }, 285 | "required": [ 286 | "patterns", 287 | "suggestions" 288 | ] 289 | } 290 | } 291 | } -------------------------------------------------------------------------------- /replacy/resources/match_dict.json: -------------------------------------------------------------------------------- 1 | { 2 | "extract-revenge": { 3 | "patterns": [ 4 | [ 5 | { 6 | "LEMMA": "extract", 7 | "TEMPLATE_ID": 1 8 | } 9 | ] 10 | ], 11 | "suggestions": [ 12 | [ 13 | { 14 | "TEXT": "exact", 15 | "FROM_TEMPLATE_ID": 1 16 | } 17 | ] 18 | ], 19 | "match_hook": [ 20 | { 21 | "name": "succeeded_by_phrase", 22 | "args": "revenge", 23 | "match_if_predicate_is": true 24 | } 25 | ], 26 | "test": { 27 | "positive": [ 28 | "And at the same time extract revenge on those he so despises?", 29 | "Watch as Tampa Bay extracts revenge against his former Los Angeles Rams team." 
30 | ], 31 | "negative": [ 32 | "Mother flavours her custards with lemon extract." 33 | ] 34 | } 35 | }, 36 | "make-due": { 37 | "patterns": [ 38 | [ 39 | { 40 | "LEMMA": "make", 41 | "TEMPLATE_ID": 1 42 | }, 43 | { 44 | "LOWER": "due" 45 | } 46 | ] 47 | ], 48 | "suggestions": [ 49 | [ 50 | { 51 | "TEXT": "make", 52 | "FROM_TEMPLATE_ID": 1 53 | }, 54 | { 55 | "TEXT": "do" 56 | } 57 | ] 58 | ], 59 | "test": { 60 | "positive": [ 61 | "Viewers will have to make due with tired re-runs and second-rate movies." 62 | ], 63 | "negative": [ 64 | "The empty vessels make the greatest sound.", 65 | "I'll make do.", 66 | "She only has sons; she'll make dudes." 67 | ] 68 | }, 69 | "comment": "this is an example comment", 70 | "description": "The expression is \"make do\".", 71 | "category": "R:VERB", 72 | "unexpected": "replaCy should handle arbitrary properties here, and attach them to the relevant spans" 73 | }, 74 | "requirement": { 75 | "patterns": [ 76 | [ 77 | { 78 | "LEMMA": "requirement", 79 | "POS": "NOUN", 80 | "TEMPLATE_ID": 1 81 | } 82 | ] 83 | ], 84 | "suggestions": [ 85 | [ 86 | { 87 | "TEXT": "need", 88 | "FROM_TEMPLATE_ID": 1 89 | } 90 | ] 91 | ], 92 | "match_hook": [ 93 | { 94 | "name": "part_of_compound", 95 | "match_if_predicate_is": false 96 | }, 97 | { 98 | "name": "preceded_by_lemma", 99 | "kwargs": { 100 | "lemma": "hello", 101 | "distance": 22 102 | }, 103 | "match_if_predicate_is": false 104 | } 105 | ], 106 | "test": { 107 | "positive": [ 108 | "The system has the following requirements: blood of a virgin, suffering, and cat food.", 109 | "Our immediate requirement is extra staff." 110 | ], 111 | "negative": [ 112 | "There is a residency requirement for obtaining citizenship.", 113 | "What is the minimum entrance requirement for this course?" 114 | ] 115 | } 116 | }, 117 | "lt-example": { 118 | "patterns": [ 119 | [ 120 | { 121 | "LOWER": { 122 | "IN": [ 123 | "have", 124 | "has" 125 | ] 126 | } 127 | }, 128 | { 129 | "TAG": { 130 | "IN": [ 131 | "VBD", 132 | "VBP", 133 | "VB", 134 | "VBN" 135 | ] 136 | } 137 | }, 138 | { 139 | "TAG": { 140 | "NOT_IN": [ 141 | "VBG" 142 | ] 143 | } 144 | } 145 | ] 146 | ], 147 | "suggestions": [ 148 | [ 149 | { 150 | "PATTERN_REF": 0 151 | }, 152 | { 153 | "PATTERN_REF": 1, 154 | "INFLECTION": "VBN" 155 | }, 156 | { 157 | "PATTERN_REF": 2 158 | } 159 | ] 160 | ], 161 | "description": "Possible agreement error -- use past participle here", 162 | "test": { 163 | "positive": [ 164 | "I have eat this" 165 | ], 166 | "negative": [ 167 | "I ate this" 168 | ] 169 | } 170 | }, 171 | "assemble_attach_together": { 172 | "comment": "Match the word together if it is a modifier of any form of assemble or attach, and suggest removing it", 173 | "patterns": [ 174 | [ 175 | { 176 | "LOWER": "together" 177 | } 178 | ] 179 | ], 180 | "match_hook": [ 181 | { 182 | "name": "relative_x_is_y", 183 | "kwargs": { 184 | "children_or_ancestors": "ancestors", 185 | "pos_or_dep": "dep", 186 | "value": "ROOT" 187 | }, 188 | "match_if_predicate_is": true 189 | } 190 | ], 191 | "suggestions": [ 192 | [ 193 | { 194 | "TEXT": "" 195 | } 196 | ] 197 | ], 198 | "test": { 199 | "positive": [ 200 | "Avengers, assemble the team together!", 201 | "We assembled the furniture together." 202 | ], 203 | "negative": [ 204 | "After we assemble, we can go together", 205 | "My arm is attached to my shoulder, I like that they are together." 
206 | ] 207 | } 208 | }, 209 | "effective_in_its_ability": { 210 | "patterns": [ 211 | [ 212 | { 213 | "LEMMA": "be", 214 | "TEMPLATE_ID": 1 215 | }, 216 | { 217 | "LOWER": "effective" 218 | }, 219 | { 220 | "LOWER": "in" 221 | }, 222 | { 223 | "DEP": "poss" 224 | }, 225 | { 226 | "LOWER": "ability" 227 | }, 228 | { 229 | "LOWER": "to" 230 | }, 231 | { 232 | "POS": "VERB" 233 | } 234 | ] 235 | ], 236 | "suggestions": [ 237 | [ 238 | { 239 | "TEXT": "effectively" 240 | }, 241 | { 242 | "PATTERN_REF": 6, 243 | "FROM_TEMPLATE_ID": 1 244 | } 245 | ] 246 | ], 247 | "comment": "You can use pattern_ref and from_template_id together", 248 | "test": { 249 | "positive": [ 250 | "The pail was effective in its ability to carry water" 251 | ], 252 | "negative": [ 253 | "The pail wasn't effective in its ability to carry water" 254 | ] 255 | } 256 | }, 257 | "dupe-test": { 258 | "patterns": [ 259 | [ 260 | { 261 | "LEMMA": "make", 262 | "TEMPLATE_ID": 1 263 | } 264 | ] 265 | ], 266 | "suggestions": [ 267 | [ 268 | { 269 | "TEXT": "build", 270 | "FROM_TEMPLATE_ID": 1 271 | } 272 | ] 273 | ], 274 | "comment": "This is a bad match, it is here to demonstrate overlap behavior", 275 | "test": { 276 | "positive": [ 277 | "I will make something" 278 | ], 279 | "negative": [ 280 | "I will build something" 281 | ] 282 | } 283 | }, 284 | "all-caps": { 285 | "patterns": [ 286 | [ 287 | { 288 | "IS_UPPER": true, 289 | "TEXT": { 290 | "REGEX": "^[A-Z]{2,}$" 291 | }, 292 | "OP": "+" 293 | }, 294 | { 295 | "IS_LOWER": true, 296 | "OP": "*" 297 | } 298 | ] 299 | ], 300 | "suggestions": [ 301 | [ 302 | { 303 | "PATTERN_REF": 0, 304 | "REPLACY_OP": "LOWER" 305 | }, 306 | { 307 | "PATTERN_REF": 1, 308 | "REPLACY_OP": "UPPER" 309 | } 310 | ] 311 | ], 312 | "test": { 313 | "positive": [ 314 | "TENNIS is a lovely game.", 315 | "THIS IS SO SILLY", 316 | "THIS IS SO SILLY waay to go" 317 | ], 318 | "negative": [ 319 | "this is so silly" 320 | ] 321 | } 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /replacy/resources/patterns_test_data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "hook_name": "succeeded_by_phrase", 4 | "args": "up", 5 | "text": "Do not, for one repulse, give up the purpose that you resolved to effect.", 6 | "start": 7, 7 | "end": 8, 8 | "result": true 9 | }, 10 | { 11 | "hook_name": "succeeded_by_phrase", 12 | "args": "up", 13 | "text": "Give as good as one gets.", 14 | "start": 0, 15 | "end": 1, 16 | "result": false 17 | }, 18 | { 19 | "hook_name": "succeeded_by_phrase", 20 | "args": [ 21 | "height", 22 | "value", 23 | "size", 24 | "number" 25 | ], 26 | "text": "He was only five feet nine, the minimum height for a policeman.", 27 | "start": 8, 28 | "end": 9, 29 | "result": true 30 | }, 31 | { 32 | "hook_name": "succeeded_by_phrase", 33 | "args": [ 34 | "height", 35 | "value", 36 | "size", 37 | "number" 38 | ], 39 | "text": "Raising the minimum wage would ratchet up real incomes in general.", 40 | "start": 2, 41 | "end": 3, 42 | "result": false 43 | }, 44 | { 45 | "hook_name": "succeeded_by_phrase", 46 | "args": [ 47 | "height", 48 | "value", 49 | "size", 50 | "number" 51 | ], 52 | "text": "You must get a minimum of 60 questions right to pass the examination.", 53 | "start": 4, 54 | "end": 5, 55 | "result": false 56 | }, 57 | { 58 | "hook_name": "succeeded_by_pos", 59 | "args": "ADV", 60 | "text": "Their car was bigger and therefore more comfortable.", 61 | "start": 3, 62 | "end": 5, 63 | "result": 
true 64 | }, 65 | { 66 | "hook_name": "succeeded_by_pos", 67 | "args": [ 68 | "PRON", 69 | "DET" 70 | ], 71 | "text": "You do it.", 72 | "start": 1, 73 | "end": 2, 74 | "result": true 75 | }, 76 | { 77 | "hook_name": "succeeded_by_pos", 78 | "args": [ 79 | "PRON", 80 | "DET" 81 | ], 82 | "text": "She does a dance.", 83 | "start": 1, 84 | "end": 2, 85 | "result": true 86 | }, 87 | { 88 | "hook_name": "succeeded_by_pos", 89 | "args": [ 90 | "PRON", 91 | "DET" 92 | ], 93 | "text": "I do fun things.", 94 | "start": 1, 95 | "end": 2, 96 | "result": false 97 | }, 98 | { 99 | "hook_name": "succeeded_by_pos", 100 | "args": "ADJ", 101 | "text": "Their car was bigger and therefore more comfortable.", 102 | "start": 3, 103 | "end": 5, 104 | "result": false 105 | }, 106 | { 107 | "hook_name": "preceded_by_pos", 108 | "args": "AUX", 109 | "text": "Their car was bigger and therefore more comfortable.", 110 | "start": 3, 111 | "end": 4, 112 | "result": true 113 | }, 114 | { 115 | "hook_name": "succeeded_by_dep", 116 | "args": "conj", 117 | "text": "Their car was bigger and therefore more comfortable.", 118 | "start": 6, 119 | "end": 7, 120 | "result": true 121 | }, 122 | { 123 | "hook_name": "succeeded_by_dep", 124 | "args": "punct", 125 | "text": "Their car was bigger and therefore more comfortable.", 126 | "start": 6, 127 | "end": 7, 128 | "result": false 129 | }, 130 | { 131 | "hook_name": "preceded_by_dep", 132 | "args": "advmod", 133 | "text": "Their car was bigger and therefore more comfortable.", 134 | "start": 7, 135 | "end": 8, 136 | "result": true 137 | }, 138 | { 139 | "hook_name": "preceded_by_dep", 140 | "args": "cc", 141 | "text": "Their car was bigger and therefore more comfortable.", 142 | "start": 7, 143 | "end": 8, 144 | "result": false 145 | }, 146 | { 147 | "hook_name": "preceded_by_dep", 148 | "args": [ 149 | "ROOT", 150 | "mark" 151 | ], 152 | "text": "If Sam wants to party; let him", 153 | "start": 1, 154 | "end": 2, 155 | "result": true 156 | }, 157 | { 158 | "hook_name": "preceded_by_dep", 159 | "args": [ 160 | "ROOT", 161 | "mark" 162 | ], 163 | "text": "My name is Sam.", 164 | "start": 3, 165 | "end": 4, 166 | "result": true 167 | }, 168 | { 169 | "hook_name": "preceded_by_dep", 170 | "args": [ 171 | "ROOT", 172 | "mark" 173 | ], 174 | "text": "That's called a 'beer', Sam.", 175 | "start": 8, 176 | "end": 9, 177 | "result": false 178 | }, 179 | { 180 | "hook_name": "part_of_compound", 181 | "args": null, 182 | "text": "Our immediate requirement is extra staff.", 183 | "start": 2, 184 | "end": 3, 185 | "result": false 186 | }, 187 | { 188 | "hook_name": "part_of_compound", 189 | "args": null, 190 | "text": "There is a residency requirement for obtaining citizenship.", 191 | "start": 4, 192 | "end": 5, 193 | "result": true 194 | }, 195 | { 196 | "hook_name": "surrounded_by_phrase", 197 | "args": ",", 198 | "text": "The dog is young, well trained, and good natured.", 199 | "start": 5, 200 | "end": 7, 201 | "result": true 202 | }, 203 | { 204 | "hook_name": "surrounded_by_phrase", 205 | "args": ",", 206 | "text": "The dog is young, well trained, and good natured.", 207 | "start": 5, 208 | "end": 6, 209 | "result": false 210 | }, 211 | { 212 | "hook_name": "succeeded_by_num", 213 | "args": null, 214 | "text": "The area is approximately 100 square kilometers.", 215 | "start": 3, 216 | "end": 4, 217 | "result": true 218 | }, 219 | { 220 | "hook_name": "succeeded_by_num", 221 | "args": null, 222 | "text": "The pies have a shelf life of approximately one week.", 223 | "start": 7, 224 | 
"end": 8, 225 | "result": true 226 | }, 227 | { 228 | "hook_name": "succeeded_by_num", 229 | "args": null, 230 | "text": "The conservatory measures approximately 13ft x 16ft.", 231 | "start": 3, 232 | "end": 4, 233 | "result": true 234 | }, 235 | { 236 | "hook_name": "succeeded_by_num", 237 | "args": null, 238 | "text": "Officials at the school say they received a bomb threat at approximately 11:30 a.m. today.", 239 | "start": 11, 240 | "end": 12, 241 | "result": true 242 | }, 243 | { 244 | "hook_name": "succeeded_by_num", 245 | "args": null, 246 | "text": "One pound is approximately equal to 454 grams.", 247 | "start": 3, 248 | "end": 4, 249 | "result": false 250 | }, 251 | { 252 | "hook_name": "succeeded_by_num", 253 | "args": null, 254 | "text": "The village has approximately doubled in size since 1960.", 255 | "start": 4, 256 | "end": 5, 257 | "result": false 258 | }, 259 | { 260 | "hook_name": "succeeded_by_num", 261 | "args": null, 262 | "text": "Gain got by a lie will burn one’s fingers.", 263 | "start": 7, 264 | "end": 8, 265 | "result": false 266 | }, 267 | { 268 | "hook_name": "succeeded_by_currency", 269 | "args": null, 270 | "text": "Approximately $150 million is to be spent on improvements.", 271 | "start": 0, 272 | "end": 1, 273 | "result": true 274 | }, 275 | { 276 | "hook_name": "succeeded_by_currency", 277 | "args": null, 278 | "text": "I paid them £100 for the damage and I hope that's the last I'll hear of it.", 279 | "start": 2, 280 | "end": 3, 281 | "result": true 282 | }, 283 | { 284 | "hook_name": "succeeded_by_currency", 285 | "args": null, 286 | "text": "I have debited ~100 against your account.", 287 | "start": 2, 288 | "end": 3, 289 | "result": false 290 | }, 291 | { 292 | "hook_name": "relative_x_is_y", 293 | "kwargs": { 294 | "children_or_ancestors": "children", 295 | "pos_or_dep": "dep", 296 | "value": "csubj" 297 | }, 298 | "text": "Your condition is serious and requires surgery.", 299 | "start": 5, 300 | "end": 6, 301 | "result": false 302 | }, 303 | { 304 | "hook_name": "relative_x_is_y", 305 | "kwargs": { 306 | "children_or_ancestors": "children", 307 | "pos_or_dep": "dep", 308 | "value": "csubj" 309 | }, 310 | "text": "I require stimulants to function.", 311 | "start": 1, 312 | "end": 2, 313 | "result": false 314 | }, 315 | { 316 | "hook_name": "relative_x_is_y", 317 | "kwargs": { 318 | "children_or_ancestors": "children", 319 | "pos_or_dep": "dep", 320 | "value": "csubj" 321 | }, 322 | "text": "Deciphering the code requires an expert.", 323 | "start": 3, 324 | "end": 4, 325 | "result": true 326 | }, 327 | { 328 | "hook_name": "relative_x_is_y", 329 | "kwargs": { 330 | "children_or_ancestors": "children", 331 | "pos_or_dep": "dep", 332 | "value": "csubj" 333 | }, 334 | "text": "Making small models requires manual skill.", 335 | "start": 3, 336 | "end": 4, 337 | "result": true 338 | }, 339 | { 340 | "hook_name": "part_of_phrase", 341 | "args": "hungry for apples", 342 | "text": "he seems really hungry for apples today", 343 | "start": 5, 344 | "end": 6, 345 | "result": true 346 | }, 347 | { 348 | "hook_name": "part_of_phrase", 349 | "args": "hungry for apples", 350 | "text": "he seems really hungry for some apples today", 351 | "start": 6, 352 | "end": 7, 353 | "result": false 354 | }, 355 | { 356 | "hook_name": "part_of_phrase", 357 | "args": "hungry for apples today", 358 | "text": "he seems really hungry for apples today", 359 | "start": 5, 360 | "end": 6, 361 | "result": true 362 | }, 363 | { 364 | "hook_name": "part_of_phrase", 365 | "args": "hungry 
for apples today apples", 366 | "text": "he seems really hungry for apples today apples", 367 | "start": 5, 368 | "end": 6, 369 | "result": true 370 | }, 371 | { 372 | "hook_name": "part_of_phrase", 373 | "args": "hungry for apples today apples", 374 | "text": "he seems really hungry for apples today apples", 375 | "start": 7, 376 | "end": 8, 377 | "result": true 378 | }, 379 | { 380 | "hook_name": "sentence_has", 381 | "args": [ 382 | "rick", 383 | "morty", 384 | "jerry", 385 | "wubba lubba dub dub" 386 | ], 387 | "text": "I turned myself into a pickle. I'm pickle rick!", 388 | "start": 1, 389 | "end": 2, 390 | "result": true 391 | }, 392 | { 393 | "hook_name": "sentence_has", 394 | "args": [ 395 | "rick", 396 | "morty", 397 | "jerry", 398 | "wubba lubba dub dub" 399 | ], 400 | "text": "I turned myself into a pickle. I'm pickle rick!", 401 | "start": 7, 402 | "end": 8, 403 | "result": true 404 | }, 405 | { 406 | "hook_name": "sentence_has", 407 | "args": [ 408 | "rick", 409 | "morty", 410 | "jerry", 411 | "wubba lubba dub dub" 412 | ], 413 | "text": "I turned myself into a pickle. I'm pickle rick!", 414 | "start": 8, 415 | "end": 9, 416 | "result": true 417 | }, 418 | { 419 | "hook_name": "sentence_has", 420 | "args": [ 421 | "rick", 422 | "morty", 423 | "jerry", 424 | "wubba lubba dub dub" 425 | ], 426 | "text": "Wubba lubba dub dub means I am in great pain, please help me", 427 | "start": 6, 428 | "end": 7, 429 | "result": true 430 | } 431 | ] -------------------------------------------------------------------------------- /replacy/suggestion.py: -------------------------------------------------------------------------------- 1 | import re 2 | import warnings 3 | 4 | from functional import seq 5 | 6 | from replacy.inflector import Inflector 7 | from replacy.ref_matcher import RefMatcher 8 | 9 | 10 | class SuggestionGenerator: 11 | def __init__( 12 | self, nlp, forms_lookup=None, filter_suggestions=False, default_max_count=None 13 | ): 14 | self.forms_lookup = forms_lookup 15 | self.inflector = Inflector(nlp=nlp, forms_lookup=self.forms_lookup) 16 | self.ref_matcher = (RefMatcher()) 17 | self.filter_suggestions = filter_suggestions 18 | self.default_max_count = default_max_count 19 | 20 | @staticmethod 21 | def get_options(item, doc, start, end, pattern, pattern_ref): 22 | item_options = [] 23 | # set 24 | if "TEXT" in item: 25 | if isinstance(item["TEXT"], dict): 26 | item_options = item["TEXT"].get("IN", []) 27 | elif isinstance(item["TEXT"], str): 28 | item_options = [item["TEXT"]] 29 | # copy 30 | elif "PATTERN_REF" in item: 31 | ref = int(item["PATTERN_REF"]) 32 | if ref >= 0: 33 | try: 34 | refd_text = None 35 | if ref in pattern_ref: 36 | refd_tokens = pattern_ref[ref] 37 | if len(refd_tokens): 38 | min_i = start + min(refd_tokens) 39 | max_i = start + max(refd_tokens) 40 | refd_text = doc[min_i : max_i + 1].text 41 | except: 42 | warnings.warn( 43 | f"Ref matcher failed for span {doc[start:end]} and {pattern_ref}." 44 | ) 45 | refd_text = doc[start + ref].text 46 | else: 47 | # this is confusing. 
Example: 48 | # doc = nlp("I like apples, blood oranges, and bananas") 49 | # start = 2, end = 9 gives doc[start:end] == "apples, blood oranges, and bananas" 50 | # but doc[9] != "bananas", it is an IndexError, the last token is end-1 51 | # so, per python conventions, PATTERN_REF = -1 would mean the last matched token 52 | # so we can just add ref and end if ref is negative 53 | # to do: match again to get multi-token 54 | try: 55 | # map ref to positive 56 | ref = len(pattern_ref) + ref 57 | refd_tokens = pattern_ref[ref] 58 | if len(refd_tokens): 59 | min_i = start + min(refd_tokens) 60 | max_i = start + max(refd_tokens) 61 | refd_text = doc[min_i : max_i + 1].text 62 | else: 63 | refd_text = None 64 | except: 65 | warnings.warn( 66 | f"Ref matcher failed for span {doc[start:end]} and {pattern_ref}." 67 | ) 68 | refd_text = doc[end + ref].text 69 | 70 | if refd_text: 71 | if "REGEX" in item: 72 | regex_p = pattern[item["PATTERN_REF"]] 73 | # regex is with ignore case flag 74 | # so having this line to avoid exception when LOWER isn't in the pattern 75 | # if at any point needed to be specific or use case sensitive 76 | # we should add "REGEX_KEY" (TEXT or LOWER) in suggestions 77 | regex_pattern = ( 78 | regex_p["LOWER"]["REGEX"] 79 | if "LOWER" in regex_p 80 | else regex_p["TEXT"]["REGEX"] 81 | ) 82 | regex_replace = item["REGEX"] 83 | refd_text = re.sub( 84 | regex_pattern, regex_replace, refd_text, flags=re.IGNORECASE 85 | ) 86 | 87 | if "SUFFIX" in item: 88 | refd_text += item["SUFFIX"] 89 | 90 | item_options = [refd_text] 91 | else: 92 | item_options = [] 93 | 94 | return item_options 95 | 96 | def get_item_max_count(self, item, item_options): 97 | 98 | # max count can be hard set in match_dict 99 | max_count = item.get("MAX_COUNT", None) 100 | if max_count: 101 | return max_count 102 | 103 | # can be soft set by default 104 | # but no more than possible - ex. list len 105 | # or maximal ie. list len 106 | if self.default_max_count: 107 | max_count = min(self.default_max_count, len(item_options)) 108 | else: 109 | max_count = len(item_options) 110 | 111 | # if we don't want to guess max count 112 | # to eliminate grammatical variants 113 | # end here 114 | if not self.filter_suggestions: 115 | return max_count 116 | 117 | # if max count is not hard set 118 | # try to lower max count in special cases (A - G) 119 | # to eliminate non grammatical suggestions 120 | 121 | # A. empty 122 | # ex. [] 123 | if not len(item_options): 124 | return 1 125 | 126 | # B. contains non letters 127 | # ex. ["", ","] 128 | if not all([o.isalpha() for o in item_options]): 129 | return 1 130 | 131 | # C. is multi token 132 | # ex. ["in a", "for"] 133 | if max([len(o.split()) for o in item_options]) > 1: 134 | return 1 135 | 136 | # D. if inflection is set to tag - good 137 | # other options - will always return many 138 | if "INFLECTION" in item: 139 | inflection = item.get("INFLECTION") 140 | inflection_type = self.inflector.get_inflection_type(inflection) 141 | if inflection_type != "tag": 142 | return 1 143 | 144 | # contains many options 145 | # ex. ["eat", "walk"] 146 | if len(item_options) > 1: 147 | 148 | # E. contains words of the same lemma 149 | # ex. [slow, slowly] 150 | lemmas = set([]) 151 | for option in item_options: 152 | option_lemmas = set(self.inflector.get_lemmas(option)) 153 | if len(lemmas & option_lemmas): 154 | return 1 155 | lemmas |= option_lemmas 156 | 157 | # F. det: 158 | # ex. 
["a", "an"] 159 | if any([article in item_options for article in ["a", "an", "the"]]): 160 | return 1 161 | 162 | # G. irregular plurals - only 2 detected so hardcoded 163 | # person / people 164 | # ox / oxen 165 | if all([el in item_options for el in ["person", "people"]]) or all( 166 | [el in item_options for el in ["ox", "oxen"]] 167 | ): 168 | return 1 169 | 170 | return max_count 171 | 172 | def inflect(self, item, item_options, pattern, pattern_ref, doc, start, end): 173 | # set 174 | if "INFLECTION" in item: 175 | inflection_value = item["INFLECTION"] 176 | inflection_type = self.inflector.get_inflection_type(inflection_value) 177 | if inflection_type == "pos": 178 | # set by pos 179 | item_options = ( 180 | seq(item_options) 181 | .map( 182 | lambda x: self.inflector.inflect_or_lookup( 183 | x, pos=inflection_value 184 | ) 185 | ) 186 | .flatten() 187 | .list() 188 | ) 189 | elif inflection_type == "tag": 190 | # set by tag 191 | item_options = ( 192 | seq(item_options) 193 | .map( 194 | lambda x: self.inflector.inflect_or_lookup( 195 | x, tag=inflection_value 196 | ) 197 | ) 198 | .flatten() 199 | .list() 200 | ) 201 | else: 202 | # get all forms 203 | item_options = ( 204 | seq(item_options) 205 | .map(lambda x: self.inflector.inflect_or_lookup(x, pos=None)) 206 | .flatten() 207 | .list() 208 | ) 209 | # copy 210 | elif "FROM_TEMPLATE_ID" in item: 211 | template_id = int(item["FROM_TEMPLATE_ID"]) 212 | index = None 213 | for i, token in enumerate(pattern): 214 | if "TEMPLATE_ID" in token and token["TEMPLATE_ID"] == template_id: 215 | index = i 216 | break 217 | 218 | # use token <-> pattern mapping 219 | # given pattern index, find doc index: 220 | doc_indices = pattern_ref[index] 221 | if len(doc_indices) == 0: 222 | # fallback to direct mapping: 223 | warnings.warn( 224 | f"Ref matcher failed for span {doc[start:end]} and {pattern_ref}." 225 | ) 226 | doc_index = index 227 | elif len(doc_indices) >= 1: 228 | # == 1 good case 229 | # >1 more tokens found, fallback to the first token 230 | doc_index = doc_indices[0] 231 | 232 | if doc_index is not None: 233 | item_options = ( 234 | seq(item_options) 235 | .map( 236 | lambda x: self.inflector.auto_inflect(doc, x, start + doc_index) 237 | ) 238 | .flatten() 239 | .list() 240 | ) 241 | return item_options 242 | 243 | def case(self, item, item_options): 244 | # This should probably be a list of ops 245 | # and we should have a parser class 246 | if "REPLACY_OP" in item: 247 | op = item["REPLACY_OP"] 248 | if op == "LOWER": 249 | item_options = [t.lower() for t in item_options] 250 | if op == "TITLE": 251 | item_options = [t.title() for t in item_options] 252 | if op == "UPPER": 253 | item_options = [t.upper() for t in item_options] 254 | return item_options 255 | 256 | def __call__( 257 | self, pre_suggestion, doc, start, end, pattern, pre_suggestion_id, alignments 258 | ): 259 | """ 260 | Suggestion text: 261 | - set: "TEXT": "cat" 262 | - choose one from: "TEXT": {"IN": ["a", "b"]} 263 | - copy from pattern: "PATTERN_REF": 3 (copy from 3rd pattern match) 264 | Set suggestion text inflection: 265 | - set by tag: "INFLECTION": "VBG" (returns one) 266 | - set by pos: "INFLECTION": "NOUN" (returns many. ex. 
NNS, NN) 267 | - get all: "INFLECTION": "ALL" (returns a lot, use infrequently) 268 | - copy from pattern: "FROM_TEMPLATE_ID": 2 (copy from token with "TEMPLATE_ID": 2) 269 | Suggestions case matching: 270 | - lowercase: "REPLACY_OP": "LOWER" 271 | - title: "REPLACY_OP": "TITLE" 272 | - upper: "REPLACY_OP": "UPPER" 273 | Suggestions item max count: 274 | - set manually: "MAX_COUNT": n (int) (take best n words from options) 275 | - implied MAX_COUNT = 1 if words share the same lemma or are mutually exclusive, ex. a/an 276 | """ 277 | # get token <-> pattern correspondence 278 | pattern_obj = pattern[0] 279 | pattern_ref = self.ref_matcher(doc[start:end], pattern_obj, alignments) 280 | 281 | suggestions = [] 282 | 283 | for item in pre_suggestion: 284 | # get text 285 | item_options = SuggestionGenerator.get_options( 286 | item, doc, start, end, pattern_obj, pattern_ref 287 | ) 288 | 289 | # read the max count, or guess it 290 | max_count = self.get_item_max_count(item, item_options) 291 | 292 | # inflect 293 | inflected_options = self.inflect( 294 | item, item_options, pattern_obj, pattern_ref, doc, start, end 295 | ) 296 | 297 | # case 298 | cased_options = self.case(item, inflected_options) 299 | 300 | # if non empty (can be empty when matching with OP) 301 | if len(cased_options): 302 | suggestion_variant = SuggestionVariants( 303 | cased_options, max_count, pre_suggestion_id 304 | ) 305 | suggestions.append(suggestion_variant) 306 | 307 | return suggestions 308 | 309 | 310 | class SuggestionVariants: 311 | def __init__(self, cased_options, max_count, id): 312 | self.cased_options = cased_options 313 | self.max_count = max_count 314 | self.id = id 315 | 316 | def __len__(self): 317 | return len(self.cased_options) 318 | 319 | def __repr__(self): 320 | return f'(cased_options={",".join(self.cased_options)}, max_count={self.max_count}, id={self.id})' 321 | 322 | def __iter__(self): 323 | for option in self.cased_options: 324 | yield Suggestion(option, self.max_count, self.id) 325 | 326 | 327 | class Suggestion: 328 | def __init__(self, text, max_count, id): 329 | self.text = text 330 | self.max_count = max_count 331 | self.id = id 332 | 333 | def __repr__(self): 334 | return f"(text={self.text}, max_count={self.max_count}, id={self.id})" -------------------------------------------------------------------------------- /replacy/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import itertools 3 | import logging 4 | import warnings 5 | from types import ModuleType 6 | from typing import Callable, List, Optional, Tuple 7 | 8 | from functional import seq 9 | from spacy.matcher import Matcher 10 | from spacy.tokens import Span 11 | from spacy.tokens.underscore import get_ext_args 12 | 13 | from replacy import default_match_hooks 14 | from replacy.db import get_forms_lookup, get_match_dict, load_lm 15 | from replacy.default_scorer import Scorer 16 | from replacy.suggestion import SuggestionGenerator 17 | from replacy.suggestion_joiner import join_suggestions 18 | from replacy.util import ( 19 | at_most_one_is_not_none, 20 | attach_debug_hook, 21 | eliminate_options, 22 | get_novel_prop_defaults, 23 | get_predicates, 24 | make_doc_if_not_doc, 25 | set_known_extensions, 26 | validate_match_dict, 27 | ) 28 | from replacy.version import __version__ 29 | 30 | logging.basicConfig(level=logging.INFO) 31 | 32 | PipelineComponent = Callable[[List[Span]], List[Span]] 33 | 34 |
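# For illustration, a conforming pipeline component looks like this
# ("keep_top_3" is hypothetical and not part of replaCy):
#
#   def keep_top_3(spans: List[Span]) -> List[Span]:
#       for span in spans:
#           span._.suggestions = span._.suggestions[:3]
#       return spans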
35 | class ESpan(Span): 36 | """ 37 | dangerous version of Span class 38 | intentionally bypasses the _ attribute so that the class itself has all the properties 39 | this can result in name collisions, etc 40 | 41 | Why use it? There are cases where overlapping spans cause problems for the built in spacy.tokens.Span 42 | but for some reason this works 43 | """ 44 | 45 | def __getattribute__(self, name): 46 | """ 47 | when Python attempts to access the underscore property, don't let it - return self instead 48 | this means that: 49 | 50 | ```python 51 | >>> doc = nlp("She extracts revenge.") 52 | >>> es = ESpan(doc, 1, 2) 53 | >>> es._.comment = "yo metaprogramming" 54 | >>> es.comment 55 | 'yo metaprogramming' 56 | ``` 57 | """ 58 | if name == "_": 59 | return self 60 | return super().__getattribute__(name) 61 | 62 | @classmethod 63 | def set_extension(cls, name, **kwargs): 64 | # if we only want to allow default values, this works: 65 | default, method, getter, setter = get_ext_args(**kwargs) 66 | setattr(cls, name, default) 67 | # if we want to allow getters and setters or methods for dynamic props, we have to implement that 68 | # I think it is doable using the `property` built-in method as shown here 69 | # https://stackoverflow.com/a/1355444/3518108 70 | 71 | @classmethod 72 | def has_extension(cls, name): 73 | return hasattr(cls, name) 74 | 75 | 76 | class ReplaceMatcher: 77 | """ 78 | The main unit of functionality. Instantiate with `nlp` (a loaded spaCy pipeline) and a match dict. 79 | Usage example, including a module of custom match hooks: 80 | 81 | ```python 82 | from replacy import ReplaceMatcher 83 | from replacy.db import load_json 84 | import spacy 85 | 86 | import my.custom_hooks as ch # suppose this suggests `excepts=>accepts` under some conditions 87 | 88 | 89 | nlp = spacy.load("en_core_web_sm") 90 | rmatch_dict = load_json("./resources/match_dict.json") 91 | rmatcher = ReplaceMatcher(nlp, rmatch_dict, custom_match_hooks=ch) 92 | span = rmatcher("She excepts her fate.")[0] 93 | span._.suggestions 94 | # >>> ['accepts'] 95 | ``` 96 | """ 97 | 98 | validate_match_dict = validate_match_dict 99 | 100 | def __init__( 101 | self, 102 | nlp, 103 | match_dict=None, 104 | forms_lookup=None, 105 | custom_match_hooks: Optional[ModuleType] = None, 106 | allow_multiple_whitespaces=False, 107 | max_suggestions_count=1000, 108 | lm_path=None, 109 | filter_suggestions=False, 110 | default_max_count=None, 111 | debug=False, 112 | SpanClass=Span, 113 | ): 114 | self.debug = debug 115 | 116 | self.Span = SpanClass 117 | self.logger = logging.getLogger("replaCy") 118 | self.default_match_hooks = default_match_hooks 119 | self.custom_match_hooks = custom_match_hooks 120 | self.nlp = nlp 121 | self.match_dict = match_dict if match_dict else get_match_dict() 122 | if self.debug: 123 | self.match_dict = attach_debug_hook(self.match_dict) 124 | self.allow_multiple_whitespaces = allow_multiple_whitespaces 125 | self.matcher = Matcher(self.nlp.vocab) 126 | self.predicates = {} 127 | self._init_matcher() 128 | self.spans: List[Span] = [] 129 | self.max_suggestions_count = max_suggestions_count 130 | self.forms_lookup = forms_lookup if forms_lookup else get_forms_lookup() 131 | self.suggestion_gen = SuggestionGenerator( 132 | nlp, forms_lookup, filter_suggestions, default_max_count 133 | ) 134 | expected_properties = set_known_extensions(self.Span) 135 | self.novel_prop_defaults = get_novel_prop_defaults( 136 | self.match_dict, self.Span, expected_properties 137 | ) 138 | self._set_scorer(lm_path) 139 | # Pipeline doesn't include the matcher, since it doesn't have the signature List[Span] -> List[Span] 140 | self.pipeline: List[Tuple[str, PipelineComponent]] = [ 141 | ("sorter", self.scorer.sort_suggestions), 142 | ("filter", self.max_count_filter), 143 | ("joiner", join_suggestions), 144 | ] 145 |
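# Note: with the default pipeline above, `rmatcher.pipe_names` evaluates to
# ["sorter", "filter", "joiner"]; add_pipe/remove_pipe (below) edit this list.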
146 | @classmethod 147 | def with_espan(cls, *args, **kwargs): 148 | return cls(*args, **kwargs, SpanClass=ESpan) 149 | 150 | def _init_matcher(self): 151 | for match_name, ps in self.match_dict.items(): 152 | patterns = copy.deepcopy(ps["patterns"]) 153 | 154 | patterns = self._allow_multiple_whitespaces(patterns) 155 | patterns = self._remove_unsupported(patterns) 156 | 157 | match_hooks = ps.get("match_hook", []) 158 | self.predicates[match_name] = get_predicates( 159 | match_hooks, self.default_match_hooks, self.custom_match_hooks 160 | ) 161 | self.matcher.add(match_name, patterns) 162 | 163 | @staticmethod 164 | def _fix_alignment_multiple_whitespaces(alignments): 165 | return [int(a / 2) for a in alignments] 166 | 167 | @staticmethod 168 | def _allow_multiple_whitespaces(patterns): 169 | """ 170 | allow matching tokens separated by multiple whitespaces 171 | they may appear after normalizing nonstandard whitespaces 172 | ex. "Here␣is␣a\u180E\u200Bproblem." -> "Here␣is␣a␣␣problem." 173 | each pattern is preceded and followed by optional whitespace tokens 174 | to keep the preceded_by... and succeeded_by... match hooks working 175 | """ 176 | # interleave an optional whitespace token before, between, and after the pattern tokens 177 | white_pattern = {"IS_SPACE": True, "OP": "?"} 178 | normalized_patterns = [] 179 | for pattern in patterns: 180 | normalized_pattern = [white_pattern] 181 | for p in pattern: 182 | normalized_pattern += [p, white_pattern] 183 | normalized_patterns.append(normalized_pattern) 184 | patterns = normalized_patterns 185 | return patterns 186 |
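# e.g. [[{"LOWER": "due"}]] becomes
# [[{"IS_SPACE": True, "OP": "?"}, {"LOWER": "due"}, {"IS_SPACE": True, "OP": "?"}]],
# so original token i sits at padded index 2i+1, which is why
# _fix_alignment_multiple_whitespaces halves the alignment indices.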
187 | @staticmethod 188 | def _remove_unsupported(patterns): 189 | # remove custom attributes not supported by the spaCy Matcher 190 | for pattern in patterns: 191 | for p in pattern: 192 | if "TEMPLATE_ID" in p: 193 | del p["TEMPLATE_ID"] 194 | return patterns 195 | 196 | def _callback(self, doc, match): 197 | match_id, start, end, alignments = match 198 | alignments = ReplaceMatcher._fix_alignment_multiple_whitespaces(alignments) 199 | 200 | match_name = self.nlp.vocab[match_id].text 201 | 202 | for pred in self.predicates[match_name]: 203 | try: 204 | if pred(doc, start, end): 205 | return None 206 | except IndexError: 207 | break 208 | 209 | span = self.Span(doc, start, end) 210 | 211 | # record the match name so it can be looked up in the match_dict if needed 212 | span._.match_name = match_name 213 | 214 | pre_suggestions = self.match_dict[match_name]["suggestions"] 215 | 216 | span._.suggestions = [] 217 | 218 | for i, x in enumerate(pre_suggestions): 219 | span._.suggestions += self.process_suggestions( 220 | x, doc, start, end, match_name, i, alignments 221 | ) 222 | 223 | for novel_prop, default_value in self.novel_prop_defaults.items(): 224 | setattr( 225 | span._, 226 | novel_prop, 227 | self.match_dict[match_name].get(novel_prop, default_value), 228 | ) 229 | self.spans.append(span) 230 | 231 | def _set_scorer(self, lm_path): 232 | # The following is not ideal 233 | # We should update replaCy to accept a Scorer as a parameter 234 | if lm_path: 235 | from replacy.scorer import KenLMScorer 236 | 237 | self.scorer: Scorer = KenLMScorer(nlp=self.nlp, model=load_lm(lm_path)) 238 | else: 239 | self.scorer = Scorer() 240 | 241 | def max_count_filter(self, spans: List[Span]) -> List[Span]: 242 | # for each span, reduce the number of suggestions 243 | # based on the max_count of each suggestion text item 244 | # assumption - elements are already sorted 245 | for span in spans: 246 | suggestions = span._.suggestions 247 | if len(suggestions): 248 | rest = suggestions 249 | chosen = [] 250 | 251 | while len(rest): 252 | elem = rest[0] 253 | rest = rest[1:] 254 | 255 | # the first element in rest 256 | # was not eliminated => keep it 257 | chosen.append(elem) 258 | rest = eliminate_options(elem, chosen, rest) 259 | 260 | # log matched span and filtered out suggestions 261 | if self.debug: 262 | 263 | self.logger.info( 264 | f"{span._.match_name} matched '{span.text}' token indices {span.start}:{span.end}" 265 | ) 266 | self.logger.info(f"Accepted suggestions: {chosen}") 267 | 268 | suggestions_diff = [f for f in suggestions if f not in chosen] 269 | if len(suggestions_diff): 270 | self.logger.info(f"Ignored suggestions: {suggestions_diff}") 271 | 272 | span._.suggestions = chosen 273 | return spans 274 | 275 | def process_suggestions( 276 | self, pre_suggestion, doc, start, end, match_name, pre_suggestion_id, alignments 277 | ): 278 | # get token <-> pattern correspondence 279 | pattern = self.match_dict[match_name]["patterns"] 280 | 281 | suggestion_variants = self.suggestion_gen( 282 | pre_suggestion, doc, start, end, pattern, pre_suggestion_id, alignments 283 | ) 284 | # check there aren't more than max_suggestions_count combinations 285 | # otherwise warn and return [] 286 | suggestions_count = ( 287 | seq(suggestion_variants).map(lambda x: len(x)).reduce(lambda x, y: x * y, 1) 288 | ) 289 | 290 | if suggestions_count > self.max_suggestions_count: 291 | warnings.warn( 292 | f"Got {suggestions_count} suggestions, max is {self.max_suggestions_count}. \ 293 | Will fall back to empty suggestions." 294 | ) 295 | opt_combinations = [] 296 | else: 297 | opt_combinations = list(itertools.product(*suggestion_variants)) 298 | opt_combinations = [list(o) for o in opt_combinations] 299 | return opt_combinations 300 | 301 | @property 302 | def pipe_names(self): 303 | return [x[0] for x in self.pipeline] 304 | 305 | def add_pipe( 306 | self, 307 | component: PipelineComponent, 308 | name: str = None, 309 | before: str = None, 310 | after: str = None, 311 | first: bool = None, 312 | last: bool = None, 313 | ): 314 | """ 315 | Add a component to the pipeline 316 | A component must take one argument, a list of spans, and return the (possibly modified) list of spans 317 | 318 | Optionally, you can either specify a component to add it before or after, 319 | tell replaCy to add it first or last in the pipeline, or define a custom name. 320 | If no name is set and no name attribute is present on your component, the function/class name is used. 321 | """ 322 | if not at_most_one_is_not_none(before, after, first, last): 323 | raise ValueError("Only one of before, after, first, last can be set") 324 | if name is None: 325 | if hasattr(component, "name"): 326 | name = getattr(component, "name") 327 | else: 328 | name = component.__name__ 329 | 330 | if name in self.pipe_names: 331 | raise ValueError( 332 | f"Component {component} has name collision with existing pipeline component. \ 333 | current pipeline: {self.pipeline}" 334 | ) 335 | pipeline_step = (name, component) 336 | 337 | if last or all([before is None, after is None, first is None, last is None]): 338 | self.pipeline.append(pipeline_step) 339 | elif first: 340 | self.pipeline.insert(0, pipeline_step) 341 | elif before: 342 | if before not in self.pipe_names: 343 | raise ValueError( 344 | f"can't insert component before {before}; no component of that name in pipeline" 345 | ) 346 | reference_component_index = next( 347 | i for i, tup in enumerate(self.pipeline) if tup[0] == before 348 | ) 349 | self.pipeline.insert(reference_component_index, pipeline_step) 350 | elif after: 351 | if after == "matcher": 352 | self.pipeline.insert(0, pipeline_step) # same as "first" 353 | return # don't fall through to the name check below 354 | if after not in self.pipe_names: 355 | raise ValueError( 356 | f"can't insert component after {after}; no component of that name in pipeline" 357 | ) 358 | reference_component_index = next( 359 | i for i, tup in enumerate(self.pipeline) if tup[0] == after 360 | ) 361 | self.pipeline.insert(reference_component_index + 1, pipeline_step) 362 | else: 363 | warnings.warn( 364 | f"Weird values passed to add_pipe, appending {name} to the end of the pipeline" 365 | ) 366 | self.pipeline.append(pipeline_step) 367 |
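# Usage sketch (keep_top_3 is the hypothetical component shown earlier):
#   rmatcher.add_pipe(keep_top_3, name="top3", before="joiner")
#   rmatcher.pipe_names  # -> ["sorter", "filter", "top3", "joiner"]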
368 | def remove_pipe(self, name): 369 | new_pipeline = [] 370 | for p in self.pipeline: 371 | if p[0] == name: 372 | continue 373 | new_pipeline.append(p) 374 | self.pipeline = new_pipeline 375 | 376 | def __call__(self, sent): 377 | # self.spans is instance state - it must be cleared on every call 378 | self.spans = [] 379 | doc = make_doc_if_not_doc(sent, self.nlp) 380 | # collect raw matches; self.spans is filled by the callbacks below 381 | matches = self.matcher(doc, with_alignments=True) 382 | 383 | # run the callback here instead of passing it as an on_match callback: 384 | # here we have the alignment information to use for PATTERN_REF, 385 | # and we don't have this info in an on_match callback 386 | for match in matches: 387 | self._callback(doc, match) 388 | 389 | for _, component in self.pipeline: 390 | # the default pipeline will: 391 | # sort suggestions by lm score 392 | # filter out based on max_count 393 | # merge lists of words into phrases 394 | self.spans = component(self.spans) 395 | # this works because a component's signature is List[Span] -> List[Span] 396 | return self.spans -------------------------------------------------------------------------------- /replacy/default_match_hooks.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains predicates which influence what counts as a match 3 | If the predicate (function) returns True, the match will be ignored 4 | """ 5 | import operator 6 | import re 7 | import sys 8 | from typing import Callable, List, Union 9 | 10 | from spacy.tokens.doc import Doc 11 | 12 | SpacyMatchPredicate = Callable[[Doc, int, int], bool] 13 | 14 | 15 | def _check_args(x): 16 | """ 17 | get the calling function's name to give a nice error message 18 | """ 19 | caller = sys._getframe(1).f_code.co_name 20 | if not isinstance(x, (list, str)): 21 | raise ValueError(f"args of {caller} should be a string or list of strings") 22 | 23 | 24 | def compose(f, g): 25 | return lambda doc, start, end: f(g(doc, start, end)) 26 | 27 | 28 | def neg(f): 29 | # function negation, ex.
def succeeded_by_phrase(phrases) -> SpacyMatchPredicate:
    _check_args(phrases)
    if not isinstance(phrases, list):
        phrases = [phrases]

    def _succeeded_by_phrase(doc, start, end):
        if end >= len(doc):
            return False
        return any([doc[end:].text.lower().startswith(p.lower()) for p in phrases])

    return _succeeded_by_phrase


def preceded_by_phrase(phrases) -> SpacyMatchPredicate:
    _check_args(phrases)
    if not isinstance(phrases, list):
        phrases = [phrases]

    def _preceded_by_phrase(doc, start, end):
        if start <= 0:
            return False
        return any([doc[:start].text.lower().endswith(p.lower()) for p in phrases])

    return _preceded_by_phrase


def succeeded_by_pos(pos) -> SpacyMatchPredicate:
    _check_args(pos)
    if not isinstance(pos, list):
        pos = [pos]

    def _succeeded_by_pos(doc, start, end):
        if end >= len(doc):
            return False
        bools = [doc[end].pos_ == p for p in pos]
        return any(bools)

    return _succeeded_by_pos


def preceded_by_pos(pos) -> SpacyMatchPredicate:
    _check_args(pos)
    if not isinstance(pos, list):
        pos = [pos]

    def _preceded_by_pos(doc, start, end):
        if start <= 0:
            return False
        bools = [doc[start - 1].pos_ == p for p in pos]
        return any(bools)

    return _preceded_by_pos


def succeeded_by_lemma(lemma) -> SpacyMatchPredicate:
    _check_args(lemma)
    if not isinstance(lemma, list):
        lemma = [lemma]

    def _succeeded_by_lemma(doc, start, end):
        if end >= len(doc):
            return False
        bools = [doc[end].lemma_ == lem for lem in lemma]
        return any(bools)

    return _succeeded_by_lemma


def preceded_by_lemma(lemma, distance=1) -> SpacyMatchPredicate:
    _check_args(lemma)
    if not isinstance(lemma, list):
        lemma = [lemma]

    def _preceded_by_lemma(doc, start, end):
        if start < distance:
            return False
        bools = [doc[start - distance].lemma_ == lem for lem in lemma]
        return any(bools)

    return _preceded_by_lemma


def succeeded_by_dep(dep) -> SpacyMatchPredicate:
    _check_args(dep)
    if not isinstance(dep, list):
        dep = [dep]

    def _succeeded_by_dep(doc, start, end):
        if end >= len(doc):
            return False
        bools = [doc[end].dep_ == d for d in dep]
        return any(bools)

    return _succeeded_by_dep


def preceded_by_dep(dep) -> SpacyMatchPredicate:
    _check_args(dep)
    if not isinstance(dep, list):
        dep = [dep]

    def _preceded_by_dep(doc, start, end):
        if start <= 0:
            return False
        bools = [doc[start - 1].dep_ == d for d in dep]
        return any(bools)

    return _preceded_by_dep


def sentence_has(
    phrases: Union[str, List[str]], case_sensitive=False
) -> SpacyMatchPredicate:
    _check_args(phrases)
    if not isinstance(phrases, list):
        phrases = [phrases]

    def _sentence_has(doc, start, end):
        if case_sensitive:
            return any(p in doc.text for p in phrases)
        return any(p.lower() in doc.text.lower() for p in phrases)

    return _sentence_has
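# Hook-configuration sketch (illustrative): in a match_dict entry, the hooks
# above are referenced by name; the exact shape is governed by
# resources/match_dict_schema.json. Roughly:
#
#     "match_hook": [
#         {"name": "preceded_by_pos", "args": "AUX", "match_if_predicate_is": true}
#     ]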
def surrounded_by_phrase(phrase) -> SpacyMatchPredicate:
    def _surrounded_by_hook(doc, start, end):
        if start <= 0 or end >= len(doc):
            return False
        precedes = doc[:start].text.lower().endswith(phrase.lower())
        follows = doc[end:].text.lower().startswith(phrase.lower())
        return precedes and follows

    return _surrounded_by_hook


def part_of_compound() -> SpacyMatchPredicate:
    def _word_is_part_of_compound_hook(doc, start, end):
        head = doc[start]
        is_compound = head.dep_ == "compound"
        is_part_of_compound = any(
            [t.dep_ == "compound" and t.head == head for t in doc]
        )
        return is_compound or is_part_of_compound

    return _word_is_part_of_compound_hook


def relative_x_is_y(
    children_or_ancestors: str, pos_or_dep: str, value: Union[str, List[str]]
) -> SpacyMatchPredicate:
    """
    This hook looks at all the tokens in a matched span to determine
    whether any of the children or the first ancestor have a given .pos_,
    .dep_, or .tag_. This replaces the implementation of the Dependency
    Matcher in the previous version by looking at token.children or
    token.ancestors in the matched span.

    Example hook:
    {
        "name": "relative_x_is_y",
        "kwargs": {
            "children_or_ancestors": "children",
            "pos_or_dep": "dep",
            "value": "pobj"
        },
        "match_if_predicate_is": false
    }
    """

    if not isinstance(value, list):
        value = [value]

    if not isinstance(children_or_ancestors, str):
        raise TypeError("children_or_ancestors must be a string!")

    if not isinstance(pos_or_dep, str):
        raise TypeError("pos_or_dep must be a string!")

    if children_or_ancestors not in ["children", "ancestors"]:
        raise ValueError(
            "children_or_ancestors must be set to either `children` or `ancestors`"
        )

    if pos_or_dep not in ["pos", "dep", "tag"]:
        raise ValueError("pos_or_dep must be set to either `pos`, `dep`, or `tag`!")

    # map "pos" -> Token.pos_ etc., so the inner predicates can stay generic
    attr = {"pos": "pos_", "dep": "dep_", "tag": "tag_"}[pos_or_dep]

    def _in_children(doc, start, end):
        if end >= len(doc):
            return False
        match_span = doc[start:end]
        # check every value, not just the first: the previous version
        # returned after testing only value[0]
        return any(
            getattr(child, attr) == val
            for tok in match_span
            for child in tok.children
            for val in value
        )

    def _in_ancestors(doc, start, end):
        if end >= len(doc):
            return False
        match_span = doc[start:end]
        # likewise, test every value against the first ancestor of each token
        for t in match_span:
            ancestor = next(iter(t.ancestors), None)
            if ancestor is not None and getattr(ancestor, attr) in value:
                return True
        return False

    if children_or_ancestors == "children":
        return _in_children

    return _in_ancestors
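# Quick check of relative_x_is_y (illustrative; assumes an `nlp` pipeline with
# a dependency parser, e.g. en_core_web_sm):
#
#     pred = relative_x_is_y("children", "dep", "pobj")
#     doc = nlp("a cup of coffee")
#     pred(doc, 1, 2)  # True iff some child of a token in doc[1:2] has dep_ == "pobj"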
def part_of_phrase(phrase) -> SpacyMatchPredicate:
    def _part_of_phrase(doc, start, end):
        span = doc[start:end]
        matched = span.text.lower()
        parts = phrase.lower().split(matched)
        # for the i-th occurrence of the matched text inside the phrase, check
        # that the surrounding document text completes the rest of the phrase
        for i in range(len(parts) - 1):
            firstpart = matched.join(parts[: i + 1])
            secondpart = matched.join(parts[i + 1 :])
            precedes = doc.text.lower()[: span.start_char].endswith(firstpart)
            follows = doc.text.lower()[span.end_char :].startswith(secondpart)
            if precedes and follows:
                return True
        return False

    return _part_of_phrase


def succeeded_by_num() -> SpacyMatchPredicate:
    def _succeeded_by_num(doc, start, end):
        if end >= len(doc):
            return False
        return doc[end].like_num or doc[end].pos_ == "NUM" or doc[end].is_digit

    return _succeeded_by_num


def succeeded_by_currency() -> SpacyMatchPredicate:
    def _succeeded_by_currency(doc, start, end):
        if end >= len(doc):
            return False
        return doc[end].is_currency

    return _succeeded_by_currency


def debug_hook(match_name: str) -> SpacyMatchPredicate:
    """
    Don't use this manually.
    If debug is set (i.e. ReplaceMatcher.debug), run utils.attach_debug_hook
    on your match_dict when you load it; it returns a new match_dict with
    the debug hook attached to each match.
    """

    def _print_match(doc: Doc, start: int, end: int):
        print(
            f"DEBUG: {match_name} matched '{doc[start: end].text}' token indices {start}:{end}"
        )
        return True

    return _print_match


def preceded_by_space() -> SpacyMatchPredicate:
    def _preceded_by_space(doc, start, end):
        span = doc[start:end]
        if span.start_char == 0:
            # a match at the very start of the text has nothing before it;
            # without this guard, index -1 would wrap to the last character
            return False
        return doc.text[span.start_char - 1] == " "

    return _preceded_by_space


def preceded_by_punct() -> SpacyMatchPredicate:
    def _preceded_by_punct(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return previous_token.is_punct

    return _preceded_by_punct


def preceded_by_num() -> SpacyMatchPredicate:
    def _preceded_by_number(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return (
            previous_token.like_num
            or previous_token.pos_ == "NUM"
            or previous_token.is_digit
        )

    return _preceded_by_number


def preceded_by_currency() -> SpacyMatchPredicate:
    def _preceded_by_currency(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return previous_token.is_currency

    return _preceded_by_currency


def preceded_by_token(token) -> SpacyMatchPredicate:
    token_list = token if isinstance(token, list) else [token]

    def _preceded_by_token(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return any([previous_token.lower_ == t.lower() for t in token_list])

    return _preceded_by_token


def succeeded_by_token(token) -> SpacyMatchPredicate:
    token_list = token if isinstance(token, list) else [token]

    def _succeeded_by_token(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        return any([next_token.lower_ == t.lower() for t in token_list])

    return _succeeded_by_token
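# Token-hook sketch (illustrative):
#
#     hook = preceded_by_token(["a", "an"])
#     doc = nlp("that was an historic day")
#     hook(doc, 3, 4)  # True: the token before doc[3:4] ("historic") is "an"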
def preceded_by_tag(tag) -> SpacyMatchPredicate:
    tag_list = tag if isinstance(tag, list) else [tag]

    def _preceded_by_tag(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        return any([previous_token.tag_ == t for t in tag_list])

    return _preceded_by_tag


def preceded_by_regex(regex, sensitive=False) -> SpacyMatchPredicate:
    def _preceded_by_regex(doc, start, end):
        if start == 0:
            return False
        previous_token = doc[start - 1]
        flags = 0 if sensitive else re.IGNORECASE
        return re.search(regex, previous_token.text, flags) is not None

    return _preceded_by_regex


def succeeded_by_tag(tag) -> SpacyMatchPredicate:
    tag_list = tag if isinstance(tag, list) else [tag]

    def _succeeded_by_tag(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        return any([next_token.tag_ == t for t in tag_list])

    return _succeeded_by_tag


def succeeded_by_regex(regex, sensitive=False) -> SpacyMatchPredicate:
    def _succeeded_by_regex(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        flags = 0 if sensitive else re.IGNORECASE
        return re.search(regex, next_token.text, flags) is not None

    return _succeeded_by_regex


def succeeded_by_same_token() -> SpacyMatchPredicate:
    def _succeeded_by_same_token(doc, start, end):
        if end == len(doc):
            return False
        token = doc[start]
        next_token = doc[end]
        return token.lower_ == next_token.lower_

    return _succeeded_by_same_token


def succeeded_by_punct() -> SpacyMatchPredicate:
    def _succeeded_by_punct(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        return next_token.is_punct

    return _succeeded_by_punct


def succeeded_by_word() -> SpacyMatchPredicate:
    def _succeeded_by_word(doc, start, end):
        if end == len(doc):
            return False
        next_token = doc[end]
        return (
            not next_token.is_punct
            and not next_token.is_digit
            and not next_token.is_space
        )

    return _succeeded_by_word


def is_start_of_sentence() -> SpacyMatchPredicate:
    return lambda doc, start, end: doc[start].is_sent_start


def is_end_of_sentence() -> SpacyMatchPredicate:
    return lambda doc, start, end: end == len(doc) or doc[end].is_sent_end


def sentence_ends_with(phrase) -> SpacyMatchPredicate:
    def _sentence_ends_with(doc, start, end):
        return doc[end:].text.lower().strip().endswith(phrase.lower())

    return _sentence_ends_with


# for compatibility with a previous version with spelling errors:
# point the incorrectly spelled names at the correct versions;
# eventually deprecate these
preceeded_by_phrase = preceded_by_phrase
preceeded_by_pos = preceded_by_pos
preceeded_by_dep = preceded_by_dep
--------------------------------------------------------------------------------
/poetry.lock:
--------------------------------------------------------------------------------
1 | [[package]]
2 | name = "atomicwrites"
3 | version = "1.4.0"
4 | description = "Atomic file writes."
5 | category = "dev" 6 | optional = false 7 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 8 | 9 | [[package]] 10 | name = "attrs" 11 | version = "21.2.0" 12 | description = "Classes Without Boilerplate" 13 | category = "dev" 14 | optional = false 15 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 16 | 17 | [package.extras] 18 | dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"] 19 | docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] 20 | tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"] 21 | tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"] 22 | 23 | [[package]] 24 | name = "blis" 25 | version = "0.7.5" 26 | description = "The Blis BLAS-like linear algebra library, as a self-contained C-extension." 27 | category = "dev" 28 | optional = false 29 | python-versions = "*" 30 | 31 | [package.dependencies] 32 | numpy = ">=1.15.0" 33 | 34 | [[package]] 35 | name = "catalogue" 36 | version = "2.0.6" 37 | description = "Super lightweight function registries for your library" 38 | category = "dev" 39 | optional = false 40 | python-versions = ">=3.6" 41 | 42 | [package.dependencies] 43 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 44 | zipp = {version = ">=0.5", markers = "python_version < \"3.8\""} 45 | 46 | [[package]] 47 | name = "certifi" 48 | version = "2021.10.8" 49 | description = "Python package for providing Mozilla's CA Bundle." 50 | category = "dev" 51 | optional = false 52 | python-versions = "*" 53 | 54 | [[package]] 55 | name = "charset-normalizer" 56 | version = "2.0.8" 57 | description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 58 | category = "dev" 59 | optional = false 60 | python-versions = ">=3.5.0" 61 | 62 | [package.extras] 63 | unicode_backport = ["unicodedata2"] 64 | 65 | [[package]] 66 | name = "click" 67 | version = "7.1.2" 68 | description = "Composable command line interface toolkit" 69 | category = "dev" 70 | optional = false 71 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 72 | 73 | [[package]] 74 | name = "colorama" 75 | version = "0.4.4" 76 | description = "Cross-platform colored terminal text." 
77 | category = "dev" 78 | optional = false 79 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 80 | 81 | [[package]] 82 | name = "contextvars" 83 | version = "2.4" 84 | description = "PEP 567 Backport" 85 | category = "dev" 86 | optional = false 87 | python-versions = "*" 88 | 89 | [package.dependencies] 90 | immutables = ">=0.9" 91 | 92 | [[package]] 93 | name = "cymem" 94 | version = "2.0.6" 95 | description = "Manage calls to calloc/free through Cython" 96 | category = "dev" 97 | optional = false 98 | python-versions = "*" 99 | 100 | [[package]] 101 | name = "dataclasses" 102 | version = "0.8" 103 | description = "A backport of the dataclasses module for Python 3.6" 104 | category = "dev" 105 | optional = false 106 | python-versions = ">=3.6, <3.7" 107 | 108 | [[package]] 109 | name = "dill" 110 | version = "0.2.7.1" 111 | description = "serialize all of python" 112 | category = "main" 113 | optional = false 114 | python-versions = "*" 115 | 116 | [[package]] 117 | name = "en-core-web-sm" 118 | version = "3.0.0" 119 | description = "English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer." 120 | category = "dev" 121 | optional = false 122 | python-versions = "*" 123 | 124 | [package.dependencies] 125 | spacy = ">=3.0.0,<3.1.0" 126 | 127 | [package.source] 128 | type = "url" 129 | url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz" 130 | [[package]] 131 | name = "future" 132 | version = "0.18.2" 133 | description = "Clean single-source support for Python 3 and 2" 134 | category = "main" 135 | optional = false 136 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 137 | 138 | [[package]] 139 | name = "idna" 140 | version = "3.3" 141 | description = "Internationalized Domain Names in Applications (IDNA)" 142 | category = "dev" 143 | optional = false 144 | python-versions = ">=3.5" 145 | 146 | [[package]] 147 | name = "immutables" 148 | version = "0.16" 149 | description = "Immutable Collections" 150 | category = "dev" 151 | optional = false 152 | python-versions = ">=3.6" 153 | 154 | [package.dependencies] 155 | typing-extensions = {version = ">=3.7.4.3", markers = "python_version < \"3.8\""} 156 | 157 | [package.extras] 158 | test = ["flake8 (>=3.8.4,<3.9.0)", "pycodestyle (>=2.6.0,<2.7.0)", "mypy (>=0.910)", "pytest (>=6.2.4,<6.3.0)"] 159 | 160 | [[package]] 161 | name = "importlib-metadata" 162 | version = "4.8.2" 163 | description = "Read metadata from Python packages" 164 | category = "dev" 165 | optional = false 166 | python-versions = ">=3.6" 167 | 168 | [package.dependencies] 169 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 170 | zipp = ">=0.5" 171 | 172 | [package.extras] 173 | docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] 174 | perf = ["ipython"] 175 | testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] 176 | 177 | [[package]] 178 | name = "jinja2" 179 | version = "3.0.3" 180 | description = "A very fast and expressive template engine." 
181 | category = "dev" 182 | optional = false 183 | python-versions = ">=3.6" 184 | 185 | [package.dependencies] 186 | MarkupSafe = ">=2.0" 187 | 188 | [package.extras] 189 | i18n = ["Babel (>=2.7)"] 190 | 191 | [[package]] 192 | name = "jsonschema" 193 | version = "2.6.0" 194 | description = "An implementation of JSON Schema validation for Python" 195 | category = "main" 196 | optional = false 197 | python-versions = "*" 198 | 199 | [package.extras] 200 | format = ["rfc3987", "strict-rfc3339", "webcolors"] 201 | 202 | [[package]] 203 | name = "kenlm" 204 | version = "0.0.0" 205 | description = "" 206 | category = "dev" 207 | optional = false 208 | python-versions = "*" 209 | develop = false 210 | 211 | [package.source] 212 | type = "git" 213 | url = "https://github.com/kpu/kenlm" 214 | reference = "master" 215 | resolved_reference = "f01e12d83c7fd03ebe6656e0ad6d73a3e022bd50" 216 | 217 | [[package]] 218 | name = "lemminflect" 219 | version = "0.2.1" 220 | description = "A python module for English lemmatization and inflection." 221 | category = "main" 222 | optional = false 223 | python-versions = "*" 224 | 225 | [package.dependencies] 226 | numpy = "*" 227 | 228 | [[package]] 229 | name = "markupsafe" 230 | version = "2.0.1" 231 | description = "Safely add untrusted strings to HTML/XML markup." 232 | category = "dev" 233 | optional = false 234 | python-versions = ">=3.6" 235 | 236 | [[package]] 237 | name = "more-itertools" 238 | version = "8.12.0" 239 | description = "More routines for operating on iterables, beyond itertools" 240 | category = "dev" 241 | optional = false 242 | python-versions = ">=3.5" 243 | 244 | [[package]] 245 | name = "murmurhash" 246 | version = "1.0.6" 247 | description = "Cython bindings for MurmurHash" 248 | category = "dev" 249 | optional = false 250 | python-versions = "*" 251 | 252 | [[package]] 253 | name = "numpy" 254 | version = "1.19.5" 255 | description = "NumPy is the fundamental package for array computing with Python." 
256 | category = "main" 257 | optional = false 258 | python-versions = ">=3.6" 259 | 260 | [[package]] 261 | name = "packaging" 262 | version = "21.3" 263 | description = "Core utilities for Python packages" 264 | category = "dev" 265 | optional = false 266 | python-versions = ">=3.6" 267 | 268 | [package.dependencies] 269 | pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" 270 | 271 | [[package]] 272 | name = "pathy" 273 | version = "0.6.1" 274 | description = "pathlib.Path subclasses for local and cloud bucket storage" 275 | category = "dev" 276 | optional = false 277 | python-versions = ">= 3.6" 278 | 279 | [package.dependencies] 280 | dataclasses = {version = ">=0.6,<1.0", markers = "python_version < \"3.7\""} 281 | smart-open = ">=5.0.0,<6.0.0" 282 | typer = ">=0.3.0,<1.0.0" 283 | 284 | [package.extras] 285 | all = ["google-cloud-storage (>=1.26.0,<2.0.0)", "boto3", "pytest", "pytest-coverage", "mock", "typer-cli"] 286 | gcs = ["google-cloud-storage (>=1.26.0,<2.0.0)"] 287 | s3 = ["boto3"] 288 | test = ["pytest", "pytest-coverage", "mock", "typer-cli"] 289 | 290 | [[package]] 291 | name = "pluggy" 292 | version = "0.13.1" 293 | description = "plugin and hook calling mechanisms for python" 294 | category = "dev" 295 | optional = false 296 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 297 | 298 | [package.dependencies] 299 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 300 | 301 | [package.extras] 302 | dev = ["pre-commit", "tox"] 303 | 304 | [[package]] 305 | name = "preshed" 306 | version = "3.0.6" 307 | description = "Cython hash table that trusts the keys are pre-hashed" 308 | category = "dev" 309 | optional = false 310 | python-versions = "*" 311 | 312 | [package.dependencies] 313 | cymem = ">=2.0.2,<2.1.0" 314 | murmurhash = ">=0.28.0,<1.1.0" 315 | 316 | [[package]] 317 | name = "py" 318 | version = "1.11.0" 319 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 320 | category = "dev" 321 | optional = false 322 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 323 | 324 | [[package]] 325 | name = "pydantic" 326 | version = "1.7.4" 327 | description = "Data validation and settings management using python 3.6 type hinting" 328 | category = "dev" 329 | optional = false 330 | python-versions = ">=3.6" 331 | 332 | [package.dependencies] 333 | dataclasses = {version = ">=0.6", markers = "python_version < \"3.7\""} 334 | 335 | [package.extras] 336 | dotenv = ["python-dotenv (>=0.10.4)"] 337 | email = ["email-validator (>=1.0.3)"] 338 | typing_extensions = ["typing-extensions (>=3.7.2)"] 339 | 340 | [[package]] 341 | name = "pyfunctional" 342 | version = "1.3.0" 343 | description = "Package for creating data pipelines with chain functional programming" 344 | category = "main" 345 | optional = false 346 | python-versions = "*" 347 | 348 | [package.dependencies] 349 | dill = ">=0.2.6,<=0.2.7.1" 350 | future = "<=1.0.0" 351 | six = "<=2.0.0" 352 | tabulate = "<=1.0.0" 353 | 354 | [[package]] 355 | name = "pyparsing" 356 | version = "3.0.6" 357 | description = "Python parsing module" 358 | category = "dev" 359 | optional = false 360 | python-versions = ">=3.6" 361 | 362 | [package.extras] 363 | diagrams = ["jinja2", "railroad-diagrams"] 364 | 365 | [[package]] 366 | name = "pytest" 367 | version = "5.4.3" 368 | description = "pytest: simple powerful testing with Python" 369 | category = "dev" 370 | optional = false 371 | python-versions = ">=3.5" 372 | 373 | [package.dependencies] 374 | 
atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 375 | attrs = ">=17.4.0" 376 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 377 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 378 | more-itertools = ">=4.0.0" 379 | packaging = "*" 380 | pluggy = ">=0.12,<1.0" 381 | py = ">=1.5.0" 382 | wcwidth = "*" 383 | 384 | [package.extras] 385 | checkqa-mypy = ["mypy (==v0.761)"] 386 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] 387 | 388 | [[package]] 389 | name = "requests" 390 | version = "2.26.0" 391 | description = "Python HTTP for Humans." 392 | category = "dev" 393 | optional = false 394 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" 395 | 396 | [package.dependencies] 397 | certifi = ">=2017.4.17" 398 | charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} 399 | idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} 400 | urllib3 = ">=1.21.1,<1.27" 401 | 402 | [package.extras] 403 | socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] 404 | use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] 405 | 406 | [[package]] 407 | name = "six" 408 | version = "1.16.0" 409 | description = "Python 2 and 3 compatibility utilities" 410 | category = "main" 411 | optional = false 412 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 413 | 414 | [[package]] 415 | name = "smart-open" 416 | version = "5.2.1" 417 | description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" 418 | category = "dev" 419 | optional = false 420 | python-versions = ">=3.6,<4.0" 421 | 422 | [package.extras] 423 | all = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests"] 424 | azure = ["azure-storage-blob", "azure-common", "azure-core"] 425 | gcs = ["google-cloud-storage"] 426 | http = ["requests"] 427 | s3 = ["boto3"] 428 | test = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests", "moto[server] (==1.3.14)", "pathlib2", "responses", "paramiko", "parameterizedtestcase", "pytest", "pytest-rerunfailures"] 429 | webhdfs = ["requests"] 430 | 431 | [[package]] 432 | name = "spacy" 433 | version = "3.0.7" 434 | description = "Industrial-strength Natural Language Processing (NLP) in Python" 435 | category = "dev" 436 | optional = false 437 | python-versions = ">=3.6" 438 | 439 | [package.dependencies] 440 | blis = ">=0.4.0,<0.8.0" 441 | catalogue = ">=2.0.4,<2.1.0" 442 | cymem = ">=2.0.2,<2.1.0" 443 | jinja2 = "*" 444 | murmurhash = ">=0.28.0,<1.1.0" 445 | numpy = ">=1.15.0" 446 | packaging = ">=20.0" 447 | pathy = ">=0.3.5" 448 | preshed = ">=3.0.2,<3.1.0" 449 | pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0" 450 | requests = ">=2.13.0,<3.0.0" 451 | spacy-legacy = ">=3.0.5,<3.1.0" 452 | srsly = ">=2.4.1,<3.0.0" 453 | thinc = ">=8.0.3,<8.1.0" 454 | tqdm = ">=4.38.0,<5.0.0" 455 | typer = ">=0.3.0,<0.4.0" 456 | typing-extensions = {version = ">=3.7.4,<4.0.0.0", markers = "python_version < \"3.8\""} 457 | wasabi = ">=0.8.1,<1.1.0" 458 | 459 | [package.extras] 460 | cuda = ["cupy (>=5.0.0b4,<10.0.0)"] 461 | cuda100 = ["cupy-cuda100 (>=5.0.0b4,<10.0.0)"] 462 | cuda101 = ["cupy-cuda101 (>=5.0.0b4,<10.0.0)"] 463 | cuda102 = ["cupy-cuda102 (>=5.0.0b4,<10.0.0)"] 464 | cuda110 = ["cupy-cuda110 (>=5.0.0b4,<10.0.0)"] 465 | cuda111 = ["cupy-cuda111 (>=5.0.0b4,<10.0.0)"] 466 | cuda112 = ["cupy-cuda112 
(>=5.0.0b4,<10.0.0)"] 467 | cuda80 = ["cupy-cuda80 (>=5.0.0b4,<10.0.0)"] 468 | cuda90 = ["cupy-cuda90 (>=5.0.0b4,<10.0.0)"] 469 | cuda91 = ["cupy-cuda91 (>=5.0.0b4,<10.0.0)"] 470 | cuda92 = ["cupy-cuda92 (>=5.0.0b4,<10.0.0)"] 471 | ja = ["sudachipy (>=0.4.9)", "sudachidict-core (>=20200330)"] 472 | ko = ["natto-py (==0.9.0)"] 473 | lookups = ["spacy-lookups-data (>=1.0.0,<1.1.0)"] 474 | ray = ["spacy-ray (>=0.1.0,<1.0.0)"] 475 | th = ["pythainlp (>=2.0)"] 476 | transformers = ["spacy-transformers (>=1.0.1,<1.1.0)"] 477 | 478 | [[package]] 479 | name = "spacy-legacy" 480 | version = "3.0.8" 481 | description = "Legacy registered functions for spaCy backwards compatibility" 482 | category = "dev" 483 | optional = false 484 | python-versions = ">=3.6" 485 | 486 | [[package]] 487 | name = "srsly" 488 | version = "2.4.2" 489 | description = "Modern high-performance serialization utilities for Python" 490 | category = "dev" 491 | optional = false 492 | python-versions = ">=3.6" 493 | 494 | [package.dependencies] 495 | catalogue = ">=2.0.3,<2.1.0" 496 | 497 | [[package]] 498 | name = "tabulate" 499 | version = "0.8.9" 500 | description = "Pretty-print tabular data" 501 | category = "main" 502 | optional = false 503 | python-versions = "*" 504 | 505 | [package.extras] 506 | widechars = ["wcwidth"] 507 | 508 | [[package]] 509 | name = "thinc" 510 | version = "8.0.13" 511 | description = "A refreshing functional take on deep learning, compatible with your favorite libraries" 512 | category = "dev" 513 | optional = false 514 | python-versions = ">=3.6" 515 | 516 | [package.dependencies] 517 | blis = ">=0.4.0,<0.8.0" 518 | catalogue = ">=2.0.4,<2.1.0" 519 | contextvars = {version = ">=2.4,<3", markers = "python_version < \"3.7\""} 520 | cymem = ">=2.0.2,<2.1.0" 521 | dataclasses = {version = ">=0.6,<1.0", markers = "python_version < \"3.7\""} 522 | murmurhash = ">=0.28.0,<1.1.0" 523 | numpy = ">=1.15.0" 524 | preshed = ">=3.0.2,<3.1.0" 525 | pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0" 526 | srsly = ">=2.4.0,<3.0.0" 527 | typing-extensions = {version = ">=3.7.4.1,<4.0.0.0", markers = "python_version < \"3.8\""} 528 | wasabi = ">=0.8.1,<1.1.0" 529 | 530 | [package.extras] 531 | cuda = ["cupy (>=5.0.0b4)"] 532 | cuda100 = ["cupy-cuda100 (>=5.0.0b4)"] 533 | cuda101 = ["cupy-cuda101 (>=5.0.0b4)"] 534 | cuda102 = ["cupy-cuda102 (>=5.0.0b4)"] 535 | cuda110 = ["cupy-cuda110 (>=5.0.0b4)"] 536 | cuda111 = ["cupy-cuda111 (>=5.0.0b4)"] 537 | cuda112 = ["cupy-cuda112 (>=5.0.0b4)"] 538 | cuda113 = ["cupy-cuda113 (>=5.0.0b4)"] 539 | cuda114 = ["cupy-cuda114 (>=5.0.0b4)"] 540 | cuda80 = ["cupy-cuda80 (>=5.0.0b4)"] 541 | cuda90 = ["cupy-cuda90 (>=5.0.0b4)"] 542 | cuda91 = ["cupy-cuda91 (>=5.0.0b4)"] 543 | cuda92 = ["cupy-cuda92 (>=5.0.0b4)"] 544 | datasets = ["ml-datasets (>=0.2.0,<0.3.0)"] 545 | mxnet = ["mxnet (>=1.5.1,<1.6.0)"] 546 | tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"] 547 | torch = ["torch (>=1.5.0)"] 548 | 549 | [[package]] 550 | name = "tqdm" 551 | version = "4.62.3" 552 | description = "Fast, Extensible Progress Meter" 553 | category = "dev" 554 | optional = false 555 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" 556 | 557 | [package.dependencies] 558 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 559 | 560 | [package.extras] 561 | dev = ["py-make (>=0.1.0)", "twine", "wheel"] 562 | notebook = ["ipywidgets (>=6)"] 563 | telegram = ["requests"] 564 | 565 | [[package]] 566 | name = "typer" 567 | version = "0.3.2" 568 | description = "Typer, build 
great CLIs. Easy to code. Based on Python type hints." 569 | category = "dev" 570 | optional = false 571 | python-versions = ">=3.6" 572 | 573 | [package.dependencies] 574 | click = ">=7.1.1,<7.2.0" 575 | 576 | [package.extras] 577 | test = ["pytest-xdist (>=1.32.0,<2.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "mypy (==0.782)", "black (>=19.10b0,<20.0b0)", "isort (>=5.0.6,<6.0.0)", "shellingham (>=1.3.0,<2.0.0)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "coverage (>=5.2,<6.0)"] 578 | all = ["colorama (>=0.4.3,<0.5.0)", "shellingham (>=1.3.0,<2.0.0)"] 579 | dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)"] 580 | doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=5.4.0,<6.0.0)", "markdown-include (>=0.5.1,<0.6.0)"] 581 | 582 | [[package]] 583 | name = "typing-extensions" 584 | version = "3.10.0.2" 585 | description = "Backported and Experimental Type Hints for Python 3.5+" 586 | category = "dev" 587 | optional = false 588 | python-versions = "*" 589 | 590 | [[package]] 591 | name = "urllib3" 592 | version = "1.26.7" 593 | description = "HTTP library with thread-safe connection pooling, file post, and more." 594 | category = "dev" 595 | optional = false 596 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" 597 | 598 | [package.extras] 599 | brotli = ["brotlipy (>=0.6.0)"] 600 | secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "ipaddress"] 601 | socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] 602 | 603 | [[package]] 604 | name = "wasabi" 605 | version = "0.8.2" 606 | description = "A lightweight console printing and formatting toolkit" 607 | category = "dev" 608 | optional = false 609 | python-versions = "*" 610 | 611 | [[package]] 612 | name = "wcwidth" 613 | version = "0.2.5" 614 | description = "Measures the displayed width of unicode strings in a terminal" 615 | category = "dev" 616 | optional = false 617 | python-versions = "*" 618 | 619 | [[package]] 620 | name = "zipp" 621 | version = "3.6.0" 622 | description = "Backport of pathlib-compatible object wrapper for zip files" 623 | category = "dev" 624 | optional = false 625 | python-versions = ">=3.6" 626 | 627 | [package.extras] 628 | docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] 629 | testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"] 630 | 631 | [metadata] 632 | lock-version = "1.1" 633 | python-versions = "^3.6" 634 | content-hash = "066671bb2c96e224b9da938c40dd81e89847a824ce08e82991b295f82528e12e" 635 | 636 | [metadata.files] 637 | atomicwrites = [ 638 | {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, 639 | {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, 640 | ] 641 | attrs = [ 642 | {file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"}, 643 | {file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"}, 644 | ] 645 | blis = [ 646 | {file = "blis-0.7.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5812a7c04561ae7332cf730f57d9f82cbd12c5f86a5bfad66ee244e51d06266d"}, 647 | {file = "blis-0.7.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9eecfce3d8fce61dede7b0ae0dffa461c22072437b6cde85587db0c1aa75b450"}, 648 | {file = "blis-0.7.5-cp310-cp310-win_amd64.whl", hash = "sha256:0e476931f0d5703a21c77e7f69b8ebdeeea493fc7858a86f627ac2b376a12c8d"}, 649 | {file = "blis-0.7.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5966ddf3bce84aa7bb09ce4ca059309602fa63280a5d5e5365bb2a294bd5a138"}, 650 | {file = "blis-0.7.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9034dabce4e42e3a1a7b99cc6de430484c8c369e51556ee8d47a53c085de681"}, 651 | {file = "blis-0.7.5-cp36-cp36m-win_amd64.whl", hash = "sha256:730952f74adb0fa7dde9f1bc11249d5a64f3a3a9cf7dfa23b189a4b767bdf2d0"}, 652 | {file = "blis-0.7.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2339cb19594134775bda8b86f23a893828fc7e8d63f09ba9a15f30b2b16c966c"}, 653 | {file = "blis-0.7.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5023781272e0b2868be2f92017aa6836557990f1ca5ba2af5e9f5a0acf04fd8a"}, 654 | {file = "blis-0.7.5-cp37-cp37m-win_amd64.whl", hash = "sha256:65ba723821cc57eb4227eb8dd05c57fff23d97f826d4325b316cd8a63aac8d6a"}, 655 | {file = "blis-0.7.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad4af690c37a5953d3aea660ad89b636bfbb80ca1470995554670ca2143f0cb2"}, 656 | {file = "blis-0.7.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf11c233ea5c2d30683e7c9641c5dc4cd76ed0f64755ba3321dfb8db39feb316"}, 657 | {file = "blis-0.7.5-cp38-cp38-win_amd64.whl", hash = "sha256:31401da283ed42905f0fbf2f8b88ea424c6a911482426f84b5b88c54d382e4d1"}, 658 | {file = "blis-0.7.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0c185979f8f528d634f5548b8cd84ab0366d340c27c039ad3937fab186c1c252"}, 659 | {file = "blis-0.7.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8345bd04777557ef385e2f2d1f14a19d53b2ea9ca5fe107a2cdc50d7bafb8eb2"}, 660 | {file = "blis-0.7.5-cp39-cp39-win_amd64.whl", hash = "sha256:66204a19e38986645940c887498c7b5520efb5bbc6526bf1b8a58f7d3eb37da0"}, 661 | {file = "blis-0.7.5.tar.gz", hash = "sha256:833e01e9eaff4c01aa6e049bbc1e6acb9eca6ee513d7b35b5bf135d49705ad33"}, 662 | ] 663 | catalogue = [ 664 | {file = "catalogue-2.0.6-py3-none-any.whl", hash = "sha256:34ebb5cd2b98f7fa7421fa0eead3b84e577243532509b3fa8cd04abcc9f61d3c"}, 665 | {file = "catalogue-2.0.6.tar.gz", hash = "sha256:336a35603f447167042ef504114d6befa46688f03f4c14dabdc633a44587b245"}, 666 | ] 667 | certifi = [ 668 | {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, 669 | {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, 670 | ] 671 | charset-normalizer = [ 672 | {file = "charset-normalizer-2.0.8.tar.gz", hash = "sha256:735e240d9a8506778cd7a453d97e817e536bb1fc29f4f6961ce297b9c7a917b0"}, 673 | {file = "charset_normalizer-2.0.8-py3-none-any.whl", hash = "sha256:83fcdeb225499d6344c8f7f34684c2981270beacc32ede2e669e94f7fa544405"}, 674 | ] 675 | click = [ 676 | {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, 677 | {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, 678 | ] 679 | colorama = [ 680 | {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, 681 | {file = "colorama-0.4.4.tar.gz", hash = 
"sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, 682 | ] 683 | contextvars = [ 684 | {file = "contextvars-2.4.tar.gz", hash = "sha256:f38c908aaa59c14335eeea12abea5f443646216c4e29380d7bf34d2018e2c39e"}, 685 | ] 686 | cymem = [ 687 | {file = "cymem-2.0.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2b4e27e739f09f16c7c0190f962ffe60dab39cb6a229d5c13e274d16f46a17e8"}, 688 | {file = "cymem-2.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:971cf0a8437dfb4185c3049c086e463612fe849efadc0f5cc153fc81c501da7d"}, 689 | {file = "cymem-2.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:6b0d1a6b0a1296f31fa9e4b7ae5ea49394084ecc883b1ae6fec4844403c43468"}, 690 | {file = "cymem-2.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b8e1c18bb00800425576710468299153caad20c64ddb6819d40a6a34e21ee21c"}, 691 | {file = "cymem-2.0.6-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:492084aef23ac2ff3da3729e9d36340bc91a96c2dc8c3a82a1926e384ab52412"}, 692 | {file = "cymem-2.0.6-cp36-cp36m-win_amd64.whl", hash = "sha256:af3c01e6b20f9e6c07c7d7cdb7f710e49889d3906c9a3e039546ee6636a34b9a"}, 693 | {file = "cymem-2.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d7a59cef8f2fa25d12e2c30138f8623acbd43ad2715e730a709e49c5eef8e1b0"}, 694 | {file = "cymem-2.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd52d8a81881804625df88453611175ab7e0099b34f52204da1f6940cf2e83c9"}, 695 | {file = "cymem-2.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:4749f220e4c06ec44eb10de13794ff0508cdc4f8eff656cf49cab2cdb3122c0c"}, 696 | {file = "cymem-2.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2aa3fa467d906cd2c27fa0a2e2952dd7925f5fcc7973fab6d815ef6acb25aad8"}, 697 | {file = "cymem-2.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea535f74ab6024e7416f93de564e5c81fb7c0964b96280de66f60aeb05f0cf53"}, 698 | {file = "cymem-2.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:4f87fe087f2ae36c3e20e2b1a29d7f76a28c035372d0a97655f26223d975235a"}, 699 | {file = "cymem-2.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a93fba62fe79dbf6fc4d5b6d804a6e114b44af3ff3d40a28833ee39f21bd336b"}, 700 | {file = "cymem-2.0.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04676d696596b0db3f3c5a3936bab12fb6f24278921a6622bb185e61765b2b4d"}, 701 | {file = "cymem-2.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:c59293b232b53ebb47427f16cf648e937022f489cff36c11d1d8a1f0075b6609"}, 702 | {file = "cymem-2.0.6.tar.gz", hash = "sha256:169725b5816959d34de2545b33fee6a8021a6e08818794a426c5a4f981f17e5e"}, 703 | ] 704 | dataclasses = [ 705 | {file = "dataclasses-0.8-py3-none-any.whl", hash = "sha256:0201d89fa866f68c8ebd9d08ee6ff50c0b255f8ec63a71c16fda7af82bb887bf"}, 706 | {file = "dataclasses-0.8.tar.gz", hash = "sha256:8479067f342acf957dc82ec415d355ab5edb7e7646b90dc6e2fd1d96ad084c97"}, 707 | ] 708 | dill = [ 709 | {file = "dill-0.2.7.1.tar.gz", hash = "sha256:97fd758f5fe742d42b11ec8318ecfcff8776bccacbfcec05dfd6276f5d450f73"}, 710 | ] 711 | en-core-web-sm = [] 712 | future = [ 713 | {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, 714 | ] 715 | idna = [ 716 | {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, 717 | {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, 718 | ] 719 | immutables = [ 720 | {file = 
"immutables-0.16-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:acbfa79d44228d96296279068441f980dc63dbed52522d9227ff9f4d96c6627e"}, 721 | {file = "immutables-0.16-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c9ed003eacb92e630ef200e31f47236c2139b39476894f7963b32bd39bafa3"}, 722 | {file = "immutables-0.16-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a396314b9024fa55bf83a27813fd76cf9f27dce51f53b0f19b51de035146251"}, 723 | {file = "immutables-0.16-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:4a2a71678348fb95b13ca108d447f559a754c41b47bd1e7e4fb23974e735682d"}, 724 | {file = "immutables-0.16-cp36-cp36m-win32.whl", hash = "sha256:064001638ab5d36f6aa05b6101446f4a5793fb71e522bc81b8fc65a1894266ff"}, 725 | {file = "immutables-0.16-cp36-cp36m-win_amd64.whl", hash = "sha256:1de393f1b188740ca7b38f946f2bbc7edf3910d2048f03bbb8d01f17a038d67c"}, 726 | {file = "immutables-0.16-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:fcf678a3074613119385a02a07c469ec5130559f5ea843c85a0840c80b5b71c6"}, 727 | {file = "immutables-0.16-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a307eb0984eb43e815dcacea3ac50c11d00a936ecf694c46991cd5a23bcb0ec0"}, 728 | {file = "immutables-0.16-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7a58825ff2254e2612c5a932174398a4ea8fbddd8a64a02c880cc32ee28b8820"}, 729 | {file = "immutables-0.16-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:798b095381eb42cf40db6876339e7bed84093e5868018a9e73d8e1f7ab4bb21e"}, 730 | {file = "immutables-0.16-cp37-cp37m-win32.whl", hash = "sha256:19bdede174847c2ef1292df0f23868ab3918b560febb09fcac6eec621bd4812b"}, 731 | {file = "immutables-0.16-cp37-cp37m-win_amd64.whl", hash = "sha256:9ccf4c0e3e2e3237012b516c74c49de8872ccdf9129739f7a0b9d7444a8c4862"}, 732 | {file = "immutables-0.16-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d59beef203a3765db72b1d0943547425c8318ecf7d64c451fd1e130b653c2fbb"}, 733 | {file = "immutables-0.16-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0020aaa4010b136056c20a46ce53204e1407a9e4464246cb2cf95b90808d9161"}, 734 | {file = "immutables-0.16-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edd9f67671555af1eb99ad3c7550238487dd7ac0ac5205b40204ed61c9a922ac"}, 735 | {file = "immutables-0.16-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:298a301f85f307b4c056a0825eb30f060e64d73605e783289f3df37dd762bab8"}, 736 | {file = "immutables-0.16-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b779617f5b94486bfd0f22162cd72eb5f2beb0214a14b75fdafb7b2c908ed0cb"}, 737 | {file = "immutables-0.16-cp38-cp38-win32.whl", hash = "sha256:511c93d8b1bbbf103ff3f1f120c5a68a9866ce03dea6ac406537f93ca9b19139"}, 738 | {file = "immutables-0.16-cp38-cp38-win_amd64.whl", hash = "sha256:b651b61c1af6cda2ee201450f2ffe048a5959bc88e43e6c312f4c93e69c9e929"}, 739 | {file = "immutables-0.16-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:aa7bf572ae1e006104c584be70dc634849cf0dc62f42f4ee194774f97e7fd17d"}, 740 | {file = "immutables-0.16-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:50793a44ba0d228ed8cad4d0925e00dfd62ea32f44ddee8854f8066447272d05"}, 741 | {file = "immutables-0.16-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:799621dcdcdcbb2516546a40123b87bf88de75fe7459f7bd8144f079ace6ec3e"}, 742 | {file = "immutables-0.16-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:7bcf52aeb983bd803b7c6106eae1b2d9a0c7ab1241bc6b45e2174ba2b7283031"}, 743 | {file = "immutables-0.16-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:734c269e82e5f307fb6e17945953b67659d1731e65309787b8f7ba267d1468f2"}, 744 | {file = "immutables-0.16-cp39-cp39-win32.whl", hash = "sha256:a454d5d3fee4b7cc627345791eb2ca4b27fa3bbb062ccf362ecaaa51679a07ed"}, 745 | {file = "immutables-0.16-cp39-cp39-win_amd64.whl", hash = "sha256:2505d93395d3f8ae4223e21465994c3bc6952015a38dc4f03cb3e07a2b8d8325"}, 746 | {file = "immutables-0.16.tar.gz", hash = "sha256:d67e86859598eed0d926562da33325dac7767b7b1eff84e232c22abea19f4360"}, 747 | ] 748 | importlib-metadata = [ 749 | {file = "importlib_metadata-4.8.2-py3-none-any.whl", hash = "sha256:53ccfd5c134223e497627b9815d5030edf77d2ed573922f7a0b8f8bb81a1c100"}, 750 | {file = "importlib_metadata-4.8.2.tar.gz", hash = "sha256:75bdec14c397f528724c1bfd9709d660b33a4d2e77387a3358f20b848bb5e5fb"}, 751 | ] 752 | jinja2 = [ 753 | {file = "Jinja2-3.0.3-py3-none-any.whl", hash = "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8"}, 754 | {file = "Jinja2-3.0.3.tar.gz", hash = "sha256:611bb273cd68f3b993fabdc4064fc858c5b47a973cb5aa7999ec1ba405c87cd7"}, 755 | ] 756 | jsonschema = [ 757 | {file = "jsonschema-2.6.0-py2.py3-none-any.whl", hash = "sha256:000e68abd33c972a5248544925a0cae7d1125f9bf6c58280d37546b946769a08"}, 758 | {file = "jsonschema-2.6.0.tar.gz", hash = "sha256:6ff5f3180870836cae40f06fa10419f557208175f13ad7bc26caa77beb1f6e02"}, 759 | ] 760 | kenlm = [] 761 | lemminflect = [ 762 | {file = "lemminflect-0.2.1-py3-none-any.whl", hash = "sha256:96dc0cf32aa1973a00deb369a413d032cf005ac9872a249283264d70b85a1da5"}, 763 | {file = "lemminflect-0.2.1.tar.gz", hash = "sha256:46f439d8e8237efb429173c9f83d00038e9a4db3c668b436034c9ca783c35a53"}, 764 | ] 765 | markupsafe = [ 766 | {file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"}, 767 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"}, 768 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b"}, 769 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94"}, 770 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"}, 771 | {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f"}, 772 | {file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = "sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"}, 773 | {file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"}, 774 | {file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"}, 775 | {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6557b31b5e2c9ddf0de32a691f2312a32f77cd7681d8af66c2692efdbef84c18"}, 776 | {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:49e3ceeabbfb9d66c3aef5af3a60cc43b85c33df25ce03d0031a608b0a8b2e3f"}, 777 | {file = 
"MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f"}, 778 | {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2"}, 779 | {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d"}, 780 | {file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"}, 781 | {file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"}, 782 | {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066"}, 783 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35"}, 784 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b"}, 785 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298"}, 786 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75"}, 787 | {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb"}, 788 | {file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"}, 789 | {file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"}, 790 | {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"}, 791 | {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3c112550557578c26af18a1ccc9e090bfe03832ae994343cfdacd287db6a6ae7"}, 792 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_i686.whl", hash = "sha256:53edb4da6925ad13c07b6d26c2a852bd81e364f95301c66e930ab2aef5b5ddd8"}, 793 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f5653a225f31e113b152e56f154ccbe59eeb1c7487b39b9d9f9cdb58e6c79dc5"}, 794 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135"}, 795 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902"}, 796 | {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509"}, 797 | {file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"}, 798 | {file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"}, 799 | {file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"}, 800 | ] 801 | more-itertools = [ 802 | {file = "more-itertools-8.12.0.tar.gz", hash = "sha256:7dc6ad46f05f545f900dd59e8dfb4e84a4827b97b3cfecb175ea0c7d247f6064"}, 803 | {file = 
"more_itertools-8.12.0-py3-none-any.whl", hash = "sha256:43e6dd9942dffd72661a2c4ef383ad7da1e6a3e968a927ad7a6083ab410a688b"}, 804 | ] 805 | murmurhash = [ 806 | {file = "murmurhash-1.0.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a814d559afe2a97ad40accf21ce96e8b04a3ff5a08f80c02b7acd427dbb7d567"}, 807 | {file = "murmurhash-1.0.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c7b8cc4a8db1c821b80f8ca70a25c3166b14d68ecef8693a117c6a0b1d74ace"}, 808 | {file = "murmurhash-1.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:e40790fdaf65213d70da4ed9229f16f6d6376310dc8fc23eacc98e6151c6ae7e"}, 809 | {file = "murmurhash-1.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a78d53f047c3410ce4c589d9b47090f628f844ed5694418144e63cfe7f3da7e9"}, 810 | {file = "murmurhash-1.0.6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d69cc0ffc0ef6d37399b8a0484a44f9877e531ebc164e55105e89738ed52089"}, 811 | {file = "murmurhash-1.0.6-cp36-cp36m-win_amd64.whl", hash = "sha256:8de08d145c85bb7ba89cb1b591742e3ef54cede73e35f62752af687a4a1859f7"}, 812 | {file = "murmurhash-1.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7dc5a79346afa07f14384926c335c0c455226d687d1305b9378264875b450e51"}, 813 | {file = "murmurhash-1.0.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab326b172dc470331490bda516d4d6d7578c91445ad83a2a3418ac1b9c5f9f55"}, 814 | {file = "murmurhash-1.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:2911bc3e8040dfaac536b141539b0351915f1439953f0aa9e957f082cff035a6"}, 815 | {file = "murmurhash-1.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:de267459d040c96727ba141075d5bc983ec69c6f75b6df1b703e3b5cd7090382"}, 816 | {file = "murmurhash-1.0.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90a8e06872015d6f9f66a42669e003a1df8be229defef69cd98546f4cb25546d"}, 817 | {file = "murmurhash-1.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:773411eba268bf524c012e781f4405aacb9ef4edc063d1f6b38bbf06358b988e"}, 818 | {file = "murmurhash-1.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f4ef3b26229ff192032a12653d637313e1231d23e788b83a2f4a3d8e2bf2d031"}, 819 | {file = "murmurhash-1.0.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92cd7196974307143ce8e9e9b6e22e0a57abf30bdd5a1effe696b4825677e616"}, 820 | {file = "murmurhash-1.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:cdd1036688341413e5adef32b3fd58e8b44f24405f394f90129f39ed879e4f24"}, 821 | {file = "murmurhash-1.0.6.tar.gz", hash = "sha256:00a5252b569d3f914b5bd0bce72d2efe9c0fb91a9703556ea1b608b141c68f2d"}, 822 | ] 823 | numpy = [ 824 | {file = "numpy-1.19.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cc6bd4fd593cb261332568485e20a0712883cf631f6f5e8e86a52caa8b2b50ff"}, 825 | {file = "numpy-1.19.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:aeb9ed923be74e659984e321f609b9ba54a48354bfd168d21a2b072ed1e833ea"}, 826 | {file = "numpy-1.19.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:8b5e972b43c8fc27d56550b4120fe6257fdc15f9301914380b27f74856299fea"}, 827 | {file = "numpy-1.19.5-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:43d4c81d5ffdff6bae58d66a3cd7f54a7acd9a0e7b18d97abb255defc09e3140"}, 828 | {file = "numpy-1.19.5-cp36-cp36m-manylinux2010_x86_64.whl", hash = 
"sha256:a4646724fba402aa7504cd48b4b50e783296b5e10a524c7a6da62e4a8ac9698d"}, 829 | {file = "numpy-1.19.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:2e55195bc1c6b705bfd8ad6f288b38b11b1af32f3c8289d6c50d47f950c12e76"}, 830 | {file = "numpy-1.19.5-cp36-cp36m-win32.whl", hash = "sha256:39b70c19ec771805081578cc936bbe95336798b7edf4732ed102e7a43ec5c07a"}, 831 | {file = "numpy-1.19.5-cp36-cp36m-win_amd64.whl", hash = "sha256:dbd18bcf4889b720ba13a27ec2f2aac1981bd41203b3a3b27ba7a33f88ae4827"}, 832 | {file = "numpy-1.19.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:603aa0706be710eea8884af807b1b3bc9fb2e49b9f4da439e76000f3b3c6ff0f"}, 833 | {file = "numpy-1.19.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:cae865b1cae1ec2663d8ea56ef6ff185bad091a5e33ebbadd98de2cfa3fa668f"}, 834 | {file = "numpy-1.19.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:36674959eed6957e61f11c912f71e78857a8d0604171dfd9ce9ad5cbf41c511c"}, 835 | {file = "numpy-1.19.5-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:06fab248a088e439402141ea04f0fffb203723148f6ee791e9c75b3e9e82f080"}, 836 | {file = "numpy-1.19.5-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6149a185cece5ee78d1d196938b2a8f9d09f5a5ebfbba66969302a778d5ddd1d"}, 837 | {file = "numpy-1.19.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:50a4a0ad0111cc1b71fa32dedd05fa239f7fb5a43a40663269bb5dc7877cfd28"}, 838 | {file = "numpy-1.19.5-cp37-cp37m-win32.whl", hash = "sha256:d051ec1c64b85ecc69531e1137bb9751c6830772ee5c1c426dbcfe98ef5788d7"}, 839 | {file = "numpy-1.19.5-cp37-cp37m-win_amd64.whl", hash = "sha256:a12ff4c8ddfee61f90a1633a4c4afd3f7bcb32b11c52026c92a12e1325922d0d"}, 840 | {file = "numpy-1.19.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cf2402002d3d9f91c8b01e66fbb436a4ed01c6498fffed0e4c7566da1d40ee1e"}, 841 | {file = "numpy-1.19.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1ded4fce9cfaaf24e7a0ab51b7a87be9038ea1ace7f34b841fe3b6894c721d1c"}, 842 | {file = "numpy-1.19.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:012426a41bc9ab63bb158635aecccc7610e3eff5d31d1eb43bc099debc979d94"}, 843 | {file = "numpy-1.19.5-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:759e4095edc3c1b3ac031f34d9459fa781777a93ccc633a472a5468587a190ff"}, 844 | {file = "numpy-1.19.5-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:a9d17f2be3b427fbb2bce61e596cf555d6f8a56c222bd2ca148baeeb5e5c783c"}, 845 | {file = "numpy-1.19.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:99abf4f353c3d1a0c7a5f27699482c987cf663b1eac20db59b8c7b061eabd7fc"}, 846 | {file = "numpy-1.19.5-cp38-cp38-win32.whl", hash = "sha256:384ec0463d1c2671170901994aeb6dce126de0a95ccc3976c43b0038a37329c2"}, 847 | {file = "numpy-1.19.5-cp38-cp38-win_amd64.whl", hash = "sha256:811daee36a58dc79cf3d8bdd4a490e4277d0e4b7d103a001a4e73ddb48e7e6aa"}, 848 | {file = "numpy-1.19.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c843b3f50d1ab7361ca4f0b3639bf691569493a56808a0b0c54a051d260b7dbd"}, 849 | {file = "numpy-1.19.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:d6631f2e867676b13026e2846180e2c13c1e11289d67da08d71cacb2cd93d4aa"}, 850 | {file = "numpy-1.19.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7fb43004bce0ca31d8f13a6eb5e943fa73371381e53f7074ed21a4cb786c32f8"}, 851 | {file = "numpy-1.19.5-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2ea52bd92ab9f768cc64a4c3ef8f4b2580a17af0a5436f6126b08efbd1838371"}, 852 | {file = "numpy-1.19.5-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:400580cbd3cff6ffa6293df2278c75aef2d58d8d93d3c5614cd67981dae68ceb"}, 853 | {file = 
"numpy-1.19.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:df609c82f18c5b9f6cb97271f03315ff0dbe481a2a02e56aeb1b1a985ce38e60"}, 854 | {file = "numpy-1.19.5-cp39-cp39-win32.whl", hash = "sha256:ab83f24d5c52d60dbc8cd0528759532736b56db58adaa7b5f1f76ad551416a1e"}, 855 | {file = "numpy-1.19.5-cp39-cp39-win_amd64.whl", hash = "sha256:0eef32ca3132a48e43f6a0f5a82cb508f22ce5a3d6f67a8329c81c8e226d3f6e"}, 856 | {file = "numpy-1.19.5-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:a0d53e51a6cb6f0d9082decb7a4cb6dfb33055308c4c44f53103c073f649af73"}, 857 | {file = "numpy-1.19.5.zip", hash = "sha256:a76f502430dd98d7546e1ea2250a7360c065a5fdea52b2dffe8ae7180909b6f4"}, 858 | ] 859 | packaging = [ 860 | {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, 861 | {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, 862 | ] 863 | pathy = [ 864 | {file = "pathy-0.6.1-py3-none-any.whl", hash = "sha256:25fd04cec6393661113086730ce69c789d121bea83ab1aa18452e8fd42faf29a"}, 865 | {file = "pathy-0.6.1.tar.gz", hash = "sha256:838624441f799a06b446a657e4ecc9ebc3fdd05234397e044a7c87e8f6e76b1c"}, 866 | ] 867 | pluggy = [ 868 | {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, 869 | {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, 870 | ] 871 | preshed = [ 872 | {file = "preshed-3.0.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a9683730127658b531120b4ed5cff1f2a567318ab75e9ab0f22cc84ae1486c23"}, 873 | {file = "preshed-3.0.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c98f725d8478f3ade4ab1ea00f50a92d2d9406d37276bc46fd8bab1d47452c4"}, 874 | {file = "preshed-3.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:ea8aa9610837e907e8442e79300df0a861bfdb4dcaf026a5d9642a688ad04815"}, 875 | {file = "preshed-3.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e03ae3eee961106a517fcd827b5a7c51f7317236b3e665c989054ab8dc381d28"}, 876 | {file = "preshed-3.0.6-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58661bea8d0d63a648588511407285e43d43627e27f836e30819801fb3c75d70"}, 877 | {file = "preshed-3.0.6-cp36-cp36m-win_amd64.whl", hash = "sha256:5f99837e7353ce1fa81f0074d4b15f36e0af5af60a2a54d4d11e13cb09768a9e"}, 878 | {file = "preshed-3.0.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8c60a400babfc5b25ba371fda7041be227f7c625e1fb7a43329c2c08fe00a53b"}, 879 | {file = "preshed-3.0.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61b2ea656cb1c38d544cc774f1c2ad1cdab23167b46b35310a7e211d4ba9c6d0"}, 880 | {file = "preshed-3.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:87e1add41b7f6236a3ccc34788f47ab8682bc28e8a2d369089062e274494c1a0"}, 881 | {file = "preshed-3.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a279c138ad1d5be02547b1545254929588414b01571fe637016367f6a1aa11de"}, 882 | {file = "preshed-3.0.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3af09f4cfcdaca085fd87dac8107617c4e2bb0ad1458f953841b71e9728287f5"}, 883 | {file = "preshed-3.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:f92e752a868ea2690e1b38c4b775251a145e0fce36b9bdd972539e8271b7a23a"}, 884 | {file = 
"preshed-3.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eaffbc71fdb8625f9aac4fe7e19e20bf318d1421ea05903bebe3e6ffef27b587"}, 885 | {file = "preshed-3.0.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfe1495fcfc7f479de840ddc4f426dbb55351e218ae5c8712c1269183a4d0060"}, 886 | {file = "preshed-3.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:92a8f49d17a63537a8beed48a049b62ef168ca07e0042a5b2bcdf178a1fb5d48"}, 887 | {file = "preshed-3.0.6.tar.gz", hash = "sha256:fb3b7588a3a0f2f2f1bf3fe403361b2b031212b73a37025aea1df7215af3772a"}, 888 | ] 889 | py = [ 890 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, 891 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 892 | ] 893 | pydantic = [ 894 | {file = "pydantic-1.7.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3c60039e84552442defbcb5d56711ef0e057028ca7bfc559374917408a88d84e"}, 895 | {file = "pydantic-1.7.4-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:6e7e314acb170e143c6f3912f93f2ec80a96aa2009ee681356b7ce20d57e5c62"}, 896 | {file = "pydantic-1.7.4-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:8ef77cd17b73b5ba46788d040c0e820e49a2d80cfcd66fda3ba8be31094fd146"}, 897 | {file = "pydantic-1.7.4-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:115d8aa6f257a1d469c66b6bfc7aaf04cd87c25095f24542065c68ebcb42fe63"}, 898 | {file = "pydantic-1.7.4-cp36-cp36m-win_amd64.whl", hash = "sha256:66757d4e1eab69a3cfd3114480cc1d72b6dd847c4d30e676ae838c6740fdd146"}, 899 | {file = "pydantic-1.7.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4c92863263e4bd89e4f9cf1ab70d918170c51bd96305fe7b00853d80660acb26"}, 900 | {file = "pydantic-1.7.4-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:3b8154babf30a5e0fa3aa91f188356763749d9b30f7f211fafb247d4256d7877"}, 901 | {file = "pydantic-1.7.4-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:80cc46378505f7ff202879dcffe4bfbf776c15675028f6e08d1d10bdfbb168ac"}, 902 | {file = "pydantic-1.7.4-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:dda60d7878a5af2d8560c55c7c47a8908344aa78d32ec1c02d742ede09c534df"}, 903 | {file = "pydantic-1.7.4-cp37-cp37m-win_amd64.whl", hash = "sha256:4c1979d5cc3e14b35f0825caddea5a243dd6085e2a7539c006bc46997ef7a61a"}, 904 | {file = "pydantic-1.7.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8857576600c32aa488f18d30833aa833b54a48e3bab3adb6de97e463af71f8f8"}, 905 | {file = "pydantic-1.7.4-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1f86d4da363badb39426a0ff494bf1d8510cd2f7274f460eee37bdbf2fd495ec"}, 906 | {file = "pydantic-1.7.4-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:3ea1256a9e782149381e8200119f3e2edea7cd6b123f1c79ab4bbefe4d9ba2c9"}, 907 | {file = "pydantic-1.7.4-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:e28455b42a0465a7bf2cde5eab530389226ce7dc779de28d17b8377245982b1e"}, 908 | {file = "pydantic-1.7.4-cp38-cp38-win_amd64.whl", hash = "sha256:47c5b1d44934375a3311891cabd450c150a31cf5c22e84aa172967bf186718be"}, 909 | {file = "pydantic-1.7.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:00250e5123dd0b123ff72be0e1b69140e0b0b9e404d15be3846b77c6f1b1e387"}, 910 | {file = "pydantic-1.7.4-cp39-cp39-manylinux1_i686.whl", hash = "sha256:d24aa3f7f791a023888976b600f2f389d3713e4f23b7a4c88217d3fce61cdffc"}, 911 | {file = "pydantic-1.7.4-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:2c44a9afd4c4c850885436a4209376857989aaf0853c7b118bb2e628d4b78c4e"}, 912 | {file = 
"pydantic-1.7.4-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:e87edd753da0ca1d44e308a1b1034859ffeab1f4a4492276bff9e1c3230db4fe"}, 913 | {file = "pydantic-1.7.4-cp39-cp39-win_amd64.whl", hash = "sha256:a3026ee105b5360855e500b4abf1a1d0b034d88e75a2d0d66a4c35e60858e15b"}, 914 | {file = "pydantic-1.7.4-py3-none-any.whl", hash = "sha256:a82385c6d5a77e3387e94612e3e34b77e13c39ff1295c26e3ba664e7b98073e2"}, 915 | {file = "pydantic-1.7.4.tar.gz", hash = "sha256:0a1abcbd525fbb52da58c813d54c2ec706c31a91afdb75411a73dd1dec036595"}, 916 | ] 917 | pyfunctional = [ 918 | {file = "PyFunctional-1.3.0-py2-none-any.whl", hash = "sha256:23ef891a3bd34e5e3fb7ccfccf22ddd68309f53367997d9acd61c8b153b99d11"}, 919 | {file = "PyFunctional-1.3.0-py3-none-any.whl", hash = "sha256:e157b6a387523c64bfcca0e6e823c5c66fc3f9cad458b3cd9ec8be32a7d45cf2"}, 920 | {file = "PyFunctional-1.3.0.tar.gz", hash = "sha256:d2b735c5bfb3b4d7977734e5e92d03f53389de6dc539c609f48c748b93e94fe0"}, 921 | ] 922 | pyparsing = [ 923 | {file = "pyparsing-3.0.6-py3-none-any.whl", hash = "sha256:04ff808a5b90911829c55c4e26f75fa5ca8a2f5f36aa3a51f68e27033341d3e4"}, 924 | {file = "pyparsing-3.0.6.tar.gz", hash = "sha256:d9bdec0013ef1eb5a84ab39a3b3868911598afa494f5faa038647101504e2b81"}, 925 | ] 926 | pytest = [ 927 | {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, 928 | {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, 929 | ] 930 | requests = [ 931 | {file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"}, 932 | {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, 933 | ] 934 | six = [ 935 | {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, 936 | {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, 937 | ] 938 | smart-open = [ 939 | {file = "smart_open-5.2.1-py3-none-any.whl", hash = "sha256:71d14489da58b60ce12fc3ecb823facc59a8b23cd1b58edb97175640350d3a62"}, 940 | {file = "smart_open-5.2.1.tar.gz", hash = "sha256:75abf758717a92a8f53aa96953f0c245c8cedf8e1e4184903db3659b419d4c17"}, 941 | ] 942 | spacy = [ 943 | {file = "spacy-3.0.7-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:bd0ecec5a9c86c9b8c24f82d2e71cd7d0d5bc71e4aa79f945e1e6e6860e28b85"}, 944 | {file = "spacy-3.0.7-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58588a5f4afb49d42843ef7c6a357227ea9f6f8af6330f4e9e9a6cfa0ea65493"}, 945 | {file = "spacy-3.0.7-cp36-cp36m-win_amd64.whl", hash = "sha256:70053c65f36c89ea367b3f43df5d04540c1cbe54ba5d36e384b43a01b371aa87"}, 946 | {file = "spacy-3.0.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c441e924b9fae7db6dfdf75547c8ac0b8a91ad89dd9911f2b3a55bfa9cd45fcd"}, 947 | {file = "spacy-3.0.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e68ebe122452c49e500c5d3d3fd2a6e68c0bb97e309fda685f06535c934843"}, 948 | {file = "spacy-3.0.7-cp37-cp37m-win_amd64.whl", hash = "sha256:1771bf6fa93d505a763b314a70f1fe7ea21070d29097ae9afb0ee82e7fd84a23"}, 949 | {file = "spacy-3.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2a8724a6b64f05fcb39dda0567b6bfe0925e62fe13f0fe23df0cf0559d818b72"}, 950 | {file = "spacy-3.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:249df343923a8751a138ef0954dd4544be1c00eb5e0d551d72e3a8aa37e5c39d"}, 951 | {file = "spacy-3.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:77399e1db1fc7cffdf0e7384011efdb10d799663fcdbb32ee63b9113ad93041d"}, 952 | {file = "spacy-3.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:84af984d2f66a2c87a0475657929fd442a25b12e439a18a81a09facba0f0d2d6"}, 953 | {file = "spacy-3.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebe4f65e161e1445ec7a9209172868bb613214ae8476d53802aaf8ffd5bf8fda"}, 954 | {file = "spacy-3.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:3ae599df111c91a609e79483fc15fd2561c045ff67d5e637ce93951430337abf"}, 955 | {file = "spacy-3.0.7.tar.gz", hash = "sha256:f49c903d4a04598c080bc0b31e666522d9ba340d67ca8ce0ab96f4578afd597f"}, 956 | ] 957 | spacy-legacy = [ 958 | {file = "spacy-legacy-3.0.8.tar.gz", hash = "sha256:b4725c5c161f0685ab4fce3fc912bc68aefdb7e102ba9848e852bb5842256c2f"}, 959 | {file = "spacy_legacy-3.0.8-py2.py3-none-any.whl", hash = "sha256:eb37a3540bb461b5fe9348d4976784f18a0e345982e41e2c5c7cd8229889e825"}, 960 | ] 961 | srsly = [ 962 | {file = "srsly-2.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5e22bbc1a20abf749fa53adf101c36bc369ec63f496c7a44bf4f5f287d724900"}, 963 | {file = "srsly-2.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:004d29a5abc0fe632434359c0be170490a69c4dce2c3de8a769944c37da7bb4b"}, 964 | {file = "srsly-2.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7ced7ec4993b4d4ad73cc442f8f7a518368348054d510864b1aa149e8d71654d"}, 965 | {file = "srsly-2.4.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:801c7e6e32c6a4721ab78ab7dafd01074fdb144f4876c09b25305c98f95c470f"}, 966 | {file = "srsly-2.4.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff042c5c3cc1eecd7cbb0a218975a7fd7f331a7f0a3f2e19eb0d6192a98bfdf7"}, 967 | {file = "srsly-2.4.2-cp36-cp36m-win_amd64.whl", hash = "sha256:11b99f16a95fac43905bc31a4705b80ca8a23f201a5cb611a278e3b2d83c6175"}, 968 | {file = "srsly-2.4.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0df68c021ed3f481a5b2e408b57dc40caac66d36b17ef5235b14e9e6a2e24d68"}, 969 | {file = "srsly-2.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d590856db1e639b92c1a78b0cc1fe0d9436dd49037c9961bce959af5d7f66755"}, 970 | {file = "srsly-2.4.2-cp37-cp37m-win_amd64.whl", hash = "sha256:589118f912125742414125b7d671610bf2fe11382e79f1df8ec9324a915a3a18"}, 971 | {file = "srsly-2.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cadf13096c7157212c53c0a1af868eececf54e86ffb4e0429dff05d1b9bc423a"}, 972 | {file = "srsly-2.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6259e9904ceb4802bcd4ce1114958ebdc30b756a87b94b0949a57ffd4f63421b"}, 973 | {file = "srsly-2.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:a2e8ee5f3a2a3a816b1d3d989d1b343d77900fa6b84e11c9fc1ac202d1a5dd17"}, 974 | {file = "srsly-2.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7e16f2a34d2d8ac6c6e1691f54ce27a5b4feb923207a9e294496458b98b0510"}, 975 | {file = "srsly-2.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4bc36962208810d29c72156e0573dcbabd9914f42fede42217ccfcadd96beb6"}, 976 | {file = "srsly-2.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:090072830cf2d5bd6765705a02463f586db8a586805d1c31a72080f971d311b5"}, 977 | {file = "srsly-2.4.2.tar.gz", hash = "sha256:2aba252292767875086adf4e4380e27b024d73655456f796f8e07eb3a4dfacc0"}, 978 | ] 979 | tabulate = [ 980 | {file = 
"tabulate-0.8.9-py3-none-any.whl", hash = "sha256:d7c013fe7abbc5e491394e10fa845f8f32fe54f8dc60c6622c6cf482d25d47e4"}, 981 | {file = "tabulate-0.8.9.tar.gz", hash = "sha256:eb1d13f25760052e8931f2ef80aaf6045a6cceb47514db8beab24cded16f13a7"}, 982 | ] 983 | thinc = [ 984 | {file = "thinc-8.0.13-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f818b9f012169a11beb3561c43dc52080588e50cf495733e492efab8b9b4135e"}, 985 | {file = "thinc-8.0.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f520daf45b7f42a04363852df43be1b423ae42d9327709d74f6c3279b3f73778"}, 986 | {file = "thinc-8.0.13-cp310-cp310-win_amd64.whl", hash = "sha256:2b217059c9e126220b77e7d6c9da56912c4e1eb4e8a11af14f17752e198e88cc"}, 987 | {file = "thinc-8.0.13-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0f956c693d180209075703072fd226a24408cbe80eb67bd3b6eea407f61cb283"}, 988 | {file = "thinc-8.0.13-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17d87469082b82c27b7d40dd86c793fc34c60f734209ee056cb02d7609f255b"}, 989 | {file = "thinc-8.0.13-cp36-cp36m-win_amd64.whl", hash = "sha256:27ea64843d6af0f3de8c788ec2a00598a1e5b4d57aadb52845fa42e95e4038c2"}, 990 | {file = "thinc-8.0.13-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f274bcaa781aaf1dba5eac7da7d88d9b0cb8c2fd7477647f0ca9d3221dfb958"}, 991 | {file = "thinc-8.0.13-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d52a5621e1784af5c64af4cfa9b2924358ca07aafd99014c57a736cf032e42f7"}, 992 | {file = "thinc-8.0.13-cp37-cp37m-win_amd64.whl", hash = "sha256:753f65e07860553551ed8806b934a74f26a4a50985d556ecd5c4ab50c29b3222"}, 993 | {file = "thinc-8.0.13-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ffe0a4d74f2ba2819193a5d9179156256f44c69255d7ae286ce1861efcefbc64"}, 994 | {file = "thinc-8.0.13-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b61f78f6f116d23438b034c3552804c9767c4165960b1d7e48f07b2e9a95afb0"}, 995 | {file = "thinc-8.0.13-cp38-cp38-win_amd64.whl", hash = "sha256:ba576af211ad2b00af78ab3e24e689289b29af8a9e51619ad55fab86871d8652"}, 996 | {file = "thinc-8.0.13-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:def8e96eddb5a098d07dcf8752266095e14a6cf5d056ff766e2cdc542eb63f02"}, 997 | {file = "thinc-8.0.13-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce322b66053819654d0444877154a08ed01cf5b45c6b3c9763e59b78af4f6039"}, 998 | {file = "thinc-8.0.13-cp39-cp39-win_amd64.whl", hash = "sha256:b3ae088f60d3dfe6a88c6be37548aae40023e46a718cffe3e43953b4f0ffc340"}, 999 | {file = "thinc-8.0.13.tar.gz", hash = "sha256:47662a3ae33d445a77b6ea7b772444805c7bba8991f122e350daf72dedc8171a"}, 1000 | ] 1001 | tqdm = [ 1002 | {file = "tqdm-4.62.3-py2.py3-none-any.whl", hash = "sha256:8dd278a422499cd6b727e6ae4061c40b48fce8b76d1ccbf5d34fca9b7f925b0c"}, 1003 | {file = "tqdm-4.62.3.tar.gz", hash = "sha256:d359de7217506c9851b7869f3708d8ee53ed70a1b8edbba4dbcb47442592920d"}, 1004 | ] 1005 | typer = [ 1006 | {file = "typer-0.3.2-py3-none-any.whl", hash = "sha256:ba58b920ce851b12a2d790143009fa00ac1d05b3ff3257061ff69dbdfc3d161b"}, 1007 | {file = "typer-0.3.2.tar.gz", hash = "sha256:5455d750122cff96745b0dec87368f56d023725a7ebc9d2e54dd23dc86816303"}, 1008 | ] 1009 | typing-extensions = [ 1010 | {file = "typing_extensions-3.10.0.2-py2-none-any.whl", hash = "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7"}, 1011 | {file = "typing_extensions-3.10.0.2-py3-none-any.whl", hash = "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34"}, 1012 | 
{file = "typing_extensions-3.10.0.2.tar.gz", hash = "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e"}, 1013 | ] 1014 | urllib3 = [ 1015 | {file = "urllib3-1.26.7-py2.py3-none-any.whl", hash = "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"}, 1016 | {file = "urllib3-1.26.7.tar.gz", hash = "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece"}, 1017 | ] 1018 | wasabi = [ 1019 | {file = "wasabi-0.8.2-py3-none-any.whl", hash = "sha256:a493e09d86109ec6d9e70d040472f9facc44634d4ae6327182f94091ca73a490"}, 1020 | {file = "wasabi-0.8.2.tar.gz", hash = "sha256:b4a36aaa9ca3a151f0c558f269d442afbb3526f0160fd541acd8a0d5e5712054"}, 1021 | ] 1022 | wcwidth = [ 1023 | {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, 1024 | {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, 1025 | ] 1026 | zipp = [ 1027 | {file = "zipp-3.6.0-py3-none-any.whl", hash = "sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc"}, 1028 | {file = "zipp-3.6.0.tar.gz", hash = "sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832"}, 1029 | ] 1030 | --------------------------------------------------------------------------------