├── tests ├── __init__.py ├── data │ ├── seq2seq │ │ ├── seq2seq.csv │ │ └── seq2seq.jsonl │ ├── classification │ │ ├── classification.csv │ │ └── classification.jsonl │ └── labeling │ │ ├── labeling.conll2003.user1 │ │ ├── labeling_text_label.jsonl │ │ ├── labeling.conll2003.user2 │ │ ├── labeling_text_label.conll2003.user0 │ │ ├── labeling.jsonl │ │ ├── labeling.spacy.user1 │ │ ├── labeling.spacy.user2 │ │ └── labeling_text_label.spacy.user0 ├── test_utils.py └── test_datasets.py ├── doccano_transformer ├── __init__.py ├── datasets.py ├── utils.py └── examples.py ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── 04-request.md │ ├── 02-bug.md │ ├── 01-question.md │ └── 03-install.md ├── workflows │ ├── ci.yml │ ├── package-installation.yml │ ├── pypi-publish.yml │ └── codeql-analysis.yml └── PULL_REQUEST_TEMPLATE.md ├── Pipfile ├── LICENSE ├── setup.py ├── README.md ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md └── Pipfile.lock /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doccano_transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | max-complexity = 18 4 | -------------------------------------------------------------------------------- /tests/data/seq2seq/seq2seq.csv: -------------------------------------------------------------------------------- 1 | annotation_approver,id,text,user 2 | ,2588,誰 が 一番 に 着 く か 私 に は 分か り ま せ ん 。,1 3 | ,2589,多く の 動物 が 人間 に よ っ て 滅ぼ さ れ た 。,2 4 | ,2590,私 は テニス 部員 で す 。,1 5 | -------------------------------------------------------------------------------- /tests/data/classification/classification.csv: 
-------------------------------------------------------------------------------- 1 | annotation_approver,id,label,text,user 2 | ,2591,15,Terrible customer service.,1 3 | ,2592,17,Really great transaction.,1 4 | ,2593,17,Great price.,1 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/04-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F381 Feature Request" 3 | about: For the proposal to improve or enhance doccano-transformer 4 | 5 | --- 6 | 7 | Feature description 8 | --------- 9 | 10 | -------------------------------------------------------------------------------- /tests/data/classification/classification.jsonl: -------------------------------------------------------------------------------- 1 | {"id": 2591, "text": "Terrible customer service.", "annotations": [{"label": 15, "user": 1}], "meta": {}, "annotation_approver": null} 2 | {"id": 2592, "text": "Really great transaction.", "annotations": [{"label": 17, "user": 1}], "meta": {}, "annotation_approver": null} 3 | {"id": 2593, "text": "Great price.", "annotations": [{"label": 17, "user": 1}], "meta": {}, "annotation_approver": null} 4 | -------------------------------------------------------------------------------- /tests/data/labeling/labeling.conll2003.user1: -------------------------------------------------------------------------------- 1 | -DOCSTART- -X- -X- O 2 | 3 | SOCCER _ _ O 4 | - _ _ O 5 | JAPAN _ _ B-12 6 | GET _ _ O 7 | LUCKY _ _ O 8 | WIN _ _ O 9 | , _ _ O 10 | CHINA _ _ B-13 11 | IN _ _ O 12 | SURPRISE _ _ O 13 | DEFEAT _ _ O 14 | . 
_ _ O 15 | 16 | Nadim _ _ B-13 17 | Ladki _ _ I-13 18 | 19 | AL-AIN _ _ B-12 20 | , _ _ O 21 | United _ _ B-12 22 | Arab _ _ I-12 23 | Emirates _ _ I-12 24 | 1996-12-06 _ _ O 25 | 26 | -------------------------------------------------------------------------------- /tests/data/seq2seq/seq2seq.jsonl: -------------------------------------------------------------------------------- 1 | {"id": 2588, "text": "i can 't tell who will arrive first .", "annotations": [{"text": "誰 が 一番 に 着 く か 私 に は 分か り ま せ ん 。", "user": 1}], "meta": {}, "annotation_approver": null} 2 | {"id": 2589, "text": "many animals have been destroyed by men .", "annotations": [{"text": "多く の 動物 が 人間 に よ っ て 滅ぼ さ れ た 。", "user": 2}], "meta": {}, "annotation_approver": null} 3 | {"id": 2590, "text": "i 'm in the tennis club .", "annotations": [{"text": "私 は テニス 部員 で す 。", "user": 1}], "meta": {}, "annotation_approver": null} 4 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | pytest = "*" 8 | flake8 = "*" 9 | isort = {extras = ["pipfile"],version = "*"} 10 | autopep8 = "*" 11 | ipython = "*" 12 | pytest-datadir = "*" 13 | pytest-cov = "*" 14 | 15 | [packages] 16 | spacy = "*" 17 | 18 | [requires] 19 | python_version = "3.8" 20 | 21 | [scripts] 22 | isort = "isort . 
-c" 23 | test = "pytest tests --cov=doccano_transformer --cov-report=term-missing -vv" 24 | flake8 = "flake8 doccano_transformer --ignore=F401,E741" 25 | -------------------------------------------------------------------------------- /tests/data/labeling/labeling_text_label.jsonl: -------------------------------------------------------------------------------- 1 | {"id": 2578, "text": "SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT .\nNadim Ladki\nAL-AIN , United Arab Emirates 1996-12-06", "meta": {}, "annotation_approver": null, "labels": [[9, 14, "LOC"], [31, 36, "PER"], [58, 69, "PER"], [70, 76, "LOC"], [79, 99, "LOC"]]} 2 | {"id": 2580, "text": "RUGBY UNION - CUTTITTA BACK FOR ITALY AFTER A YEAR .\nROME 1996-12-06\nItaly recalled Marcello Cuttitta", "meta": {}, "annotation_approver": null, "labels": [[0, 11, "ORG"], [14, 22, "PER"], [32, 37, "LOC"], [53, 57, "LOC"], [69, 74, "LOC"], [84, 101, "PER"]]} 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/02-bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F6A8 Bug Report" 3 | about: For the bug report or unexpected behavior differing from the docs 4 | 5 | --- 6 | 7 | How to reproduce the behaviour 8 | --------- 9 | 10 | 11 | 12 | 13 | Your Environment 14 | --------- 15 | 16 | * Operating System: 17 | * Python Version Used: 18 | * doccano-transformer Version: 19 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v1 12 | - name: Set up Python 3.8 13 | uses: actions/setup-python@v1 14 | with: 15 | python-version: 3.8 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 
19 | pip install pipenv 20 | pipenv install --dev 21 | - name: Lint with flake8 22 | run: | 23 | pipenv run flake8 24 | - name: Lint with isort 25 | run: | 26 | pipenv run isort 27 | - name: Test with pytest 28 | run: | 29 | pipenv run test 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/01-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4AC Question" 3 | about: For the question to confirm suspiciously behaviors or feature usage. Please use StackOverflow if your question is general usage or help with your environment 4 | 5 | --- 6 | 7 | How to reproduce the behaviour 8 | --------- 9 | 10 | 11 | 13 | 14 | Your Environment 15 | --------- 16 | 17 | 18 | * Operating System: 19 | * Python Version Used: 20 | * doccano-transformer Version: 21 | -------------------------------------------------------------------------------- /.github/workflows/package-installation.yml: -------------------------------------------------------------------------------- 1 | name: test package installation 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * *" 6 | 7 | jobs: 8 | build: 9 | if: contains(github.event.head_commit.message, '[skip ci]') == false 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | python-version: [3.6, 3.7, 3.8] 14 | os: [ubuntu-latest, macos-latest] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Set up Python 19 | uses: actions/setup-python@v2 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install dependencies 23 | run: | 24 | pip install --upgrade pip 25 | pip install -U setuptools 26 | - run: pip install doccano-transformer 27 | -------------------------------------------------------------------------------- /tests/data/labeling/labeling.conll2003.user2: -------------------------------------------------------------------------------- 1 | -DOCSTART- -X- -X- O 2 | 3 | SOCCER _ _ O 4 | - 
_ _ O 5 | JAPAN _ _ B-12 6 | GET _ _ O 7 | LUCKY _ _ O 8 | WIN _ _ O 9 | , _ _ O 10 | CHINA _ _ B-13 11 | IN _ _ O 12 | SURPRISE _ _ O 13 | DEFEAT _ _ O 14 | . _ _ O 15 | 16 | Nadim _ _ B-13 17 | Ladki _ _ I-13 18 | 19 | AL-AIN _ _ B-12 20 | , _ _ O 21 | United _ _ B-12 22 | Arab _ _ I-12 23 | Emirates _ _ I-12 24 | 1996-12-06 _ _ O 25 | 26 | -DOCSTART- -X- -X- O 27 | 28 | RUGBY _ _ B-14 29 | UNION _ _ I-14 30 | - _ _ O 31 | CUTTITTA _ _ B-13 32 | BACK _ _ O 33 | FOR _ _ O 34 | ITALY _ _ B-12 35 | AFTER _ _ O 36 | A _ _ O 37 | YEAR _ _ O 38 | . _ _ O 39 | 40 | ROME _ _ B-12 41 | 1996-12-06 _ _ O 42 | 43 | Italy _ _ B-12 44 | recalled _ _ O 45 | Marcello _ _ B-13 46 | Cuttitta _ _ I-13 47 | 48 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/03-install.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U000023F3 Installation Problem" 3 | about: For the problem that you faced when installing doccano-transformer which none of the suggestions in the docs and other issues helped 4 | 5 | --- 6 | 7 | 8 | 9 | How to reproduce the problem 10 | --------- 11 | 12 | 13 | ```bash 14 | # copy-paste the error message here 15 | ``` 16 | 17 | Your Environment 18 | --------- 19 | 20 | * Operating System: 21 | * Python Version Used: 22 | * doccano-transformer Version: 23 | -------------------------------------------------------------------------------- /tests/data/labeling/labeling_text_label.conll2003.user0: -------------------------------------------------------------------------------- 1 | -DOCSTART- -X- -X- O 2 | 3 | SOCCER _ _ O 4 | - _ _ O 5 | JAPAN _ _ B-LOC 6 | GET _ _ O 7 | LUCKY _ _ O 8 | WIN _ _ O 9 | , _ _ O 10 | CHINA _ _ B-PER 11 | IN _ _ O 12 | SURPRISE _ _ O 13 | DEFEAT _ _ O 14 | . 
_ _ O 15 | 16 | Nadim _ _ B-PER 17 | Ladki _ _ I-PER 18 | 19 | AL-AIN _ _ B-LOC 20 | , _ _ O 21 | United _ _ B-LOC 22 | Arab _ _ I-LOC 23 | Emirates _ _ I-LOC 24 | 1996-12-06 _ _ O 25 | 26 | -DOCSTART- -X- -X- O 27 | 28 | RUGBY _ _ B-ORG 29 | UNION _ _ I-ORG 30 | - _ _ O 31 | CUTTITTA _ _ B-PER 32 | BACK _ _ O 33 | FOR _ _ O 34 | ITALY _ _ B-LOC 35 | AFTER _ _ O 36 | A _ _ O 37 | YEAR _ _ O 38 | . _ _ O 39 | 40 | ROME _ _ B-LOC 41 | 1996-12-06 _ _ O 42 | 43 | Italy _ _ B-LOC 44 | recalled _ _ O 45 | Marcello _ _ B-PER 46 | Cuttitta _ _ I-PER 47 | 48 | -------------------------------------------------------------------------------- /.github/workflows/pypi-publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@master 12 | - name: Setup Python 3.8 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: 3.8 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install setuptools wheel 20 | - name: Build a binary wheel and a source tarball 21 | run: | 22 | python setup.py sdist bdist_wheel 23 | - name: Publish a Python distribution to PyPI 24 | uses: pypa/gh-action-pypi-publish@master 25 | with: 26 | user: ${{ secrets.PYPI_USERNAME }} 27 | password: ${{ secrets.PYPI_PASSWORD }} 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 doccano 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | from setuptools import find_packages, setup 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | 7 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 8 | long_description = f.read() 9 | 10 | 11 | setup( 12 | name='doccano-transformer', 13 | use_scm_version=True, 14 | setup_requires=['setuptools_scm'], 15 | description='Format transformer tool for doccano', 16 | long_description=long_description, 17 | long_description_content_type='text/markdown', 18 | url='https://github.com/doccano/doccano-transformer', 19 | author='Hiroki Nakayama, Yasufumi Taniguchi', 20 | author_email='hiroki.nakayama.py@gmail.com', 21 | classifiers=[ 22 | 'Development Status :: 3 - Alpha', 23 | 'Intended Audience :: Developers', 24 | 'License :: OSI Approved :: MIT License', 25 | 'Programming Language :: Python :: 3', 26 | 'Programming Language :: Python :: 3.8', 27 | 'Programming Language :: Python :: 3 :: Only', 28 | ], 29 | keywords='doccano,annotation,machine learning', 30 | 
packages=find_packages(exclude=('tests',)), 31 | python_requires='>=3.5, <4', 32 | install_requires=['spacy', 'importlib-metadata'], 33 | ) 34 | -------------------------------------------------------------------------------- /tests/data/labeling/labeling.jsonl: -------------------------------------------------------------------------------- 1 | {"id": 2578, "text": "SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT .\nNadim Ladki\nAL-AIN , United Arab Emirates 1996-12-06", "annotations": [{"label": 12, "start_offset": 9, "end_offset": 14, "user": 1}, {"label": 13, "start_offset": 31, "end_offset": 36, "user": 1}, {"label": 13, "start_offset": 58, "end_offset": 69, "user": 1}, {"label": 12, "start_offset": 70, "end_offset": 76, "user": 1}, {"label": 12, "start_offset": 79, "end_offset": 99, "user": 1}, {"label": 12, "start_offset": 9, "end_offset": 14, "user": 2}, {"label": 13, "start_offset": 31, "end_offset": 36, "user": 2}, {"label": 13, "start_offset": 58, "end_offset": 69, "user": 2}, {"label": 12, "start_offset": 70, "end_offset": 76, "user": 2}, {"label": 12, "start_offset": 79, "end_offset": 99, "user": 2}], "meta": {}, "annotation_approver": null} 2 | {"id": 2580, "text": "RUGBY UNION - CUTTITTA BACK FOR ITALY AFTER A YEAR .\nROME 1996-12-06\nItaly recalled Marcello Cuttitta", "annotations": [{"label": 14, "start_offset": 0, "end_offset": 11, "user": 2}, {"label": 13, "start_offset": 14, "end_offset": 22, "user": 2}, {"label": 12, "start_offset": 32, "end_offset": 37, "user": 2}, {"label": 12, "start_offset": 53, "end_offset": 57, "user": 2}, {"label": 12, "start_offset": 69, "end_offset": 74, "user": 2}, {"label": 13, "start_offset": 84, "end_offset": 101, "user": 2}], "meta": {}, "annotation_approver": null} 3 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from 
doccano_transformer import utils 4 | 5 | 6 | class TestUtils(TestCase): 7 | 8 | def test_get_offsets(self): 9 | text = ' This is Doccano Transformer . ' 10 | tokens = text.split() 11 | result = utils.get_offsets(text, tokens) 12 | expected = [1, 6, 9, 17, 29] 13 | self.assertListEqual(result, expected) 14 | 15 | def test_create_bio_tags(self): 16 | tokens = ' This is Doccano Transformer . '.split() 17 | offsets = [1, 6, 9, 17, 29] 18 | labels = [[9, 28, 'SOFTWARE']] 19 | result = utils.create_bio_tags(tokens, offsets, labels) 20 | expected = ['O', 'O', 'B-SOFTWARE', 'I-SOFTWARE', 'O'] 21 | self.assertListEqual(result, expected) 22 | 23 | def test_convert_tokens_and_offsets_to_spacy_tokens(self): 24 | tokens = 'This is Doccano Transformer .'.split() 25 | offsets = [0, 5, 8, 16, 28] 26 | spacy_tokens = utils.convert_tokens_and_offsets_to_spacy_tokens( 27 | tokens, offsets 28 | ) 29 | for i, (spacy_token, token, offset) in enumerate( 30 | zip(spacy_tokens, tokens, offsets) 31 | ): 32 | self.assertEqual(str(spacy_token), token) 33 | self.assertEqual(len(spacy_token), len(token)) 34 | self.assertEqual(spacy_token.i, i) 35 | self.assertEqual(spacy_token.idx, offset) 36 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. 4 | 5 | Fixes # (issue) 6 | 7 | ## Type of change 8 | 9 | Please delete options that are not relevant. 
10 | 11 | - [ ] Bug fix (non-breaking change which fixes an issue) 12 | - [ ] New feature (non-breaking change which adds functionality) 13 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 14 | - [ ] This change requires a documentation update 15 | 16 | # How Has This Been Tested? 17 | 18 | Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration 19 | 20 | - [ ] Test A 21 | - [ ] Test B 22 | 23 | # Checklist: 24 | 25 | - [ ] My code follows the style guidelines of this project 26 | - [ ] I have performed a self-review of my own code 27 | - [ ] I have commented my code, particularly in hard-to-understand areas 28 | - [ ] I have made corresponding changes to the documentation 29 | - [ ] My changes generate no new warnings 30 | - [ ] I have added tests that prove my fix is effective or that my feature works 31 | - [ ] New and existing unit tests pass locally with my changes 32 | - [ ] Any dependent changes have been merged and published in downstream modules 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # doccano-transformer 2 | 3 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/9fe17d104b644a53a3fe189433d3c797)](https://app.codacy.com/gh/doccano/doccano-transformer?utm_source=github.com&utm_medium=referral&utm_content=doccano/doccano-transformer&utm_campaign=Badge_Grade_Dashboard) 4 | [![Build Status](https://github.com/doccano/doccano-transformer/workflows/CI/badge.svg)](https://github.com/doccano/doccano-transformer/actions) 5 | 6 | Doccano Transformer helps you to transform an exported dataset into the format of your favorite machine learning library. 
7 | 8 | ## Supported formats 9 | 10 | Doccano Transformer supports the following formats: 11 | 12 | * CoNLL 2003 13 | * spaCy 14 | 15 | ## Install 16 | 17 | To install `doccano-transformer`, simply use `pip`: 18 | 19 | ```bash 20 | pip install doccano-transformer 21 | ``` 22 | 23 | ## Examples 24 | 25 | ### Named Entity Recognition 26 | 27 | The following formats are supported: 28 | 29 | - CoNLL 2003 30 | - spaCy 31 | 32 | ```python 33 | from doccano_transformer.datasets import NERDataset 34 | from doccano_transformer.utils import read_jsonl 35 | 36 | dataset = read_jsonl(filepath='example.jsonl', dataset=NERDataset, encoding='utf-8') 37 | dataset.to_conll2003(tokenizer=str.split) 38 | dataset.to_spacy(tokenizer=str.split) 39 | ``` 40 | 41 | ## Contribution 42 | 43 | We encourage you to contribute to doccano transformer! Please check out the [Contributing to doccano transformer guide](https://github.com/doccano/doccano-transformer/blob/master/CONTRIBUTING.md) for guidelines about how to proceed. 
44 | 45 | ## License 46 | 47 | [MIT](https://github.com/doccano/doccano-transformer/blob/master/LICENSE) 48 | -------------------------------------------------------------------------------- /doccano_transformer/datasets.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | from typing import Any, Callable, Iterable, Iterator, List, Optional, TextIO 4 | 5 | from doccano_transformer.examples import Example, NERExample 6 | 7 | 8 | class Dataset: 9 | def __init__( 10 | self, 11 | filepath: str, 12 | encoding: Optional[str] = 'utf-8', 13 | transformation_func: Optional[Callable[[TextIO], Iterable[Any]]] = None 14 | ) -> None: 15 | 16 | self.filepath = filepath 17 | self.encoding = encoding 18 | self.transformation_func = transformation_func or (lambda x: x) 19 | 20 | def __iter__(self) -> Iterator[Any]: 21 | with open(self.filepath, encoding=self.encoding) as f: 22 | yield from self.transformation_func(f) 23 | 24 | @classmethod 25 | def from_jsonl( 26 | cls, filepath: str, encoding: Optional[str] = 'utf-8' 27 | ) -> 'Dataset': 28 | return cls(filepath, encoding, lambda f: map(json.loads, f)) 29 | 30 | @classmethod 31 | def from_csv( 32 | cls, filepath: str, encoding: Optional[str] = 'utf-8' 33 | ) -> 'Dataset': 34 | return cls(filepath, encoding, csv.DictReader) 35 | 36 | 37 | class TaskDataset(Dataset): 38 | example_class: Example = None 39 | 40 | def __iter__(self) -> Iterator[Example]: 41 | for raw in super(TaskDataset, self).__iter__(): 42 | example = self.example_class(raw) 43 | example.is_valid(raise_exception=True) 44 | yield example 45 | 46 | 47 | class NERDataset(TaskDataset): 48 | example_class = NERExample 49 | 50 | def to_conll2003( 51 | self, tokenizer: Callable[[str], List[str]] 52 | ) -> Iterator[str]: 53 | for example in self: 54 | yield from example.to_conll2003(tokenizer) 55 | 56 | def to_spacy( 57 | self, tokenizer: Callable[[str], List[str]] 58 | ) -> Iterator[dict]: 59 | for example 
in self: 60 | yield from example.to_spacy(tokenizer) 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | # Edit at https://www.gitignore.io/?templates=python 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # pyenv 72 | .python-version 73 | 74 | # pipenv 75 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 76 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 77 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 78 | # install all needed dependencies. 
79 | #Pipfile.lock 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | .spyproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | 94 | # Mr Developer 95 | .mr.developer.cfg 96 | .project 97 | .pydevproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | .dmypy.json 105 | dmypy.json 106 | 107 | # Pyre type checker 108 | .pyre/ 109 | 110 | # End of https://www.gitignore.io/api/python 111 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # ******** NOTE ******** 12 | 13 | name: "CodeQL" 14 | 15 | on: 16 | push: 17 | branches: [ master ] 18 | pull_request: 19 | # The branches below must be a subset of the branches above 20 | branches: [ master ] 21 | schedule: 22 | - cron: '35 6 * * 4' 23 | 24 | jobs: 25 | analyze: 26 | name: Analyze 27 | runs-on: ubuntu-latest 28 | 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | language: [ 'python' ] 33 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 34 | # Learn more: 35 | # https://docs.github.com/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 36 | 37 | steps: 38 | - name: Checkout repository 39 | uses: actions/checkout@v2 40 | 41 | # Initializes the CodeQL tools for scanning. 42 | - name: Initialize CodeQL 43 | uses: github/codeql-action/init@v1 44 | with: 45 | languages: ${{ matrix.language }} 46 | # If you wish to specify custom queries, you can do so here or in a config file. 47 | # By default, queries listed here will override any specified in a config file. 48 | # Prefix the list here with "+" to use these queries and those in the config file. 49 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 50 | 51 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 52 | # If this step fails, then you should remove it and run the build manually (see below) 53 | - name: Autobuild 54 | uses: github/codeql-action/autobuild@v1 55 | 56 | # ℹ️ Command-line programs to run using the OS shell. 
57 | # 📚 https://git.io/JvXDl 58 | 59 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 60 | # and modify them (or add more) to build your code if your project 61 | # uses a compiled language 62 | 63 | #- run: | 64 | # make bootstrap 65 | # make release 66 | 67 | - name: Perform CodeQL Analysis 68 | uses: github/codeql-action/analyze@v1 69 | -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | from pathlib import Path 4 | from unittest import TestCase 5 | 6 | from doccano_transformer.datasets import NERDataset 7 | 8 | 9 | class TestNERDataset(TestCase): 10 | @classmethod 11 | def setUp(self): 12 | current_path = Path(__file__).parent 13 | self.shared_datadir = current_path / 'data' / 'labeling' 14 | 15 | def test_from_labeling_text_label_jsonl_to_conll2003(self): 16 | src_path = self.shared_datadir / 'labeling_text_label.jsonl' 17 | filename = 'labeling_text_label.conll2003' 18 | users = defaultdict(list) 19 | d = NERDataset.from_jsonl(filepath=src_path) 20 | for x in d.to_conll2003(str.split): 21 | users[x['user']].append(x['data']) 22 | 23 | for user, data in users.items(): 24 | with open(self.shared_datadir / (filename + f'.user{user}')) as f: 25 | expected = f.read() 26 | self.assertEqual(''.join(data), expected) 27 | 28 | def test_from_labeling_jsonl_to_conll2003(self): 29 | src_path = self.shared_datadir / 'labeling.jsonl' 30 | filename = 'labeling.conll2003' 31 | users = defaultdict(list) 32 | d = NERDataset.from_jsonl(filepath=src_path) 33 | for x in d.to_conll2003(str.split): 34 | users[x['user']].append(x['data']) 35 | 36 | for user, data in users.items(): 37 | with open(self.shared_datadir / (filename + f'.user{user}')) as f: 38 | expected = f.read() 39 | self.assertEqual(''.join(data), expected) 40 | 41 | def 
test_from_labeling_text_label_jsonl_to_spacy(self): 42 | src_path = self.shared_datadir / 'labeling_text_label.jsonl' 43 | filename = 'labeling_text_label.spacy' 44 | users = defaultdict(list) 45 | d = NERDataset.from_jsonl(filepath=src_path) 46 | for x in d.to_spacy(str.split): 47 | users[x['user']].append(x['data']) 48 | 49 | for user, data in users.items(): 50 | with open(self.shared_datadir / (filename + f'.user{user}')) as f: 51 | expected = json.load(f) 52 | # print(data) 53 | self.assertEqual(data, expected) 54 | 55 | def test_from_labeling_jsonl_to_spacy(self): 56 | src_path = self.shared_datadir / 'labeling.jsonl' 57 | filename = 'labeling.spacy' 58 | users = defaultdict(list) 59 | d = NERDataset.from_jsonl(filepath=src_path) 60 | for x in d.to_spacy(str.split): 61 | users[x['user']].append(x['data']) 62 | 63 | for user, data in users.items(): 64 | with open(self.shared_datadir / (filename + f'.user{user}')) as f: 65 | expected = json.load(f) 66 | self.assertEqual(data, expected) 67 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /doccano_transformer/utils.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, List, Optional, Tuple 2 | 3 | if TYPE_CHECKING: 4 | from doccano_transformer.datasets import Dataset 5 | 6 | 7 | def read_jsonl( 8 | filepath: str, 9 | dataset: 'Dataset', 10 | encoding: Optional[str] = 'utf-8' 11 | ) -> 'Dataset': 12 | return dataset.from_jsonl(filepath, encoding) 13 | 14 | 15 | def read_csv( 16 | filepath: str, 17 | dataset: 'Dataset', 18 | encoding: Optional[str] = 'utf-8' 19 | ) -> 'Dataset': 20 | return dataset.from_csv(filepath, encoding) 21 | 22 | 23 | def split_sentences(text: str) -> List[str]: 24 | return text.split('\n') 25 | 26 | 27 | 
def get_offsets( 28 | text: str, 29 | tokens: List[str], 30 | start: Optional[int] = 0) -> List[int]: 31 | """Calculate char offsets of each token. 32 | 33 | Args: 34 | text (str): The string before tokenized. 35 | tokens (List[str]): The list of the string. Each string corresponds 36 | to a token. 37 | start (Optional[int]): The start position. 38 | Returns: 39 | (List[int]): The list of the offsets. 40 | """ 41 | offsets = [] 42 | i = 0 43 | for token in tokens: 44 | for j, char in enumerate(token): 45 | while char != text[i]: 46 | i += 1 47 | if j == 0: 48 | offsets.append(i + start) 49 | return offsets 50 | 51 | 52 | def create_bio_tags( 53 | tokens: List[str], 54 | offsets: List[int], 55 | labels: List[Tuple[int, int, str]]) -> List[str]: 56 | """Create BIO tags from Doccano's label data. 57 | 58 | Args: 59 | tokens (List[str]): The list of the token. 60 | offsets (List[int]): The list of the character offsets. 61 | labels (List[Tuple[int, int, str]]): The list of labels. Each item in 62 | the list holds three values which are the start offset, the end 63 | offset, and the label name. 64 | Returns: 65 | (List[str]): The list of the BIO tag.
 66 | """ 67 | labels = sorted(labels) 68 | n = len(labels) 69 | i = 0 70 | prefix = 'B-' 71 | tags = [] 72 | for token, token_start in zip(tokens, offsets): 73 | token_end = token_start + len(token) 74 | if i >= n or token_end < labels[i][0]: 75 | tags.append('O') 76 | elif token_start > labels[i][1]: 77 | tags.append('O') 78 | else: 79 | tags.append(prefix + str(labels[i][2])) 80 | if labels[i][1] > token_end: 81 | prefix = 'I-' 82 | elif i < n: 83 | i += 1 84 | prefix = 'B-' 85 | return tags 86 | 87 | 88 | class Token: 89 | def __init__(self, token: str, offset: int, i: int) -> None: 90 | self.token = token 91 | self.idx = offset 92 | self.i = i 93 | 94 | def __len__(self): 95 | return len(self.token) 96 | 97 | def __str__(self): 98 | return self.token 99 | 100 | 101 | def convert_tokens_and_offsets_to_spacy_tokens( 102 | tokens: List[str], offsets: List[int] 103 | ) -> List[Token]: 104 | """Convert tokens and offsets to the list of SpaCy compatible objects. 105 | 106 | Args: 107 | tokens (List[str]): The list of tokens. 108 | offsets (List[int]): The list of offsets. 109 | Returns: 110 | (List[Token]): The list of the SpaCy compatible object.
111 | Examples: 112 | >>> from doccano_transformer import utils 113 | >>> tokens = ['This', 'is', 'Doccano', 'Transformer', '.'] 114 | >>> offsets = [0, 5, 8, 16, 28] 115 | >>> utils.convert_tokens_and_offsets_to_spacy_tokens(tokens, offsets) 116 | """ 117 | if len(tokens) != len(offsets): 118 | raise ValueError('tokens size should equal to offsets size') 119 | spacy_tokens = [] 120 | for i, (token, offset) in enumerate(zip(tokens, offsets)): 121 | spacy_tokens.append(Token(token, offset, i)) 122 | return spacy_tokens 123 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue with the owners of this repository before making a change. 4 | 5 | Please note we have a code of conduct, please follow it in all your interactions with the project. 6 | 7 | ## How to contribute to doccano 8 | 9 | ### Reporting Bugs 10 | 11 | #### Before submitting a bug report 12 | 13 | * Check the FAQs for a list of common questions and problems. 14 | * Ensure the bug was not already reported by searching on GitHub under [Issues](https://github.com/doccano/doccano-transformer/issues). 15 | * [Open a new issue](https://github.com/doccano/doccano-transformer/issues/new/choose) if you're unable to find an open one addressing the problem. 16 | * Use the relevant bug report templates to create the issue. 17 | 18 | #### How do I submit a good bug report? 19 | 20 | Explain the problem and include additional details to help maintainers reproduce the problem: 21 | 22 | * Use a clear and descriptive title for the issue to identify the problem. 23 | * Describe the exact steps which reproduce the problem in as many details as possible. 24 | * Provide specific examples to demonstrate the steps. 
25 | * Describe the behavior you observed after following the steps and point out what exactly is the problem with that behavior. 26 | * Explain which behavior you expected to see instead and why. 27 | * Include screenshots and animated GIFs which show you following the described steps and clearly demonstrate the problem. 28 | * If the problem is related to performance or memory, include a CPU profile capture with your report. 29 | * If the problem is related to network, include a network activity in Chrome/Firefox/Safari DevTools. 30 | * If the problem wasn't triggered by a specific action, describe what you were doing before the problem happened and share more information using the guidelines below. 31 | 32 | ### Suggesting Enhancements 33 | 34 | #### Before submitting an enhancement suggestion 35 | 36 | * Ensure the suggestion was not already reported by searching on GitHub under [Issues](https://github.com/doccano/doccano-transformer/issues). 37 | * [Open a new issue](https://github.com/doccano/doccano-transformer/issues/new/choose) if you're unable to find an open one addressing the suggestion. 38 | * Use the relevant issue templates to create one. 39 | 40 | #### How do I submit a good enhancement suggestion? 41 | 42 | Explain the suggestion and include additional details to help developers understand it: 43 | 44 | * Use a clear and descriptive title for the issue to identify the suggestion. 45 | * Provide a step-by-step description of the suggested enhancement in as many details as possible. 46 | * Provide specific examples to demonstrate the steps. 47 | * Describe the current behavior and explain which behavior you expected to see instead and why. 48 | * Include screenshots and animated GIFs which help you demonstrate the steps or point out the part of doccano which the suggestion is related to. 49 | * Explain why this enhancement would be useful to most doccano users. 50 | * List some other annotation tools or applications where this enhancement exists. 
51 | * Specify which version of doccano you're using. 52 | * Specify the name and version of the OS you're using. 53 | 54 | ### Pull Request Process 55 | 56 | Please follow these steps to have your contribution considered by the maintainers: 57 | 58 | 1. Open a related issue before making a pull request as much as possible. 59 | 2. Follow all instructions in [the template](PULL_REQUEST_TEMPLATE.md) 60 | 3. Follow the [styleguides](#styleguides) 61 | 4. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing
What if the status checks are failing? If a status check is failing, and you believe that the failure is unrelated to your change, please leave a comment on the pull request explaining why you believe the failure is unrelated. A maintainer will re-run the status check for you. If we conclude that the failure was a false positive, then we will open an issue to track that problem with our status check suite.
62 | 5. You may merge the Pull Request in once you have the sign-off of the project members([@Hironsan](https://github.com/Hironsan), [@yasufumy](https://github.com/yasufumy) or [@icoxfog417](https://github.com/icoxfog417)). 63 | 64 | While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted. 65 | 66 | ## Styleguides 67 | 68 | ### Git Commit Messages 69 | 70 | * Use the present tense ("Add feature" not "Added feature") 71 | * Use the imperative mood ("Move cursor to..." not "Moves cursor to...") 72 | * Limit the first line to 72 characters or less 73 | * Reference issues and pull requests liberally after the first line 74 | -------------------------------------------------------------------------------- /doccano_transformer/examples.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Callable, Iterator, List, Optional 3 | 4 | from spacy.gold import biluo_tags_from_offsets 5 | 6 | from doccano_transformer import utils 7 | 8 | 9 | class Example: 10 | def is_valid(self, raise_exception: Optional[bool] = True) -> None: 11 | raise NotImplementedError 12 | 13 | 14 | class NERExample: 15 | 16 | def __init__(self, raw: dict) -> None: 17 | self.raw = raw 18 | self.id = raw['id'] 19 | self.text = raw['text'] 20 | self.sentences = utils.split_sentences(raw['text']) 21 | self.sentence_offsets = utils.get_offsets(raw['text'], self.sentences) 22 | self.sentence_offsets.append(len(raw['text'])) 23 | 24 | @property 25 | def labels(self): 26 | if 'annotations' in self.raw: 27 | labels = defaultdict(list) 28 | for annotation in self.raw['annotations']: 29 | labels[annotation['user']].append([ 30 | annotation['start_offset'], 31 | annotation['end_offset'], 32 | annotation['label'] 33 | ]) 34 | return labels 35 | elif 
'labels' in self.raw: 36 | labels = defaultdict(list) 37 | for label in self.raw['labels']: 38 | # TODO: This format doesn't have a user field currently. 39 | # So this method uses the user 0 for all label. 40 | labels[0].append(label) 41 | return labels 42 | else: 43 | raise KeyError( 44 | 'The file should includes either "labels" or "annotations".' 45 | ) 46 | 47 | def get_tokens_and_token_offsets(self, tokenizer): 48 | tokens = [tokenizer(sentence) for sentence in self.sentences] 49 | token_offsets = [ 50 | utils.get_offsets(sentence, tokens, offset) 51 | for sentence, tokens, offset in zip( 52 | self.sentences, tokens, self.sentence_offsets 53 | ) 54 | ] 55 | return tokens, token_offsets 56 | 57 | def is_valid(self, raise_exception: Optional[bool] = True) -> bool: 58 | return True 59 | 60 | def to_conll2003( 61 | self, tokenizer: Callable[[str], List[str]] 62 | ) -> Iterator[dict]: 63 | all_tokens, all_token_offsets = self.get_tokens_and_token_offsets( 64 | tokenizer) 65 | for user, labels in self.labels.items(): 66 | label_split = [[] for _ in range(len(self.sentences))] 67 | for label in labels: 68 | for i, (start, end) in enumerate( 69 | zip(self.sentence_offsets, self.sentence_offsets[1:])): 70 | if start <= label[0] <= label[1] <= end: 71 | label_split[i].append(label) 72 | lines = ['-DOCSTART- -X- -X- O\n\n'] 73 | for tokens, offsets, label in zip( 74 | all_tokens, all_token_offsets, label_split): 75 | tags = utils.create_bio_tags(tokens, offsets, label) 76 | for token, tag in zip(tokens, tags): 77 | lines.append(f'{token} _ _ {tag}\n') 78 | lines.append('\n') 79 | yield {'user': user, 'data': ''.join(lines)} 80 | 81 | def to_spacy( 82 | self, tokenizer: Callable[[str], List[str]] 83 | ) -> Iterator[dict]: 84 | all_tokens, all_token_offsets = self.get_tokens_and_token_offsets( 85 | tokenizer) 86 | for user, labels in self.labels.items(): 87 | label_split = [[] for _ in range(len(self.sentences))] 88 | for label in labels: 89 | for i, (start, end) in 
enumerate( 90 | zip(self.sentence_offsets, self.sentence_offsets[1:])): 91 | if start <= label[0] <= label[1] <= end: 92 | label_split[i].append(label) 93 | 94 | data = {'raw': self.text} 95 | sentences = [] 96 | for tokens, offsets, label in zip( 97 | all_tokens, all_token_offsets, label_split): 98 | tokens = utils.convert_tokens_and_offsets_to_spacy_tokens( 99 | tokens, offsets 100 | ) 101 | tags = biluo_tags_from_offsets(tokens, label) 102 | tokens_for_spacy = [] 103 | for i, (token, tag, offset) in enumerate( 104 | zip(tokens, tags, offsets) 105 | ): 106 | tokens_for_spacy.append( 107 | {'id': i, 'orth': str(token), 'ner': tag} 108 | ) 109 | sentences.append({'tokens': tokens_for_spacy}) 110 | data['sentences'] = sentences 111 | yield {'user': user, 'data': {'id': self.id, 'paragraphs': [data]}} 112 | -------------------------------------------------------------------------------- /tests/data/labeling/labeling.spacy.user1: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 2578, 4 | "paragraphs": [ 5 | { 6 | "raw": "SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT .\nNadim Ladki\nAL-AIN , United Arab Emirates 1996-12-06", 7 | "sentences": [ 8 | { 9 | "tokens": [ 10 | { 11 | "id": 0, 12 | "orth": "SOCCER", 13 | "ner": "O" 14 | }, 15 | { 16 | "id": 1, 17 | "orth": "-", 18 | "ner": "O" 19 | }, 20 | { 21 | "id": 2, 22 | "orth": "JAPAN", 23 | "ner": "U-12" 24 | }, 25 | { 26 | "id": 3, 27 | "orth": "GET", 28 | "ner": "O" 29 | }, 30 | { 31 | "id": 4, 32 | "orth": "LUCKY", 33 | "ner": "O" 34 | }, 35 | { 36 | "id": 5, 37 | "orth": "WIN", 38 | "ner": "O" 39 | }, 40 | { 41 | "id": 6, 42 | "orth": ",", 43 | "ner": "O" 44 | }, 45 | { 46 | "id": 7, 47 | "orth": "CHINA", 48 | "ner": "U-13" 49 | }, 50 | { 51 | "id": 8, 52 | "orth": "IN", 53 | "ner": "O" 54 | }, 55 | { 56 | "id": 9, 57 | "orth": "SURPRISE", 58 | "ner": "O" 59 | }, 60 | { 61 | "id": 10, 62 | "orth": "DEFEAT", 63 | "ner": "O" 64 | }, 65 | { 66 | "id": 11, 
67 | "orth": ".", 68 | "ner": "O" 69 | } 70 | ] 71 | }, 72 | { 73 | "tokens": [ 74 | { 75 | "id": 0, 76 | "orth": "Nadim", 77 | "ner": "B-13" 78 | }, 79 | { 80 | "id": 1, 81 | "orth": "Ladki", 82 | "ner": "L-13" 83 | } 84 | ] 85 | }, 86 | { 87 | "tokens": [ 88 | { 89 | "id": 0, 90 | "orth": "AL-AIN", 91 | "ner": "U-12" 92 | }, 93 | { 94 | "id": 1, 95 | "orth": ",", 96 | "ner": "O" 97 | }, 98 | { 99 | "id": 2, 100 | "orth": "United", 101 | "ner": "B-12" 102 | }, 103 | { 104 | "id": 3, 105 | "orth": "Arab", 106 | "ner": "I-12" 107 | }, 108 | { 109 | "id": 4, 110 | "orth": "Emirates", 111 | "ner": "L-12" 112 | }, 113 | { 114 | "id": 5, 115 | "orth": "1996-12-06", 116 | "ner": "O" 117 | } 118 | ] 119 | } 120 | ] 121 | } 122 | ] 123 | } 124 | ] -------------------------------------------------------------------------------- /tests/data/labeling/labeling.spacy.user2: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 2578, 4 | "paragraphs": [ 5 | { 6 | "raw": "SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT .\nNadim Ladki\nAL-AIN , United Arab Emirates 1996-12-06", 7 | "sentences": [ 8 | { 9 | "tokens": [ 10 | { 11 | "id": 0, 12 | "orth": "SOCCER", 13 | "ner": "O" 14 | }, 15 | { 16 | "id": 1, 17 | "orth": "-", 18 | "ner": "O" 19 | }, 20 | { 21 | "id": 2, 22 | "orth": "JAPAN", 23 | "ner": "U-12" 24 | }, 25 | { 26 | "id": 3, 27 | "orth": "GET", 28 | "ner": "O" 29 | }, 30 | { 31 | "id": 4, 32 | "orth": "LUCKY", 33 | "ner": "O" 34 | }, 35 | { 36 | "id": 5, 37 | "orth": "WIN", 38 | "ner": "O" 39 | }, 40 | { 41 | "id": 6, 42 | "orth": ",", 43 | "ner": "O" 44 | }, 45 | { 46 | "id": 7, 47 | "orth": "CHINA", 48 | "ner": "U-13" 49 | }, 50 | { 51 | "id": 8, 52 | "orth": "IN", 53 | "ner": "O" 54 | }, 55 | { 56 | "id": 9, 57 | "orth": "SURPRISE", 58 | "ner": "O" 59 | }, 60 | { 61 | "id": 10, 62 | "orth": "DEFEAT", 63 | "ner": "O" 64 | }, 65 | { 66 | "id": 11, 67 | "orth": ".", 68 | "ner": "O" 69 | } 70 | ] 71 | }, 72 | { 73 
| "tokens": [ 74 | { 75 | "id": 0, 76 | "orth": "Nadim", 77 | "ner": "B-13" 78 | }, 79 | { 80 | "id": 1, 81 | "orth": "Ladki", 82 | "ner": "L-13" 83 | } 84 | ] 85 | }, 86 | { 87 | "tokens": [ 88 | { 89 | "id": 0, 90 | "orth": "AL-AIN", 91 | "ner": "U-12" 92 | }, 93 | { 94 | "id": 1, 95 | "orth": ",", 96 | "ner": "O" 97 | }, 98 | { 99 | "id": 2, 100 | "orth": "United", 101 | "ner": "B-12" 102 | }, 103 | { 104 | "id": 3, 105 | "orth": "Arab", 106 | "ner": "I-12" 107 | }, 108 | { 109 | "id": 4, 110 | "orth": "Emirates", 111 | "ner": "L-12" 112 | }, 113 | { 114 | "id": 5, 115 | "orth": "1996-12-06", 116 | "ner": "O" 117 | } 118 | ] 119 | } 120 | ] 121 | } 122 | ] 123 | }, 124 | { 125 | "id": 2580, 126 | "paragraphs": [ 127 | { 128 | "raw": "RUGBY UNION - CUTTITTA BACK FOR ITALY AFTER A YEAR .\nROME 1996-12-06\nItaly recalled Marcello Cuttitta", 129 | "sentences": [ 130 | { 131 | "tokens": [ 132 | { 133 | "id": 0, 134 | "orth": "RUGBY", 135 | "ner": "B-14" 136 | }, 137 | { 138 | "id": 1, 139 | "orth": "UNION", 140 | "ner": "L-14" 141 | }, 142 | { 143 | "id": 2, 144 | "orth": "-", 145 | "ner": "O" 146 | }, 147 | { 148 | "id": 3, 149 | "orth": "CUTTITTA", 150 | "ner": "U-13" 151 | }, 152 | { 153 | "id": 4, 154 | "orth": "BACK", 155 | "ner": "O" 156 | }, 157 | { 158 | "id": 5, 159 | "orth": "FOR", 160 | "ner": "O" 161 | }, 162 | { 163 | "id": 6, 164 | "orth": "ITALY", 165 | "ner": "U-12" 166 | }, 167 | { 168 | "id": 7, 169 | "orth": "AFTER", 170 | "ner": "O" 171 | }, 172 | { 173 | "id": 8, 174 | "orth": "A", 175 | "ner": "O" 176 | }, 177 | { 178 | "id": 9, 179 | "orth": "YEAR", 180 | "ner": "O" 181 | }, 182 | { 183 | "id": 10, 184 | "orth": ".", 185 | "ner": "O" 186 | } 187 | ] 188 | }, 189 | { 190 | "tokens": [ 191 | { 192 | "id": 0, 193 | "orth": "ROME", 194 | "ner": "U-12" 195 | }, 196 | { 197 | "id": 1, 198 | "orth": "1996-12-06", 199 | "ner": "O" 200 | } 201 | ] 202 | }, 203 | { 204 | "tokens": [ 205 | { 206 | "id": 0, 207 | "orth": "Italy", 208 | "ner": "U-12" 209 | 
}, 210 | { 211 | "id": 1, 212 | "orth": "recalled", 213 | "ner": "O" 214 | }, 215 | { 216 | "id": 2, 217 | "orth": "Marcello", 218 | "ner": "B-13" 219 | }, 220 | { 221 | "id": 3, 222 | "orth": "Cuttitta", 223 | "ner": "L-13" 224 | } 225 | ] 226 | } 227 | ] 228 | } 229 | ] 230 | } 231 | ] -------------------------------------------------------------------------------- /tests/data/labeling/labeling_text_label.spacy.user0: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 2578, 4 | "paragraphs": [ 5 | { 6 | "raw": "SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT .\nNadim Ladki\nAL-AIN , United Arab Emirates 1996-12-06", 7 | "sentences": [ 8 | { 9 | "tokens": [ 10 | { 11 | "id": 0, 12 | "orth": "SOCCER", 13 | "ner": "O" 14 | }, 15 | { 16 | "id": 1, 17 | "orth": "-", 18 | "ner": "O" 19 | }, 20 | { 21 | "id": 2, 22 | "orth": "JAPAN", 23 | "ner": "U-LOC" 24 | }, 25 | { 26 | "id": 3, 27 | "orth": "GET", 28 | "ner": "O" 29 | }, 30 | { 31 | "id": 4, 32 | "orth": "LUCKY", 33 | "ner": "O" 34 | }, 35 | { 36 | "id": 5, 37 | "orth": "WIN", 38 | "ner": "O" 39 | }, 40 | { 41 | "id": 6, 42 | "orth": ",", 43 | "ner": "O" 44 | }, 45 | { 46 | "id": 7, 47 | "orth": "CHINA", 48 | "ner": "U-PER" 49 | }, 50 | { 51 | "id": 8, 52 | "orth": "IN", 53 | "ner": "O" 54 | }, 55 | { 56 | "id": 9, 57 | "orth": "SURPRISE", 58 | "ner": "O" 59 | }, 60 | { 61 | "id": 10, 62 | "orth": "DEFEAT", 63 | "ner": "O" 64 | }, 65 | { 66 | "id": 11, 67 | "orth": ".", 68 | "ner": "O" 69 | } 70 | ] 71 | }, 72 | { 73 | "tokens": [ 74 | { 75 | "id": 0, 76 | "orth": "Nadim", 77 | "ner": "B-PER" 78 | }, 79 | { 80 | "id": 1, 81 | "orth": "Ladki", 82 | "ner": "L-PER" 83 | } 84 | ] 85 | }, 86 | { 87 | "tokens": [ 88 | { 89 | "id": 0, 90 | "orth": "AL-AIN", 91 | "ner": "U-LOC" 92 | }, 93 | { 94 | "id": 1, 95 | "orth": ",", 96 | "ner": "O" 97 | }, 98 | { 99 | "id": 2, 100 | "orth": "United", 101 | "ner": "B-LOC" 102 | }, 103 | { 104 | "id": 3, 105 | "orth": "Arab", 
106 | "ner": "I-LOC" 107 | }, 108 | { 109 | "id": 4, 110 | "orth": "Emirates", 111 | "ner": "L-LOC" 112 | }, 113 | { 114 | "id": 5, 115 | "orth": "1996-12-06", 116 | "ner": "O" 117 | } 118 | ] 119 | } 120 | ] 121 | } 122 | ] 123 | }, 124 | { 125 | "id": 2580, 126 | "paragraphs": [ 127 | { 128 | "raw": "RUGBY UNION - CUTTITTA BACK FOR ITALY AFTER A YEAR .\nROME 1996-12-06\nItaly recalled Marcello Cuttitta", 129 | "sentences": [ 130 | { 131 | "tokens": [ 132 | { 133 | "id": 0, 134 | "orth": "RUGBY", 135 | "ner": "B-ORG" 136 | }, 137 | { 138 | "id": 1, 139 | "orth": "UNION", 140 | "ner": "L-ORG" 141 | }, 142 | { 143 | "id": 2, 144 | "orth": "-", 145 | "ner": "O" 146 | }, 147 | { 148 | "id": 3, 149 | "orth": "CUTTITTA", 150 | "ner": "U-PER" 151 | }, 152 | { 153 | "id": 4, 154 | "orth": "BACK", 155 | "ner": "O" 156 | }, 157 | { 158 | "id": 5, 159 | "orth": "FOR", 160 | "ner": "O" 161 | }, 162 | { 163 | "id": 6, 164 | "orth": "ITALY", 165 | "ner": "U-LOC" 166 | }, 167 | { 168 | "id": 7, 169 | "orth": "AFTER", 170 | "ner": "O" 171 | }, 172 | { 173 | "id": 8, 174 | "orth": "A", 175 | "ner": "O" 176 | }, 177 | { 178 | "id": 9, 179 | "orth": "YEAR", 180 | "ner": "O" 181 | }, 182 | { 183 | "id": 10, 184 | "orth": ".", 185 | "ner": "O" 186 | } 187 | ] 188 | }, 189 | { 190 | "tokens": [ 191 | { 192 | "id": 0, 193 | "orth": "ROME", 194 | "ner": "U-LOC" 195 | }, 196 | { 197 | "id": 1, 198 | "orth": "1996-12-06", 199 | "ner": "O" 200 | } 201 | ] 202 | }, 203 | { 204 | "tokens": [ 205 | { 206 | "id": 0, 207 | "orth": "Italy", 208 | "ner": "U-LOC" 209 | }, 210 | { 211 | "id": 1, 212 | "orth": "recalled", 213 | "ner": "O" 214 | }, 215 | { 216 | "id": 2, 217 | "orth": "Marcello", 218 | "ner": "B-PER" 219 | }, 220 | { 221 | "id": 3, 222 | "orth": "Cuttitta", 223 | "ner": "L-PER" 224 | } 225 | ] 226 | } 227 | ] 228 | } 229 | ] 230 | } 231 | ] -------------------------------------------------------------------------------- /Pipfile.lock: 
-------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "d34a303b3e6e07f41a6396f4082281aa41886d4285c7f5d4f0af491cbe2693bc" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "blis": { 20 | "hashes": [ 21 | "sha256:00473602629ba69fe6565108e21957e918cb48b59f5bf2f6bfb6e04de42500cb", 22 | "sha256:03c368c9716ca814c436550a5f1e02ccf74850e613602519e3941d212e5aa177", 23 | "sha256:135450caabc8aea9bb9250329ebdf7189982d9b57d5c92789b2ba2fe52c247a7", 24 | "sha256:1402d9cbb0fbc21b749dd5b87d7ee14249e74a0ca38be6ecc56b3b356fca2f21", 25 | "sha256:26b16d6005bb2671699831b5cc699905215d1abde1ec5c1d04de7dcd9eb29f75", 26 | "sha256:3347a4b1b7d3ae14476aac9a6f7bf8ebf464863f4ebf4aea228874a7694ea240", 27 | "sha256:38fe877a4b52e762f5e137a412e3c256545a696a12ae8c40d67b8815d2bb5097", 28 | "sha256:4fb89c47ee06b58a4410a16fd5794847517262c9d2a342643475b477dfeff0a4", 29 | "sha256:77a6486b9794af01bcdfd1bc6e067c93add4b93292e6f95bf6e5ce7f98bf0163", 30 | "sha256:856142a11e37fd2c47c5006a3197e157bb8469a491a73d2d442223dd3279df84", 31 | "sha256:8aeaf6954351593a1e412f80e398aa51df588d3c0de74b9f3323b694c603381b", 32 | "sha256:9ede123065f3cacb109967755b3d83d4ca0de90643a9058129a6ab2d4051954f", 33 | "sha256:d1d59faebc1c94f8f4f77154ef4b9d6d40364b111cf8fde48ee3b524c85f1075", 34 | "sha256:d69257d317e86f34a7f230a2fd1f021fd2a1b944137f40d8cdbb23bd334cd0c4", 35 | "sha256:ddd732c5274d1082fa92e2c42317587d5ebabce7741ca98120f69bd45d004b99", 36 | "sha256:f0b0dad4d6268d9dba0a65a9db12dd7a2d8686b648399e4aa1aec7550697e99e" 37 | ], 38 | "version": "==0.4.1" 39 | }, 40 | "catalogue": { 41 | "hashes": [ 42 | "sha256:584d78e7f4c3c6e2fd498eb56dfc8ef1f4ff738480237de2ccd26cbe2cf47172", 43 | "sha256:d74d1d856c6b36a37bf14aa6dbbc27d0582667b7ab979a6108e61a575e8723f5" 44 | ], 45 | 
"version": "==1.0.0" 46 | }, 47 | "certifi": { 48 | "hashes": [ 49 | "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", 50 | "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" 51 | ], 52 | "version": "==2020.12.5" 53 | }, 54 | "chardet": { 55 | "hashes": [ 56 | "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", 57 | "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" 58 | ], 59 | "version": "==4.0.0" 60 | }, 61 | "cymem": { 62 | "hashes": [ 63 | "sha256:01d3ea159f7a3f3192b1e800ed8207dac7586794d903a153198b9ea317f144bc", 64 | "sha256:190e15d9cf2c3bde60ae37bddbae6568a36044dc4a326d84081a5fa08818eee0", 65 | "sha256:1f0eb9b3d03623dcfc746cf8bff0663b0e347f4aea759965c8932087a0307ee9", 66 | "sha256:3d48902d7441645835fefc7832df49feb5362c7300d182475b63a01d25ae44ef", 67 | "sha256:4bd023c2477198b39b660c2a6b0242880649765ecee8461688a57fd4afd2bfc0", 68 | "sha256:734d82d0d03c2ceb929bc1744c04dbe0a105e68a4947c8406056a36f86c41830", 69 | "sha256:8ea57e6923f40eb51012352161bb5707c14a5a5ce901ff72021e59df06221655", 70 | "sha256:9d72d69f7a62a280199c3aa7bc550685c47d6d0689b2d299e6492253b86d2437", 71 | "sha256:a440d63577fcdc9c528c9cc026b7b4f8648193bac462bc0596c9eac10f9fba62", 72 | "sha256:ce1e81c1d031f56b67bac2136e73b4512cbc794706cd570178972d54ba6115d8", 73 | "sha256:d19f68b90411e02ab33b1654118337f96f41c13a3cd00c4f44f7abed2bc712e7", 74 | "sha256:d307f7f6230d861a938837cae4b855226b6845a21c010242a15e9ce6853856cd", 75 | "sha256:f2167c9959fcd639b95d51fa5efaa7c61eef8d686cb75a25412a914f428ce980" 76 | ], 77 | "version": "==2.0.5" 78 | }, 79 | "idna": { 80 | "hashes": [ 81 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", 82 | "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" 83 | ], 84 | "version": "==2.10" 85 | }, 86 | "murmurhash": { 87 | "hashes": [ 88 | "sha256:023391cfefe584ac544c1ea0936976c0119b17dd27bb8280652cef1704f76428", 89 | 
"sha256:23c56182822a1ed88e2a098ac56958dfec380696a9a943df203b9b41e4bcf5e4", 90 | "sha256:76251513a2acad6c2e4b7aeffc5fcb807ee97a66cad5c2990557556555a6b7e9", 91 | "sha256:81474a45c4074637a6dfc8fea4cdebf091ab5aa781c2cfcb94c43b16030badd7", 92 | "sha256:8381172e03c5f6f947005fb146a53c5e5a9e0d630be4a40cbf8838e9324bfe1c", 93 | "sha256:892749023da26420d194f37bfa30df1368aaac0149cfa3b2105db36b66549e37", 94 | "sha256:98ec9d727bd998a35385abd56b062cf0cca216725ea7ec5068604ab566f7e97f", 95 | "sha256:99e55488476a5f70e8d305fd31258f140e52f724f788bcc50c31ec846a2b3766", 96 | "sha256:a9bd2312996e6e47605af305a1e5f091eba1bdd637cdd9986aec4885cb4c5530", 97 | "sha256:add366944eb8ec73013a4f36e166c5a4f0f7628ffe1746bc5fe031347489e5e8", 98 | "sha256:b9292c532538cf47846ca81056cfeab08b877c35fe7521d6524aa92ddcd833e2", 99 | "sha256:d4c3a0242014cf4c84e9ea0ba3f13b48f02a3992de3da7b1116d11b816451195", 100 | "sha256:d58315961dc5a5e740f41f2ac5c3a0ebc61ef472f8afeb4db7eeb3b863243105", 101 | "sha256:ef8819d15973e0d6f69688bafc097a1fae081675c1de39807028869a1320b1a9", 102 | "sha256:f00321998f0a6bad3fd068babf448a296d4b0b1f4dd424cab863ebe5ed54182f", 103 | "sha256:fd17973fd4554715efd8d86b3e9200358e49e437fdb92a897ca127aced48b61c", 104 | "sha256:fed7578fbaa6c301f27ed80834c1f7494ea7d335e269e98b9aee477cf0b3b487" 105 | ], 106 | "version": "==1.0.5" 107 | }, 108 | "numpy": { 109 | "hashes": [ 110 | "sha256:032be656d89bbf786d743fee11d01ef318b0781281241997558fa7950028dd29", 111 | "sha256:104f5e90b143dbf298361a99ac1af4cf59131218a045ebf4ee5990b83cff5fab", 112 | "sha256:125a0e10ddd99a874fd357bfa1b636cd58deb78ba4a30b5ddb09f645c3512e04", 113 | "sha256:12e4ba5c6420917571f1a5becc9338abbde71dd811ce40b37ba62dec7b39af6d", 114 | "sha256:13adf545732bb23a796914fe5f891a12bd74cf3d2986eed7b7eba2941eea1590", 115 | "sha256:2d7e27442599104ee08f4faed56bb87c55f8b10a5494ac2ead5c98a4b289e61f", 116 | "sha256:3bc63486a870294683980d76ec1e3efc786295ae00128f9ea38e2c6e74d5a60a", 117 | 
"sha256:3d3087e24e354c18fb35c454026af3ed8997cfd4997765266897c68d724e4845", 118 | "sha256:4ed8e96dc146e12c1c5cdd6fb9fd0757f2ba66048bf94c5126b7efebd12d0090", 119 | "sha256:60759ab15c94dd0e1ed88241fd4fa3312db4e91d2c8f5a2d4cf3863fad83d65b", 120 | "sha256:65410c7f4398a0047eea5cca9b74009ea61178efd78d1be9847fac1d6716ec1e", 121 | "sha256:66b467adfcf628f66ea4ac6430ded0614f5cc06ba530d09571ea404789064adc", 122 | "sha256:7199109fa46277be503393be9250b983f325880766f847885607d9b13848f257", 123 | "sha256:72251e43ac426ff98ea802a931922c79b8d7596480300eb9f1b1e45e0543571e", 124 | "sha256:89e5336f2bec0c726ac7e7cdae181b325a9c0ee24e604704ed830d241c5e47ff", 125 | "sha256:89f937b13b8dd17b0099c7c2e22066883c86ca1575a975f754babc8fbf8d69a9", 126 | "sha256:9c94cab5054bad82a70b2e77741271790304651d584e2cdfe2041488e753863b", 127 | "sha256:9eb551d122fadca7774b97db8a112b77231dcccda8e91a5bc99e79890797175e", 128 | "sha256:a1d7995d1023335e67fb070b2fae6f5968f5be3802b15ad6d79d81ecaa014fe0", 129 | "sha256:ae61f02b84a0211abb56462a3b6cd1e7ec39d466d3160eb4e1da8bf6717cdbeb", 130 | "sha256:b9410c0b6fed4a22554f072a86c361e417f0258838957b78bd063bde2c7f841f", 131 | "sha256:c26287dfc888cf1e65181f39ea75e11f42ffc4f4529e5bd19add57ad458996e2", 132 | "sha256:c91ec9569facd4757ade0888371eced2ecf49e7982ce5634cc2cf4e7331a4b14", 133 | "sha256:ecb5b74c702358cdc21268ff4c37f7466357871f53a30e6f84c686952bef16a9" 134 | ], 135 | "version": "==1.20.1" 136 | }, 137 | "plac": { 138 | "hashes": [ 139 | "sha256:398cb947c60c4c25e275e1f1dadf027e7096858fb260b8ece3b33bcff90d985f", 140 | "sha256:487e553017d419f35add346c4c09707e52fa53f7e7181ce1098ca27620e9ceee" 141 | ], 142 | "version": "==1.1.3" 143 | }, 144 | "preshed": { 145 | "hashes": [ 146 | "sha256:12cbe1e378b4f1c6b06f5e4130408befe916e55ea1616e6aa63c5cd0ccd9c927", 147 | "sha256:1bdededa7fd81f26a42bc9d11d542657c74746b7ea7fc2b2ca6d0ddbf1f93792", 148 | "sha256:1ce0846cb7ebb2ea913d44ec2e296098c285443ecdea80ddf02656bbef4deacb", 149 | 
"sha256:30f0c8ea85113d0565a1e3eb6222d00513ec39b56f3f9a2615e304575e65422e", 150 | "sha256:56b9603517bb2a364418163236d6a147a1d722ff7546cbe085e76e25ae118e89", 151 | "sha256:572899224578d30f6a67fadecb3d62b824866b4d2b6bad73f71abf7585db1389", 152 | "sha256:5e06a49477bd257eea02bf823b5d3e201d00a19d6976523a58da8606b2358481", 153 | "sha256:67c11e384ce4c008bc487ba3a29bafdfe038b9a2546ccfe0fe2160480b356fed", 154 | "sha256:6e833f1632a1d0232bdc6df6c3542fb130ef044d8656b24576d9fd19e5f1e0d1", 155 | "sha256:6f126bcc414a0304b54956f9dac2628a0f9bef1657d1b3a3837fc82b791aa2a1", 156 | "sha256:85074eebf90a858a6b68242f1ae265ca99e1af45bf9dafcb9a83d49b0815a2e1", 157 | "sha256:8a3adffde3126c2a0ab7d57cab1d605cb5f63da1ba88088ad3cf8debfd9aa4dc", 158 | "sha256:8a560850b8c53c1487ba51c2b0f5769535512b36d3b129ad5796b64653abe2f9", 159 | "sha256:9ebf444f8487782c84d7b5acb1d7195e603155882fafc4697344199eeeafbe5f", 160 | "sha256:c6d3dba39ed5059aaf99767017b9568c75b2d0780c3481e204b1daecde00360e", 161 | "sha256:ca4a7681b643b8356e7dfdab9cf668b2b34bd07ef4b09ebed44c8aeb3b1626ee", 162 | "sha256:fb4d2e82add82d63b2c97802b759a58ff200d06b632e2edc48a9ced1e6472faf" 163 | ], 164 | "version": "==3.0.5" 165 | }, 166 | "requests": { 167 | "hashes": [ 168 | "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", 169 | "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" 170 | ], 171 | "version": "==2.25.1" 172 | }, 173 | "spacy": { 174 | "hashes": [ 175 | "sha256:0f5d088c1d2a1fcf247090854927cd0ba4e28266323af112dead20ff020ded1c", 176 | "sha256:11b9517cdcbea166a9461093821d12bf632aea7dd14b6e3c549871903bda41b8", 177 | "sha256:1fcfb911b254af3144b3e65a2daf671cb26b6243ec431089ccb28cbe03d826de", 178 | "sha256:366eaae9634c59f89015ad11db1d8559c327ab665a5f644c71155c76711ee50a", 179 | "sha256:3bafcc134c340c5d7556612344d2844522d452b99a21f2b0a9b640f6c55f1110", 180 | "sha256:4944a1118f6dbb49201749d72527b749f74032e1026ddf387bc3a7e172ff0300", 181 | 
"sha256:7a6b7486f71930e7de7100feb72036e3ccb8c18509ff23e8453cff0b28470ea4", 182 | "sha256:818de26e0e383f64ccbe3db185574920de05923d8deac8bbb12113b9e33cee1f", 183 | "sha256:b7df3622e9a867294b913cd0a4fba99d47162af1cfd3a840c5943b25f390bb5c", 184 | "sha256:f7b3a17730786979f964b16ee1e4a9146cd05016f100afb274dd66336dfc39eb" 185 | ], 186 | "index": "pypi", 187 | "version": "==2.3.2" 188 | }, 189 | "srsly": { 190 | "hashes": [ 191 | "sha256:11447f8e659e1f62f29302252fb057f179031457b36c83426027182f624fe565", 192 | "sha256:23c7205b8c1cac49a03521bee37f0afe3680d9f0ec18c75ab3ac39bd3e15272b", 193 | "sha256:2615b8713dfe793ca57925076b0869385d56754816b1eaee5490a6827a1cb5c7", 194 | "sha256:334f29435099e644a8047b63d60b8386a98b5f7b4739f7efc86b46ca0200aa0e", 195 | "sha256:4c43a1f28e555891a1e65650adea2c5d0f0fe4b3d63821de65c8357f32c3a11c", 196 | "sha256:779ebfaa3cf1d5c0f1286ac1baf06af5f2a17bb103622992c71acc6ac20b2781", 197 | "sha256:8fc4c0641537262e15c7b5b57edc47487b15ac47b696adcb81e0a770ef78e8f5", 198 | "sha256:a1449da4195e30a3bd1fd3122e5b1a0c57703843c590643555c412fc87132aa0", 199 | "sha256:a2746afccfd4f51f0793cccc2b6d5e8a564c962870feec5c77408244c1dbb3c5", 200 | "sha256:a696e9c925e91f76ec53840c55483a4fbf76cb717424410a4f249d4805439038", 201 | "sha256:b5b887328ac6e210842560fcf32a29c2a9c1ed38c6d47479cadc03d81940da8c", 202 | "sha256:d3dd796372367c71946d0cd6f734e49db3d99dd13a57bdac937d9eb62689fc9e", 203 | "sha256:fd5e1e01f5fd0f532a6f3977bb74facc42f1b7155402ee3d06c07a73e83e3c47" 204 | ], 205 | "version": "==1.0.5" 206 | }, 207 | "thinc": { 208 | "hashes": [ 209 | "sha256:0139fa84dc9b8d88af15e648fc4ae13d899b8b5e49cb26a8f4a0604ee9ad8a9e", 210 | "sha256:061633bf334e3728173d59d6001e8cdef3839166c71e23b3c5f74f5fae3c0d7c", 211 | "sha256:0df8c5762359a3a4d8d494aa2eff11c4936c4f34559fe1b3ab1d13d24c76b509", 212 | "sha256:33db4a9182c78c8f4823b1765274bbb0caa8f4269dbd102f2e6ab2f7f91a6084", 213 | "sha256:36237f711f0b3da932bd28cc366a92f6f1b6d1f95ad6cbbc8166b94785b38e40", 214 | 
"sha256:387d25e57e53eed86d24f2657ab9555703043de27211764835a38e2e31b3c8e9", 215 | "sha256:55b9e02e4b8395cee0a8a810bd8af4d7600b04520bab60df1fc513d50a41eec5", 216 | "sha256:5d633cc5c210a02ba706ed7e800f4dc906ba1e10b85e3ed40d77fdb7e7674a20", 217 | "sha256:947806f4cbbcaf8dd046942acd5e52d55ac805303985a2e36de4734be5496bf1", 218 | "sha256:d3ff8cfbf583ac788a85f5e0e3cf00edf2f6bc5ba2b2ca264771870c07cb5717", 219 | "sha256:d70e71b0561bbf844bc9f737f60150b0f8f04dfd603151869d93a5735deb6219", 220 | "sha256:e2ebeeafd79bb86697388fccc5996d6ea1e69106e2a7fc3a1092d626b522cc01" 221 | ], 222 | "version": "==7.4.1" 223 | }, 224 | "tqdm": { 225 | "hashes": [ 226 | "sha256:9fdf349068d047d4cfbe24862c425883af1db29bcddf4b0eeb2524f6fbdb23c7", 227 | "sha256:d666ae29164da3e517fcf125e41d4fe96e5bb375cd87ff9763f6b38b5592fe33" 228 | ], 229 | "version": "==4.59.0" 230 | }, 231 | "urllib3": { 232 | "hashes": [ 233 | "sha256:1b465e494e3e0d8939b50680403e3aedaa2bc434b7d5af64dfd3c958d7f5ae80", 234 | "sha256:de3eedaad74a2683334e282005cd8d7f22f4d55fa690a2a1020a416cb0a47e73" 235 | ], 236 | "index": "pypi", 237 | "version": "==1.26.3" 238 | }, 239 | "wasabi": { 240 | "hashes": [ 241 | "sha256:a493e09d86109ec6d9e70d040472f9facc44634d4ae6327182f94091ca73a490", 242 | "sha256:b4a36aaa9ca3a151f0c558f269d442afbb3526f0160fd541acd8a0d5e5712054" 243 | ], 244 | "version": "==0.8.2" 245 | } 246 | }, 247 | "develop": { 248 | "attrs": { 249 | "hashes": [ 250 | "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", 251 | "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" 252 | ], 253 | "version": "==20.3.0" 254 | }, 255 | "autopep8": { 256 | "hashes": [ 257 | "sha256:d21d3901cb0da6ebd1e83fc9b0dfbde8b46afc2ede4fe32fbda0c7c6118ca094" 258 | ], 259 | "index": "pypi", 260 | "version": "==1.5.4" 261 | }, 262 | "backcall": { 263 | "hashes": [ 264 | "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", 265 | 
"sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255" 266 | ], 267 | "version": "==0.2.0" 268 | }, 269 | "coverage": { 270 | "hashes": [ 271 | "sha256:004d1880bed2d97151facef49f08e255a20ceb6f9432df75f4eef018fdd5a78c", 272 | "sha256:01d84219b5cdbfc8122223b39a954820929497a1cb1422824bb86b07b74594b6", 273 | "sha256:040af6c32813fa3eae5305d53f18875bedd079960822ef8ec067a66dd8afcd45", 274 | "sha256:06191eb60f8d8a5bc046f3799f8a07a2d7aefb9504b0209aff0b47298333302a", 275 | "sha256:13034c4409db851670bc9acd836243aeee299949bd5673e11844befcb0149f03", 276 | "sha256:13c4ee887eca0f4c5a247b75398d4114c37882658300e153113dafb1d76de529", 277 | "sha256:184a47bbe0aa6400ed2d41d8e9ed868b8205046518c52464fde713ea06e3a74a", 278 | "sha256:18ba8bbede96a2c3dde7b868de9dcbd55670690af0988713f0603f037848418a", 279 | "sha256:1aa846f56c3d49205c952d8318e76ccc2ae23303351d9270ab220004c580cfe2", 280 | "sha256:217658ec7187497e3f3ebd901afdca1af062b42cfe3e0dafea4cced3983739f6", 281 | "sha256:24d4a7de75446be83244eabbff746d66b9240ae020ced65d060815fac3423759", 282 | "sha256:2910f4d36a6a9b4214bb7038d537f015346f413a975d57ca6b43bf23d6563b53", 283 | "sha256:2949cad1c5208b8298d5686d5a85b66aae46d73eec2c3e08c817dd3513e5848a", 284 | "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4", 285 | "sha256:2cafbbb3af0733db200c9b5f798d18953b1a304d3f86a938367de1567f4b5bff", 286 | "sha256:2e0d881ad471768bf6e6c2bf905d183543f10098e3b3640fc029509530091502", 287 | "sha256:30c77c1dc9f253283e34c27935fded5015f7d1abe83bc7821680ac444eaf7793", 288 | "sha256:3487286bc29a5aa4b93a072e9592f22254291ce96a9fbc5251f566b6b7343cdb", 289 | "sha256:372da284cfd642d8e08ef606917846fa2ee350f64994bebfbd3afb0040436905", 290 | "sha256:41179b8a845742d1eb60449bdb2992196e211341818565abded11cfa90efb821", 291 | "sha256:44d654437b8ddd9eee7d1eaee28b7219bec228520ff809af170488fd2fed3e2b", 292 | "sha256:4a7697d8cb0f27399b0e393c0b90f0f1e40c82023ea4d45d22bce7032a5d7b81", 293 | 
"sha256:51cb9476a3987c8967ebab3f0fe144819781fca264f57f89760037a2ea191cb0", 294 | "sha256:52596d3d0e8bdf3af43db3e9ba8dcdaac724ba7b5ca3f6358529d56f7a166f8b", 295 | "sha256:53194af30d5bad77fcba80e23a1441c71abfb3e01192034f8246e0d8f99528f3", 296 | "sha256:5fec2d43a2cc6965edc0bb9e83e1e4b557f76f843a77a2496cbe719583ce8184", 297 | "sha256:6c90e11318f0d3c436a42409f2749ee1a115cd8b067d7f14c148f1ce5574d701", 298 | "sha256:74d881fc777ebb11c63736622b60cb9e4aee5cace591ce274fb69e582a12a61a", 299 | "sha256:7501140f755b725495941b43347ba8a2777407fc7f250d4f5a7d2a1050ba8e82", 300 | "sha256:796c9c3c79747146ebd278dbe1e5c5c05dd6b10cc3bcb8389dfdf844f3ead638", 301 | "sha256:869a64f53488f40fa5b5b9dcb9e9b2962a66a87dab37790f3fcfb5144b996ef5", 302 | "sha256:8963a499849a1fc54b35b1c9f162f4108017b2e6db2c46c1bed93a72262ed083", 303 | "sha256:8d0a0725ad7c1a0bcd8d1b437e191107d457e2ec1084b9f190630a4fb1af78e6", 304 | "sha256:900fbf7759501bc7807fd6638c947d7a831fc9fdf742dc10f02956ff7220fa90", 305 | "sha256:92b017ce34b68a7d67bd6d117e6d443a9bf63a2ecf8567bb3d8c6c7bc5014465", 306 | "sha256:970284a88b99673ccb2e4e334cfb38a10aab7cd44f7457564d11898a74b62d0a", 307 | "sha256:972c85d205b51e30e59525694670de6a8a89691186012535f9d7dbaa230e42c3", 308 | "sha256:9a1ef3b66e38ef8618ce5fdc7bea3d9f45f3624e2a66295eea5e57966c85909e", 309 | "sha256:af0e781009aaf59e25c5a678122391cb0f345ac0ec272c7961dc5455e1c40066", 310 | "sha256:b6d534e4b2ab35c9f93f46229363e17f63c53ad01330df9f2d6bd1187e5eaacf", 311 | "sha256:b7895207b4c843c76a25ab8c1e866261bcfe27bfaa20c192de5190121770672b", 312 | "sha256:c0891a6a97b09c1f3e073a890514d5012eb256845c451bd48f7968ef939bf4ae", 313 | "sha256:c2723d347ab06e7ddad1a58b2a821218239249a9e4365eaff6649d31180c1669", 314 | "sha256:d1f8bf7b90ba55699b3a5e44930e93ff0189aa27186e96071fac7dd0d06a1873", 315 | "sha256:d1f9ce122f83b2305592c11d64f181b87153fc2c2bbd3bb4a3dde8303cfb1a6b", 316 | "sha256:d314ed732c25d29775e84a960c3c60808b682c08d86602ec2c3008e1202e3bb6", 317 | 
"sha256:d636598c8305e1f90b439dbf4f66437de4a5e3c31fdf47ad29542478c8508bbb", 318 | "sha256:deee1077aae10d8fa88cb02c845cfba9b62c55e1183f52f6ae6a2df6a2187160", 319 | "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c", 320 | "sha256:f030f8873312a16414c0d8e1a1ddff2d3235655a2174e3648b4fa66b3f2f1079", 321 | "sha256:f0b278ce10936db1a37e6954e15a3730bea96a0997c26d7fee88e6c396c2086d", 322 | "sha256:f11642dddbb0253cc8853254301b51390ba0081750a8ac03f20ea8103f0c56b6" 323 | ], 324 | "version": "==5.5" 325 | }, 326 | "decorator": { 327 | "hashes": [ 328 | "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760", 329 | "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7" 330 | ], 331 | "version": "==4.4.2" 332 | }, 333 | "flake8": { 334 | "hashes": [ 335 | "sha256:749dbbd6bfd0cf1318af27bf97a14e28e5ff548ef8e5b1566ccfb25a11e7c839", 336 | "sha256:aadae8761ec651813c24be05c6f7b4680857ef6afaae4651a4eccaef97ce6c3b" 337 | ], 338 | "index": "pypi", 339 | "version": "==3.8.4" 340 | }, 341 | "iniconfig": { 342 | "hashes": [ 343 | "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", 344 | "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" 345 | ], 346 | "version": "==1.1.1" 347 | }, 348 | "ipython": { 349 | "hashes": [ 350 | "sha256:c987e8178ced651532b3b1ff9965925bfd445c279239697052561a9ab806d28f", 351 | "sha256:cbb2ef3d5961d44e6a963b9817d4ea4e1fa2eb589c371a470fed14d8d40cbd6a" 352 | ], 353 | "index": "pypi", 354 | "version": "==7.19.0" 355 | }, 356 | "ipython-genutils": { 357 | "hashes": [ 358 | "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", 359 | "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" 360 | ], 361 | "version": "==0.2.0" 362 | }, 363 | "isort": { 364 | "extras": [ 365 | "pipfile" 366 | ], 367 | "hashes": [ 368 | "sha256:dcab1d98b469a12a1a624ead220584391648790275560e1a43e54c5dceae65e7", 369 | 
"sha256:dcaeec1b5f0eca77faea2a35ab790b4f3680ff75590bfcb7145986905aab2f58" 370 | ], 371 | "index": "pypi", 372 | "version": "==5.6.4" 373 | }, 374 | "jedi": { 375 | "hashes": [ 376 | "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93", 377 | "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707" 378 | ], 379 | "version": "==0.18.0" 380 | }, 381 | "mccabe": { 382 | "hashes": [ 383 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 384 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 385 | ], 386 | "version": "==0.6.1" 387 | }, 388 | "packaging": { 389 | "hashes": [ 390 | "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5", 391 | "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a" 392 | ], 393 | "version": "==20.9" 394 | }, 395 | "parso": { 396 | "hashes": [ 397 | "sha256:15b00182f472319383252c18d5913b69269590616c947747bc50bf4ac768f410", 398 | "sha256:8519430ad07087d4c997fda3a7918f7cfa27cb58972a8c89c2a0295a1c940e9e" 399 | ], 400 | "version": "==0.8.1" 401 | }, 402 | "pexpect": { 403 | "hashes": [ 404 | "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937", 405 | "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c" 406 | ], 407 | "markers": "sys_platform != 'win32'", 408 | "version": "==4.8.0" 409 | }, 410 | "pickleshare": { 411 | "hashes": [ 412 | "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", 413 | "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56" 414 | ], 415 | "version": "==0.7.5" 416 | }, 417 | "pluggy": { 418 | "hashes": [ 419 | "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", 420 | "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" 421 | ], 422 | "version": "==0.13.1" 423 | }, 424 | "prompt-toolkit": { 425 | "hashes": [ 426 | 
"sha256:4cea7d09e46723885cb8bc54678175453e5071e9449821dce6f017b1d1fbfc1a", 427 | "sha256:9397a7162cf45449147ad6042fa37983a081b8a73363a5253dd4072666333137" 428 | ], 429 | "version": "==3.0.17" 430 | }, 431 | "ptyprocess": { 432 | "hashes": [ 433 | "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", 434 | "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220" 435 | ], 436 | "version": "==0.7.0" 437 | }, 438 | "py": { 439 | "hashes": [ 440 | "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", 441 | "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" 442 | ], 443 | "version": "==1.10.0" 444 | }, 445 | "pycodestyle": { 446 | "hashes": [ 447 | "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367", 448 | "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e" 449 | ], 450 | "version": "==2.6.0" 451 | }, 452 | "pyflakes": { 453 | "hashes": [ 454 | "sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92", 455 | "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8" 456 | ], 457 | "version": "==2.2.0" 458 | }, 459 | "pygments": { 460 | "hashes": [ 461 | "sha256:2656e1a6edcdabf4275f9a3640db59fd5de107d88e8663c5d4e9a0fa62f77f94", 462 | "sha256:534ef71d539ae97d4c3a4cf7d6f110f214b0e687e92f9cb9d2a3b0d3101289c8" 463 | ], 464 | "version": "==2.8.1" 465 | }, 466 | "pyparsing": { 467 | "hashes": [ 468 | "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", 469 | "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" 470 | ], 471 | "version": "==2.4.7" 472 | }, 473 | "pytest": { 474 | "hashes": [ 475 | "sha256:4288fed0d9153d9646bfcdf0c0428197dba1ecb27a33bb6e031d002fa88653fe", 476 | "sha256:c0a7e94a8cdbc5422a51ccdad8e6f1024795939cc89159a0ae7f0b316ad3823e" 477 | ], 478 | "index": "pypi", 479 | "version": "==6.1.2" 480 | }, 481 | "pytest-cov": { 482 | "hashes": [ 483 | 
"sha256:45ec2d5182f89a81fc3eb29e3d1ed3113b9e9a873bcddb2a71faaab066110191", 484 | "sha256:47bd0ce14056fdd79f93e1713f88fad7bdcc583dcd7783da86ef2f085a0bb88e" 485 | ], 486 | "index": "pypi", 487 | "version": "==2.10.1" 488 | }, 489 | "pytest-datadir": { 490 | "hashes": [ 491 | "sha256:1847ed0efe0bc54cac40ab3fba6d651c2f03d18dd01f2a582979604d32e7621e", 492 | "sha256:d3af1e738df87515ee509d6135780f25a15959766d9c2b2dbe02bf4fb979cb18" 493 | ], 494 | "index": "pypi", 495 | "version": "==1.3.1" 496 | }, 497 | "toml": { 498 | "hashes": [ 499 | "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", 500 | "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" 501 | ], 502 | "version": "==0.10.2" 503 | }, 504 | "traitlets": { 505 | "hashes": [ 506 | "sha256:178f4ce988f69189f7e523337a3e11d91c786ded9360174a3d9ca83e79bc5396", 507 | "sha256:69ff3f9d5351f31a7ad80443c2674b7099df13cc41fc5fa6e2f6d3b0330b0426" 508 | ], 509 | "version": "==5.0.5" 510 | }, 511 | "wcwidth": { 512 | "hashes": [ 513 | "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784", 514 | "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83" 515 | ], 516 | "version": "==0.2.5" 517 | } 518 | } 519 | } 520 | --------------------------------------------------------------------------------