├── .gitignore
├── LICENSE
├── README.md
├── compose-dev.yml
├── docker
    ├── app.dockerfile
    ├── jupyter.dockerfile
    └── requirements.txt
└── src
    ├── app
        ├── __init__.py
        ├── database.py
        ├── settings.py
        └── tasks.py
    ├── main.py
    ├── recipes
        ├── .gitignore
        ├── README.md
        ├── __init__.py
        ├── example-datasets
        │   ├── annotated_github-issues-DOCUMENTATION-textcat.jsonl
        │   ├── annotated_opiates-DRUG-manual.jsonl
        │   ├── annotated_reddit-INSULT-textcat.jsonl
        │   ├── annotated_reddit-PRODUCT-ner.jsonl
        │   ├── raw_github-issue-titles.jsonl
        │   ├── raw_news-headlines-nyt.jsonl
        │   ├── raw_reddit-2015-01.jsonl
        │   └── raw_shares-newsapi.jsonl
        ├── example-patterns
        │   ├── patterns_countries-GPE.jsonl
        │   ├── patterns_drugs-DRUG.jsonl
        │   └── patterns_insults-INSULT.jsonl
        ├── image
        │   └── image_manual.py
        ├── ner
        │   ├── ner_make_gold.py
        │   ├── ner_manual.py
        │   ├── ner_match.py
        │   ├── ner_silver_to_gold.py
        │   └── ner_teach.py
        ├── other
        │   ├── choice.py
        │   └── mark.py
        ├── terms
        │   └── terms_teach.py
        ├── tests.py
        └── textcat
        │   ├── textcat_custom_model.py
        │   └── textcat_teach.py
    ├── static
        ├── fonts
        │   ├── lato-bold.woff
        │   ├── lato-bold.woff2
        │   ├── lato-regular.woff
        │   ├── lato-regular.woff2
        │   ├── robotocondensed-bold.woff
        │   ├── robotocondensed-bold.woff2
        │   ├── robotocondensed-bolditalic.woff
        │   ├── robotocondensed-bolditalic.woff2
        │   ├── sharetechmono-regular.woff
        │   └── sharetechmono-regular.woff2
        └── img
        │   ├── botonA.png
        │   ├── botonS.png
        │   ├── botonX.png
        │   ├── interfaz1.png
        │   └── interfaz2.png
    └── templates
        ├── home.html
        ├── layout.html
        ├── login.html
        ├── prodigy
            └── index.html
        └── task_list.html


/.gitignore:
--------------------------------------------------------------------------------
1 | */prodigy*.whl
2 | data/*
3 | .idea/
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (C) 2018 ExplosionAI UG (haftungsbeschränkt)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Prodigy Multi Annotator
  2 | =======================
  3 | 
  4 | This is my attempt at using the amazing tool [prodigy](https://prodi.gy/) with multiple users. It's just a basic web-based annotation manager that I created for my specific needs.
  5 | 
  6 | It allows you to run multiple recipes, with many users per recipe. It uses Celery underneath to create a queue per user/recipe, so your annotators can start working on multiple tasks and resume where they left off at any time.
  7 | 
  8 | Celery also allows you to spin up multiple workers, so you can run the recipes in parallel, or on multiple machines.
  9 | 
 10 | ## Setup
 11 | 
 12 | This project uses [Docker](https://docs.docker.com/install/linux/docker-ce/ubuntu/) and [Docker Compose](https://docs.docker.com/compose/install/), make sure you have installed them before continuing.
 13 | 
 14 | ```
 15 | git clone https://github.com/bastiancy/prodigy-multi-annotator
 16 | cd prodigy-multi-annotator
 17 | 
 18 | # You need to copy your own prodigy binary, as it's a dependency for the docker image
 19 | cp /tmp/prodigy-1.5.1-cp35.cp36-cp35m.cp36m-linux_x86_64.whl ./docker/
 20 | 
 21 | docker-compose -f compose-dev.yml up -d --build
 22 | ```
 23 | 
 24 | Any additional [spaCy models](https://spacy.io/models/) you need are not installed on the image, so you have to add them manually:
 25 | 
 26 | ```
 27 | docker-compose -f compose-dev.yml exec worker python3 -m spacy download en_core_web_sm
 28 | ```
 29 | 
 30 | If you are not running the project locally, the `BASE_URL` must be set on the `compose-dev.yml` file (i.e. `BASE_URL=http://prodigy.mysite.com`)
 31 | 
 32 | Docker mounts the folder `./data` into the containers; here you can add files that the recipes can use, e.g. a custom model to load or a JSONL source file.
 33 | 
 34 | ## Usage
 35 | 
 36 | First, you need to configure which recipes will be available to users; I call these **projects**. Add them to the `PROJECTS` dictionary in the `src/app/settings.py` file.
 37 | 
 38 | ```python
 39 | PROJECTS = {
 40 |     'manual_all': {
 41 |         'recipe': 'ner.manual',
 42 |         'recipe_sig': ('dataset', 'spacy_model', 'source', '--api', '--loader', '--label', '--exclude'),
 43 |         'recipe_args': {
 44 |             'dataset': 'manual_all.{user_id}',
 45 |             'spacy_model': 'en_core_web_sm',
 46 |             'source': '{data_dir}/manual_all/source.jsonl',
 47 |             '--label': ('PER', 'ORG', 'LOC')
 48 |         },
 49 |         'config': {
 50 |             'show_stats': False,
 51 |             'swipe': False,
 52 |         },
 53 |         'desc': 'Manually annotate examples for labels: PER, ORG and LOC.',
 54 |         'instructions': '{data_dir}/manual_all/instructions.html',
 55 |         'visible': True
 56 |     },
 57 |     'teach_org': {
 58 |         'recipe': 'ner.teach',
 59 |         'recipe_sig': ('dataset', 'spacy_model', 'source', '--api', '--loader', '--label', '--patterns', '--exclude', '--unsegmented'),
 60 |         'recipe_args': {
 61 |             'dataset': 'teach_org.{user_id}',
 62 |             'spacy_model': '{data_dir}/teach_org/{user_id}/model',
 63 |             'source': '{data_dir}/teach_org/source.jsonl',
 64 |             '--label': ('ORG',),
 65 |         },
 66 |         'desc': 'Validate algorithm predictions, for label "ORG".',
 67 |         'instructions': '{data_dir}/teach_org/instructions.html',
 68 |         'visible': True,
 69 |         'users': ['john', 'jane'],
 70 |         'copy_model': ('{data_dir}/teach_org/model', '{data_dir}/teach_org/{user_id}/model'),
 71 |     },
 72 |     'train_all': {
 73 |         'recipe': 'ner.batch-train',
 74 |         'recipe_sig': ('dataset', 'spacy_model', '--output', '--factor', '--dropout', '--n-iter', '--batch-size', '--beam-width', '--eval-id', '--eval-split', '--unsegmented', '--no-missing', '--silent'),
 75 |         'recipe_args': {
 76 |             'dataset': 'train_all',
 77 |             'spacy_model': '{data_dir}/manual_all/model',
 78 |             '--factor': 1,
 79 |             '--output': '{data_dir}/manual_all/model_v2',
 80 |             '--eval-id': 'eval_all',
 81 |         },
 82 |         'visible': False,
 83 |         'consolidate': {
 84 |         	'source': ['teach_org.{user_id}', 'manual_all.{user_id}'],
 85 |         	'dest': 'train_all',
 86 |         },
 87 |         'copy_model': ('{data_dir}/manual_all/model_v2', '{data_dir}/teach_org/model')
 88 |     }
 89 | }
 90 | ```
 91 | 
 92 | ## Limitations
 93 | 
 94 | Currently, I'm not able to configure Celery for proper concurrency, so I run the workers with the `--concurrency=1` flag. However, you can run multiple workers with Docker using the command `docker-compose -f compose-dev.yml scale worker=4`; this runs 4 Celery workers on the same machine, and when a user starts a task the application randomly chooses one worker and sticks to it.
 95 | 
 96 | The application does not cache resources between recipes nor workers, so for every user starting a recipe the spacy model will be loaded into memory. You should make sure you have enough memory to run your recipes for all users.
 97 | 
 98 | ## Todo
 99 | 
100 |  - allow consolidation of annotations, and re-train models periodically.
101 |  - config Celery for proper concurrency.
102 |  - implement resource caching for concurrency.
103 | 
104 | ## Licence
105 | 
106 | 


--------------------------------------------------------------------------------
/compose-dev.yml:
--------------------------------------------------------------------------------
 1 | version: '2'
 2 | 
 3 | services:
 4 |     redis:
 5 |         image: redis
 6 |         ports:
 7 |             - 6379:6379
 8 |     postgres:
 9 |         image: postgres:9.6.5
10 |         environment:
11 |             - POSTGRES_USER=prodigy
12 |             - POSTGRES_PASSWORD=prodigy
13 |         ports:
14 |             - 5432:5432
15 |     app:
16 |         build: 
17 |             context: ./docker
18 |             dockerfile: app.dockerfile
19 |         environment:
20 |             - DATA_DIR=/data
21 |             - DB_HOST=postgres
22 |             - DB_USER=prodigy
23 |             - DB_PASS=prodigy
24 |             - BASE_URL=http://192.168.99.100
25 |             - CELERY_BROKER=redis://redis:6379/0
26 |             - CELERY_BACKEND=redis://redis:6379/0
27 |         ports:
28 |             - 80:8080
29 |         volumes:
30 |             - ./src:/app
31 |             - ./data:/data
32 |         depends_on:
33 |             - redis
34 |             - postgres
35 | #            - worker
36 |         command: python3 main.py
37 | #        restart: always
38 | #    worker:
39 | #        build:
40 | #            context: ./docker
41 | #            dockerfile: app.dockerfile
42 | #        environment:
43 | #            - LISTEN_PORT=6000
44 | #            - DATA_DIR=/data
45 | #            - DB_HOST=postgres
46 | #            - DB_USER=prodigy
47 | #            - DB_PASS=prodigy
48 | #            - CELERY_BROKER=redis://redis:6379/0
49 | #            - CELERY_BACKEND=redis://redis:6379/0
50 | #        volumes:
51 | #            - ./src:/app
52 | #            - ./data:/data
53 | #        depends_on:
54 | #            - redis
55 | #            - postgres
56 | #        command: celery -A tasks worker -l warning -Q prodigy --concurrency=1
57 | #        restart: always
58 | #    jupyter:
59 | #        build:
60 | #            context: ./docker
61 | #            dockerfile: jupyter.dockerfile
62 | #        volumes:
63 | #            - ./data:/data
64 | #        ports:
65 | #            - 8888:8888
66 | #        command: start.sh jupyter lab
67 | #        restart: always


--------------------------------------------------------------------------------
/docker/app.dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.6-slim
 2 | 
 3 | RUN apt-get update && apt-get install -y build-essential python-dev openssl less curl
 4 | 
 5 | RUN mkdir -p /tmp/lib
 6 | COPY ./requirements.txt /tmp/lib
 7 | COPY ./*.whl /tmp/lib
 8 | 
 9 | RUN curl https://bootstrap.pypa.io/get-pip.py | python3
10 | RUN pip3 install --no-cache-dir -r /tmp/lib/requirements.txt
11 | RUN pip3 install /tmp/lib/prodigy-1.5*.whl
12 | RUN python3 -m spacy download en
13 | 
14 | WORKDIR /app
15 | 
16 | EXPOSE 8080
17 | EXPOSE 6000
18 | 


--------------------------------------------------------------------------------
/docker/jupyter.dockerfile:
--------------------------------------------------------------------------------
 1 | FROM jupyter/scipy-notebook:1145fb1198b2
 2 | 
  3 | # Copy the Prodigy wheel into the image so it can be installed with pip below
 4 | COPY ./*.whl /tmp/
 5 | 
 6 | RUN pip install 'spacy==2.0.12' 'psycopg2-binary==2.7.5' && \
 7 | 	pip install /tmp/prodigy-1.5*.whl && \
 8 | 	python -m spacy download en
 9 | 
10 | RUN fix-permissions $CONDA_DIR && \
11 |     fix-permissions /home/$NB_USER
12 | 


--------------------------------------------------------------------------------
/docker/requirements.txt:
--------------------------------------------------------------------------------
1 | connexion == 1.1.15
2 | python_dateutil == 2.6.0
3 | setuptools >= 21.0.0
4 | Flask==0.12.2
5 | flask-cors==3.0.3
6 | flask-login==0.4.1
7 | celery[redis]==4.2.1
8 | spacy==2.0.12
9 | psycopg2-binary==2.7.5


--------------------------------------------------------------------------------
/src/app/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/app/__init__.py


--------------------------------------------------------------------------------
/src/app/database.py:
--------------------------------------------------------------------------------
  1 | # coding: utf8
  2 | from __future__ import unicode_literals
  3 | 
  4 | import peewee as orm
  5 | from pathlib import Path
  6 | import ujson
  7 | 
  8 | from prodigy.util import PRODIGY_HOME, TASK_HASH_ATTR, INPUT_HASH_ATTR, log
  9 | from prodigy.util import get_config, get_entry_points, convert_blob, get_display_name
 10 | 
 11 | 
 12 | DB_PROXY = orm.Proxy()
 13 | _DB = None
 14 | 
 15 | 
 16 | def get_db():
 17 |     """Get access to the shared database instance that was previously connected"""
 18 |     global _DB
 19 |     return _DB
 20 | 
 21 | 
 22 | def disconnect():
 23 |     """Disconnect the shared database instance and revert it back to None type"""
 24 |     global _DB
 25 |     if _DB is None:
 26 |         raise AssertionError("Database is already destroyed")
 27 |     _DB.close()
 28 |     _DB = None
 29 | 
 30 | 
 31 | def connect(db_id=None, db_settings=None):
 32 |     """Connect to the database.
 33 | 
 34 |     db_id (unicode): 'sqlite' (default), 'postgresql' or 'mysql'.
 35 |     db_settings (dict): Optional database connection parameters.
 36 |     RETURNS (prodigy.components.db.Database): The initialized database.
 37 |     """
 38 |     global _DB
 39 |     if _DB is not None:
 40 |         return _DB
 41 |     connectors = {'sqlite': connect_sqlite, 'postgresql': connect_postgresql,
 42 |                   'mysql': connect_mysql}
 43 |     user_dbs = get_entry_points('prodigy_db')
 44 |     if user_dbs:
 45 |         log("DB: Added {} connector(s) via entry points".format(len(user_dbs)))
 46 |     if db_id in user_dbs:
 47 |         _DB = user_dbs[db_id]
 48 |         return _DB
 49 |     config = get_config()
 50 |     if db_id in (True, False, None):
 51 |         db_id = config.get('db', 'sqlite')
 52 |     if db_settings in (True, False, None):
 53 |         config_db_settings = config.setdefault('db_settings', {})
 54 |         db_settings = config_db_settings.get(db_id, {})
 55 |     if db_id not in connectors:
 56 |         raise ValueError("Invalid database id: {}".format(db_id))
 57 |     db_name, db = connectors[db_id](**db_settings)
 58 |     _DB = Database(db, db_id, db_name)
 59 |     log("DB: Connecting to database {}".format(db_name), db_settings)
 60 |     return _DB
 61 | 
 62 | 
class BaseModel(orm.Model):
    """Common base class that binds every model in this module to DB_PROXY."""
    class Meta:
        # The proxy is pointed at a concrete database in Database.__init__.
        database = DB_PROXY
 66 | 
 67 | 
class Dataset(BaseModel):
    """A named collection of examples."""
    # Unique dataset name; used for every lookup in Database.
    name = orm.CharField(unique=True)
    # Creation timestamp; dataset listings are ordered by it.
    created = orm.TimestampField()
    # JSON-encoded meta, written by Database.add_dataset.
    meta = orm.BlobField()
    # True for session datasets, False for regular ones.
    session = orm.BooleanField()
 73 | 
 74 | 
 75 | class Example(BaseModel):
 76 |     input_hash = orm.BigIntegerField()
 77 |     task_hash = orm.BigIntegerField()
 78 |     content = orm.BlobField()
 79 | 
 80 |     def load(self):
 81 |         content = convert_blob(self.content)
 82 |         return ujson.loads(content)
 83 | 
 84 | 
class Link(BaseModel):
    """Join row that attaches one example to one dataset."""
    example = orm.ForeignKeyField(Example)
    dataset = orm.ForeignKeyField(Dataset)
 88 | 
 89 | 
class User(BaseModel):
    """An application user account; its table is created in Database.__init__."""
    username = orm.CharField(unique=True)
    # NOTE(review): nothing in this module shows how the password is encoded
    # before it is stored -- confirm it is hashed by the caller.
    password = orm.CharField()
    email = orm.CharField()
 94 | 
 95 | 
 96 | def connect_sqlite(**settings):
 97 |     database = settings.pop('name', 'prodigy.db')
 98 |     path = settings.pop('path', PRODIGY_HOME)
 99 |     if database != ':memory:':
100 |         database = str(Path(path) / database)
101 |     return 'SQLite', orm.SqliteDatabase(database, **settings)
102 | 
103 | 
def connect_postgresql(**settings):
    """Build a peewee PostgreSQL database from keyword settings.

    **settings: The database name may be given as 'db', 'name', 'dbname' or
        'database' (default 'prodigy'). Every alias present is removed from
        the settings and the last one in that order wins. All remaining
        settings are forwarded to orm.PostgresqlDatabase.
    RETURNS (tuple): The display name 'PostgreSQL' and the database object.
    """
    database = 'prodigy'
    aliases = ('db', 'name', 'dbname', 'database')
    for alias in [key for key in aliases if key in settings]:
        database = settings.pop(alias)
    return 'PostgreSQL', orm.PostgresqlDatabase(database, **settings)
110 | 
111 | 
def connect_mysql(**settings):
    """Build a peewee MySQL database from keyword settings.

    **settings: The database name may be given as 'db', 'name', 'dbname' or
        'database' (default 'prodigy'). Every alias present is removed from
        the settings and the last one in that order wins. All remaining
        settings are forwarded to orm.MySQLDatabase.
    RETURNS (tuple): The display name 'MySQL' and the database object.
    """
    database = 'prodigy'
    aliases = ('db', 'name', 'dbname', 'database')
    for alias in [key for key in aliases if key in settings]:
        database = settings.pop(alias)
    return 'MySQL', orm.MySQLDatabase(database, **settings)
118 | 
119 | 
class Database(object):
    """Dataset/example store on top of the peewee models in this module.

    All queries go through DB_PROXY, which is bound to the concrete peewee
    database passed to the constructor.
    """

    def __init__(self, db, display_id='custom', display_name=None):
        """Initialize a database.

        db: A database object that can be initialized by peewee.
        display_id (unicode): Database ID used for logging, e.g. 'sqlite'.
        display_name (unicode): Database name used for logging, e.g. 'SQLite'.
        RETURNS (Database): The initialized database.
        """
        DB_PROXY.initialize(db)
        self.db_id = display_id
        self.db_name = display_name or get_display_name(db)
        log("DB: Initialising database {}".format(self.db_name))
        try:
            DB_PROXY.create_tables([User, Dataset, Example, Link], safe=True)
        except orm.OperationalError as err:
            # Still best effort (construction must not fail here), but leave
            # a trace in the log instead of hiding the error completely.
            log("DB: Could not create tables: {}".format(err))
        self.db = DB_PROXY

    def __bool__(self):
        # Without this, truthiness would fall back to __len__ and a database
        # with no datasets would evaluate as False.
        return True

    def __len__(self):
        """
        RETURNS (int): The number of datasets in the database.
        """
        return len(self.datasets)

    def __contains__(self, name):
        """
        name (unicode): Name of the dataset.
        RETURNS (bool): Whether the dataset exists in the database.
        """
        try:
            has_ds = bool(Dataset.get(Dataset.name == name))
        except Dataset.DoesNotExist:
            has_ds = False
        return has_ds

    @staticmethod
    def _as_list(ids):
        """Return ids as a list, wrapping a single non-iterable value."""
        try:
            return list(ids)
        except TypeError:
            return [ids]

    @staticmethod
    def _dataset_names(session):
        """Return dataset names with the given session flag, oldest first."""
        datasets = (Dataset.select(Dataset.name)
                           .where(Dataset.session == session)
                           .order_by(Dataset.created))
        return [ds.name for ds in datasets]

    @staticmethod
    def _collect_hashes(attr, names):
        """Return the set of Example.<attr> values linked to the named datasets."""
        datasets = Dataset.select(Dataset.id).where(Dataset.name << names)
        examples = (Example
                    .select(getattr(Example, attr))
                    .join(Link)
                    .join(Dataset)
                    .where(Dataset.id << datasets)).execute()
        return set(getattr(eg, attr) for eg in examples)

    @property
    def datasets(self):
        """
        RETURNS (list): A list of dataset IDs.
        """
        return self._dataset_names(False)

    @property
    def sessions(self):
        """
        RETURNS (list): A list of session dataset IDs.
        """
        return self._dataset_names(True)

    def close(self):
        """
        Close the database connection (if not already closed). Called after
        API requests to avoid timeout issues, especially with MySQL.
        """
        if not self.db.is_closed():
            self.db.close()

    def reconnect(self):
        """
        Reconnect to the database. Called on API requests to avoid timeout
        issues, especially with MySQL. If the database connection is still
        open, it will be closed before reconnecting.
        """
        if not self.db.is_closed():
            self.db.close()
        self.db.connect()

    def get_examples(self, ids, by='task_hash'):
        """
        ids (list): List of example hashes. A single value is also accepted.
        by (unicode): ID to get examples by. Defaults to 'task_hash'.
        RETURNS (list): The examples.
        """
        field = getattr(Example, by)
        ids = self._as_list(ids)
        return [eg.load() for eg in Example.select().where(field << ids)]

    def get_meta(self, name):
        """
        name (unicode): The dataset name.
        RETURNS (dict): The dataset meta, with the creation timestamp added
            under 'created', or None if the dataset does not exist.
        """
        # A single lookup covers both the existence check and the fetch.
        try:
            dataset = Dataset.get(Dataset.name == name)
        except Dataset.DoesNotExist:
            return None
        meta = ujson.loads(convert_blob(dataset.meta))
        meta['created'] = dataset.created
        return meta

    def get_dataset(self, name, default=None):
        """
        name (unicode): The dataset name.
        default: Return value if dataset not in database.
        RETURNS (list): The examples in the dataset or default value.
        """
        # A single lookup covers both the existence check and the fetch.
        try:
            dataset = Dataset.get(Dataset.name == name)
        except Dataset.DoesNotExist:
            return default
        examples = (Example
                    .select()
                    .join(Link)
                    .join(Dataset)
                    .where(Dataset.id == dataset.id)).execute()
        log("DB: Loading dataset '{}' ({} examples)"
            .format(name, len(examples)))
        return [eg.load() for eg in examples]

    def get_input_hashes(self, *names):
        """
        *names (unicode): Dataset names to get hashes for.
        RETURNS (set): The input hashes.
        """
        return self._collect_hashes('input_hash', names)

    def get_task_hashes(self, *names):
        """
        *names (unicode): The dataset names.
        RETURNS (set): The task hashes.
        """
        return self._collect_hashes('task_hash', names)

    def add_dataset(self, name, meta={}, session=False):
        """
        name (unicode): The name of the dataset to add.
        meta (dict): Optional dataset meta. Only used when the dataset is
            created; the default dict is never mutated.
        session (bool): Whether the dataset is a session dataset.
        RETURNS (Dataset): The existing or newly created dataset row.
        RAISES (ValueError): If the name contains a comma or a space.
        """
        if any(char in name for char in (',', ' ')):
            raise ValueError("Dataset name can't include commas or whitespace")
        try:
            dataset = Dataset.get(Dataset.name == name)
            log("DB: Getting dataset '{}'".format(name))
        except Dataset.DoesNotExist:
            log("DB: Creating dataset '{}'".format(name), meta)
            meta = ujson.dumps(meta, escape_forward_slashes=False)
            dataset = Dataset.create(name=name, meta=meta, session=session)
        return dataset

    def add_examples(self, examples, datasets=tuple()):
        """
        examples (list): The examples to add.
        datasets (list): The names of the dataset(s) to add the examples to.
        RAISES (ValueError): If datasets is not a tuple or list.
        """
        # Validate before writing anything, so that a bad `datasets` argument
        # no longer leaves unlinked example rows behind.
        if not isinstance(datasets, (tuple, list)):
            raise ValueError('datasets must be a tuple or list type, not: {}'.format(type(datasets)))
        # Materialize once so that one-shot iterables can be counted below.
        examples = list(examples)
        ids = []
        with self.db.atomic():
            for eg in examples:
                content = ujson.dumps(eg, escape_forward_slashes=False)
                row = Example.create(input_hash=eg[INPUT_HASH_ATTR],
                                     task_hash=eg[TASK_HASH_ATTR],
                                     content=content)
                ids.append(row.id)
        for dataset in datasets:
            self.link(dataset, ids)
        log("DB: Added {} examples to {} datasets"
            .format(len(examples), len(datasets)))

    def link(self, dataset_name, example_ids):
        """
        dataset_name (unicode): The name of the dataset. It is created if it
            does not exist yet.
        example_ids (list): The IDs of the examples to link to the dataset.
        """
        with self.db.atomic():
            dataset = self.add_dataset(dataset_name)
            for example_id in example_ids:
                Link.create(dataset=dataset.id, example=example_id)

    def unlink(self, dataset):
        """
        dataset (unicode): The name of the dataset to unlink. Only the links
            are removed; the dataset row and the examples are kept.
        RAISES (Dataset.DoesNotExist): If there is no dataset with that name.
        """
        dataset = Dataset.get(Dataset.name == dataset)
        # Filter on the Link's own foreign key. The previous condition used
        # Dataset.id, a column of a table that is not part of this DELETE.
        query = Link.delete().where(Link.dataset == dataset.id)
        query.execute()

    def drop_dataset(self, name):
        """
        name (unicode): The name of the dataset to drop.
        RETURNS (bool): True if dataset was dropped.
        RAISES (Dataset.DoesNotExist): If there is no dataset with that name.
        """
        dataset = Dataset.get(Dataset.name == name)
        # Remove the links first, then the dataset row they point to.
        query = Link.delete().where(Link.dataset == dataset.id)
        query.execute()
        query = Dataset.delete().where(Dataset.id == dataset.id)
        query.execute()
        self.db.commit()
        log("DB: Removed dataset '{}'".format(name))
        return True

    def drop_examples(self, ids, by='task_hash'):
        """
        ids (list): The IDs of the examples to drop. A single value is also
            accepted.
        by (unicode): ID to get examples by. Defaults to 'task_hash'.
        """
        field = getattr(Example, by)
        ids = self._as_list(ids)
        # Only Example rows are deleted here; Link rows are not touched.
        query = Example.delete().where(field << ids)
        query.execute()
        self.db.commit()

    def save(self):
        """Reconnect to the database and commit."""
        log("DB: Saving database")
        self.reconnect()
        self.db.commit()
358 | 


--------------------------------------------------------------------------------
/src/app/settings.py:
--------------------------------------------------------------------------------
  1 | # coding: utf8
  2 | from __future__ import unicode_literals
  3 | 
  4 | from os import environ
  5 | 
  6 | 
  7 | DEBUG = True
  8 | SECRET_KEY = 'hin6bab8ge25*r=x&amp;+5$0kn=-#log$pt^#@vrqjld!^2ci@g*b'
  9 | BASE_URL = environ.get('BASE_URL', 'http://127.0.0.1')
 10 | DATA_DIR = environ.get('DATA_DIR', '../data')
 11 | CELERY_BROKER = environ.get('CELERY_BROKER', 'redis://192.168.99.100:6379/0')
 12 | CELERY_BACKEND = environ.get('CELERY_BACKEND', 'redis://192.168.99.100:6379/0')
 13 | 
# Prodigy configuration. "db" selects which entry of "db_settings" is used.
# The PostgreSQL host and credentials come from the DB_HOST, DB_USER and
# DB_PASS environment variables and are None when those are unset.
PRODIGY_CONFIG = {
    "db": "postgresql",
    "db_settings": {
        "postgresql": {
            "host": environ.get('DB_HOST'),
            "port": 5432,
            "dbname": "prodigy",
            "user": environ.get('DB_USER'),
            "password": environ.get('DB_PASS')
        },
        "sqlite": {
            "name": "prodigy.db",
            "path": DATA_DIR
        }
    },
    "theme": "basic",
    "custom_theme": {},
    "batch_size": 10,
    "port": 8080,
    "host": "0.0.0.0",
    "validate": True,
    "auto_create": True,
    "show_stats": True,
    "hide_meta": False,
    "show_flag": False,
    "instructions": False,
    "swipe": False,
    # NOTE(review): a boolean for something named "threshold" looks unusual --
    # confirm the expected type against the Prodigy documentation.
    "split_sents_threshold": True,
    "diff_style": "words",
    "html_template": False,
    "card_css": {},
    "writing_dir": "ltr",
    "hide_true_newline_tokens": False,
    "ner_manual_require_click": False,
    "ner_manual_label_style": "list",
    "choice_style": "single",
    "choice_auto_accept": False,
    "darken_image": 0,
    "show_bounding_box_center": False,
    "preview_bounding_boxes": False,
    "shade_bounding_boxes": False
}
# Known annotator accounts, keyed by username.
# NOTE(review): the passwords are stored in plain text and committed to the
# repository -- consider loading them from the environment or storing hashes.
USERS = {
    'bcarvajal': {'username': 'bcarvajal', 'password': 'ppba', 'name': 'Bastian Carvajal'},
    'carroyo': {'username': 'carroyo', 'password': 'ppba', 'name': 'Cristian Arroyo'},
    'csobarzo': {'username': 'csobarzo', 'password': 'ppba', 'name': 'Cristian Sobarzo'},
    'mcepeda': {'username': 'mcepeda', 'password': 'ppba', 'name': 'Maurice Cepeda'},
    'jmleon': {'username': 'jmleon', 'password': 'ppba', 'name': 'Jose Manuel Leon'},
    'emartin': {'username': 'emartin', 'password': 'ppba', 'name': 'Eric Martin'},
    'raguirre': {'username': 'raguirre', 'password': 'ppba', 'name': 'Rodrigo Aguirre'},
    'jccarmona': {'username': 'jccarmona', 'password': 'ppba', 'name': 'Juan Christian Carmona'},
    'mrodriguez': {'username': 'mrodriguez', 'password': 'ppba', 'name': 'Mauricio Rodriguez'},
    'amrugaslki': {'username': 'amrugaslki', 'password': 'ppba', 'name': 'Alan Mrugalski'},
    'cmerino': {'username': 'cmerino', 'password': 'ppba', 'name': 'Carola Merino'},
}
# Annotation project definitions, keyed by project id.
#
# Keys read by the code in tasks.py / main.py:
#   recipe       -- name passed to prodigy.get_recipe().
#   recipe_sig   -- ordered argument names of the recipe; positional arguments
#                   are built in this order, with None for names that are
#                   absent from recipe_args.
#   recipe_args  -- values for a subset of recipe_sig.
#   config       -- optional overrides merged into the controller config.
#   desc         -- description shown in the project listings.
#   instructions -- path to an HTML file (or literal text) with instructions.
#   visible      -- only projects with visible == True are listed.
#   only_user    -- optional list of usernames allowed to see the project.
#
# NOTE(review): 'consolidate' and 'copy_model' are not read by any code
# visible alongside this file -- confirm where they are consumed.
# NOTE(review): values contain placeholders such as {user_id} and {data_dir};
# verify that the substitution table used at job creation covers every
# placeholder used here.
PROJECTS = {
    'manual_all': {
        'recipe': 'ner.manual',
        'recipe_sig': ('dataset', 'spacy_model', 'source', '--api', '--loader', '--label', '--exclude'),
        'recipe_args': {
            'dataset': 'manual_all.{user_id}',
            'spacy_model': 'en_core_web_sm',
            'source': '{data_dir}/manual_all/source.jsonl',
            '--label': ('PER', 'ORG', 'LOC')
        },
        'config': {
            'show_stats': False,
            'swipe': False,
        },
        'desc': 'Manually annotate examples for labels: PER, ORG and LOC.',
        'instructions': '{data_dir}/manual_all/instructions.html',
        'visible': True
    },
    'teach_org': {
        'recipe': 'ner.teach',
        'recipe_sig': ('dataset', 'spacy_model', 'source', '--api', '--loader', '--label', '--patterns', '--exclude', '--unsegmented'),
        'recipe_args': {
            'dataset': 'teach_org.{user_id}',
            'spacy_model': '{data_dir}/teach_org/{user_id}/model',
            'source': '{data_dir}/teach_org/source.jsonl',
            '--label': ('ORG',),
        },
        'desc': 'Validate algorithm predictions, for label "ORG".',
        'instructions': '{data_dir}/teach_org/instructions.html',
        'visible': True,
        # NOTE(review): these usernames do not appear in USERS above.
        'only_user': ['john', 'jane'],
        'copy_model': ('{data_dir}/teach_org/model', '{data_dir}/teach_org/{user_id}/model'),
    },
    'train_all': {
        'recipe': 'ner.batch-train',
        'recipe_sig': ('dataset', 'spacy_model', '--output', '--factor', '--dropout', '--n-iter', '--batch-size', '--beam-width', '--eval-id', '--eval-split', '--unsegmented', '--no-missing', '--silent'),
        'recipe_args': {
            'dataset': 'train_all',
            'spacy_model': '{data_dir}/manual_all/model',
            '--factor': 1,
            '--output': '{data_dir}/manual_all/model_v2',
            '--eval-id': 'eval_all',
        },
        # Hidden from the project listings.
        'visible': False,
        'consolidate': {
            'source': ['teach_org.{user_id}', 'manual_all.{user_id}'],
            'dest': 'train_all',
        },
        'copy_model': ('{data_dir}/manual_all/model_v2', '{data_dir}/teach_org/model')
    }
}
120 | 


--------------------------------------------------------------------------------
/src/app/tasks.py:
--------------------------------------------------------------------------------
  1 | # coding: utf8
  2 | from __future__ import unicode_literals
  3 | 
  4 | import os
  5 | import uuid
  6 | import re
  7 | import shutil
  8 | import os.path
  9 | from celery import Celery
 10 | from celery.utils.log import get_logger
 11 | from app.settings import *
 12 | 
 13 | 
# Module-level logger obtained through Celery's logging helper.
logger = get_logger(__name__)
# Celery application used by every task below. CELERY_BROKER and
# CELERY_BACKEND are not defined in this module; they arrive through the
# star import from app.settings.
celery = Celery('tasks', broker=CELERY_BROKER, backend=CELERY_BACKEND)
 16 | 
 17 | 
 18 | def make_prodigy(job_id, project_id, settings, logger, debug=False):
 19 |     import prodigy
 20 |     from app.database import connect
 21 | 
 22 |     if debug:
 23 |         os.environ["PRODIGY_LOGGING"] = 'basic'
 24 | 
 25 |     dbname = PRODIGY_CONFIG['db']
 26 |     connect(dbname, PRODIGY_CONFIG['db_settings'][dbname])
 27 | 
 28 |     loaded_recipe = prodigy.get_recipe(settings['recipe'])
 29 |     if not loaded_recipe:
 30 |         raise ValueError("Can't find recipe {}.".format(settings['recipe']))
 31 | 
 32 |     args = []   # to maintaint order of arguments
 33 |     for item in settings['recipe_sig']:
 34 |         if item in settings['recipe_args']:
 35 |             args.append(settings['recipe_args'][item])
 36 |         else:
 37 |             args.append(None)
 38 | 
 39 |     controller = loaded_recipe(*args)
 40 |     controller.config.update(PRODIGY_CONFIG)
 41 |     if 'config' in settings:
 42 |         controller.config.update(settings['config'])
 43 | 
 44 |     config = controller.config
 45 |     config['view_id'] = controller.view_id
 46 |     config['batch_size'] = controller.batch_size
 47 |     config['version'] = prodigy.about.__version__
 48 | 
 49 |     if 'instructions' in settings:
 50 |         help_path = Path('{}/{}'.format(DATA_DIR, settings['instructions']))
 51 |         if help_path.is_file():
 52 |             with help_path.open('r', encoding='utf8') as f:
 53 |                 config['instructions'] = f.read()
 54 |         else:
 55 |             config['instructions'] = settings['instructions']
 56 | 
 57 |     for setting in ['db_settings', 'api_keys']:
 58 |         if setting in config:
 59 |             config.pop(setting)
 60 | 
 61 |     controller.save()
 62 |     return config, controller
 63 | 
 64 | 
 65 | class ProdigyJob(object):
 66 |     def __init__(self, job_id, project_id, settings, logger, debug=False):
 67 |         self.id = job_id
 68 |         self.uid = str(uuid.uuid4())[:8]
 69 |         self.logger = logger
 70 |         self.debug = debug
 71 |         self.config, self.controller = make_prodigy(self.id, project_id, settings, self.logger, self.debug)
 72 | 
 73 |     def get_project(self):
 74 |         self.logger.debug('CALLED "get_project" on job {} '.format(self.id))
 75 |         return self.config
 76 | 
 77 |     def get_questions(self):
 78 |         self.logger.debug('CALLED "get_questions" on job {} '.format(self.id))
 79 |         if self.controller.db and hasattr(self.controller.db, 'reconnect'):
 80 |             self.controller.db.reconnect()
 81 |         questions = self.controller.get_questions()
 82 |         result = {'tasks': questions, 'total': self.controller.total_annotated,
 83 |                   'progress': self.controller.progress}
 84 |         if self.controller.db and hasattr(self.controller.db, 'close'):
 85 |             self.controller.db.close()
 86 |         return result
 87 | 
 88 |     def give_answers(self, answers):
 89 |         self.logger.debug('CALLED "give_answers" on job {} with: '.format(self.id, repr(answers)))
 90 |         answers = answers['answers']
 91 |         if self.controller.db and hasattr(self.controller.db, 'reconnect'):
 92 |             self.controller.db.reconnect()
 93 |         self.controller.receive_answers(answers)
 94 |         result = {'progress': self.controller.progress}
 95 |         if self.controller.db and hasattr(self.controller.db, 'close'):
 96 |             self.controller.db.close()
 97 |         return result
 98 | 
 99 |     def get_stats(self):
100 |         self.logger.debug('CALLED "get_stats" on job {} '.format(self.id))
101 |         if self.controller.db and hasattr(self.controller.db, 'reconnect'):
102 |             self.controller.db.reconnect()
103 | 
104 |         result = {'total': self.controller.total_annotated,
105 |                   'progress': self.controller.progress,
106 |                   'accept': 0, 'reject': 0, 'ignore': 0, 'meta': 0}
107 | 
108 |         if hasattr(self.config, 'meta'):
109 |             result['meta'] = self.config['meta']
110 | 
111 |         if self.controller.db and hasattr(self.controller.db, 'close'):
112 |             self.controller.db.close()
113 |         return result
114 | 
115 | 
class ProdigyFactory(object):
    """Registry that creates ProdigyJob instances and looks them up by id."""

    # Class-level dict: shared by every factory instance in this process.
    _jobs = {}

    def __init__(self):
        # Short random id, only used to identify the factory in debug logs.
        self.id = str(uuid.uuid4())[:8]
        logger.debug('_factory -> {}'.format(self.id))

    def get_job(self, job_id):
        """Return the registered job.

        RAISES (KeyError): if no job with this id has been created.
        """
        logger.debug('_factory -> {}'.format(self.id))

        if job_id not in self._jobs:
            logger.debug('_jobs -> len: {}, repr: {}'.format(len(self._jobs), repr(self._jobs)))
            raise KeyError('invalid job_id "{}"'.format(job_id))

        return self._jobs[job_id]

    def create_job(self, job_id, project_id, user_id):
        """Create the job if it does not exist yet and return it.

        Placeholders in the project's path-like recipe arguments are expanded
        for this job before the ProdigyJob is built.
        """
        import copy

        logger.debug('_factory -> {}'.format(self.id))

        if job_id not in self._jobs:
            logger.warning('JOB "{}" IS STARTING'.format(job_id))

            if project_id not in ['manual_general']:
                # The model should be copied per job so that each one works
                # on its own copy.
                # NOTE(review): copytree raises if the model_v1 directory of
                # the project does not exist -- confirm all projects have it.
                orig = '{}/{}/model_v1'.format(DATA_DIR, project_id)
                dest = '{}/{}/jobs/{}/model_v1/'.format(DATA_DIR, project_id, job_id)
                if not os.path.exists(dest):
                    shutil.copytree(orig, dest)

            # Work on a private copy: substituting into PROJECTS itself would
            # bake the first user's values into the shared settings and every
            # later job would inherit them.
            settings = copy.deepcopy(PROJECTS[project_id])

            # '{data_dir}' is the placeholder used by the project settings;
            # '{base_path}' is kept for backward compatibility.
            rep = {"{project_id}": project_id, "{user_id}": user_id, "{job_id}": job_id,
                   "{base_path}": DATA_DIR, "{data_dir}": DATA_DIR}
            pattern = re.compile("|".join(re.escape(key) for key in rep))

            for arg_name in ('dataset', 'spacy_model', 'source', '--patterns'):
                if arg_name in settings['recipe_args']:
                    text = settings['recipe_args'][arg_name]
                    settings['recipe_args'][arg_name] = pattern.sub(lambda m: rep[m.group(0)], text)

            logger.info('JOB "{}" SETTINGS: {}'.format(job_id, repr(settings)))

            self._jobs[job_id] = ProdigyJob(job_id, project_id, settings, logger, False)
            job = self._jobs[job_id]
            logger.warning('JOB "{}" (uid: {}) IS READY'.format(job_id, job.uid))
        else:
            job = self._jobs[job_id]
            logger.warning('JOB "{}" (uid: {}) IS ALREADY RUNNING'.format(job_id, job.uid))

        logger.debug('_jobs -> len: {}, repr: {}'.format(len(self._jobs), repr(self._jobs)))
        return job
168 | 
169 | 
class ProdigyTask(celery.Task):
    """Celery task base class exposing a ProdigyFactory as `self.jobs`."""

    def __init__(self):
        # ProdigyFactory keeps its job registry in a class-level dict, so the
        # registry is shared by all factories created in the same process.
        self.jobs = ProdigyFactory()
174 | 
175 | 
class SpacyTask(celery.Task):
    """Celery task base class that caches loaded spaCy models by name."""

    # Class-level cache: model name -> loaded pipeline.
    _models = {}

    # Not a property: a property getter cannot take `name`, and callers
    # invoke this as `self.model(modelname)`.
    def model(self, name):
        """Return the spaCy model `name`, loading it on first use."""
        if name not in self._models:
            import spacy
            self._models[name] = spacy.load(name)

        return self._models[name]
186 | 
187 | 
@celery.task(bind=True, base=ProdigyTask)
def start_job(self, job_id, project_id, user_id):
    """Create (or reuse) the job and report where it is running.

    RETURNS (tuple): (worker hostname, job id, job uid)
    """
    logger.debug('Executing task id {0.id}, args: {0.args!r} kwargs: {0.kwargs!r}'.format(self.request))
    created = self.jobs.create_job(job_id, project_id, user_id)
    # Read the job back through the registry, as the original does.
    registered = self.jobs.get_job(created.id)
    return self.request.hostname, registered.id, registered.uid
194 | 
195 | 
@celery.task(bind=True, base=ProdigyTask)
def get_project(self, job_id):
    """Return the configuration of the job registered under `job_id`."""
    logger.debug('Executing task id {0.id}, args: {0.args!r} kwargs: {0.kwargs!r}'.format(self.request))
    return self.jobs.get_job(job_id).get_project()
201 | 
202 | 
@celery.task(bind=True, base=ProdigyTask)
def get_questions(self, job_id):
    """Return the next batch of questions of the job `job_id`."""
    logger.debug('Executing task id {0.id}, args: {0.args!r} kwargs: {0.kwargs!r}'.format(self.request))
    return self.jobs.get_job(job_id).get_questions()
208 | 
209 | 
@celery.task(bind=True, base=ProdigyTask)
def give_answers(self, job_id, data):
    """Forward the answer payload `data` to the job `job_id`."""
    logger.debug('Executing task id {0.id}, args: {0.args!r} kwargs: {0.kwargs!r}'.format(self.request))
    return self.jobs.get_job(job_id).give_answers(data)
215 | 
216 | 
@celery.task(bind=True, base=ProdigyTask)
def get_stats(self, job_id):
    """Return the statistics dict of the job `job_id`."""
    logger.debug('Executing task id {0.id}, args: {0.args!r} kwargs: {0.kwargs!r}'.format(self.request))
    return self.jobs.get_job(job_id).get_stats()
222 | 
223 | 
@celery.task(bind=True)
def train_models(self):
    """Unfinished stub: walks every (project, user) pair but trains nothing.

    The placeholder table used to be built before the loops, where
    project_id, user_id and job_id were not yet assigned; calling the task
    raised UnboundLocalError. It is now built per pair inside the loops.
    """
    # train each model independently
    for project_id in PROJECTS:
        settings = PROJECTS[project_id]

        for user_id in USERS:
            job_id = 'prodigy.{}.{}'.format(project_id, user_id)

            # Placeholder substitution table for this job.
            rep = {"{project_id}": project_id, "{user_id}": user_id, "{job_id}": job_id, "{base_path}": DATA_DIR}
            rep = dict((re.escape(k), v) for k, v in rep.items())
            pattern = re.compile("|".join(rep.keys()))

            orig = '{}/{}/jobs/{}/model_v1/'.format(DATA_DIR, project_id, job_id)
            # TODO: run the training recipe for this job's model.

    # TODO: train model_general
239 | 
240 | 
@celery.task(bind=True, base=SpacyTask)
def get_prediction(self, text, modelname, only_ents=True):
    """Run the spaCy model `modelname` over `text`.

    RETURNS: the list of entities of the processed document when `only_ents`
        is true, otherwise the processed document itself.
    """
    logger.debug('Executing task id {0.id}, args: {0.args!r} kwargs: {0.kwargs!r}'.format(self.request))
    pipeline = self.model(modelname)
    parsed = pipeline(text)
    return list(parsed.ents) if only_ents else parsed
250 | 


--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
  1 | # coding: utf8
  2 | from __future__ import unicode_literals
  3 | 
  4 | import random
  5 | import logging
  6 | from hashlib import md5
  7 | from functools import wraps
  8 | from pathlib import Path
  9 | 
 10 | from flask import Flask, g, request, Response, json, render_template, send_from_directory, session, abort, flash, redirect, url_for
 11 | from flask_cors import CORS, cross_origin
 12 | from app.settings import *
 13 | from app.database import connect, User
 14 | from app.tasks import start_job, get_project, get_questions, give_answers, get_stats
 15 | 
 16 | 
# Flask application object.
web = Flask(__name__)
# Use this module itself as the configuration object.
web.config.from_object(__name__)
# Enable CORS for the whole app, with credentials support.
CORS(web, supports_credentials=True)
# Connect to the database named by PRODIGY_CONFIG['db'], using the matching
# entry of PRODIGY_CONFIG['db_settings'].
DB = connect(PRODIGY_CONFIG['db'], PRODIGY_CONFIG['db_settings'][PRODIGY_CONFIG['db']])
 21 | 
 22 | 
 23 | def auth_user(user):
 24 |     session['logged_in'] = True
 25 |     session['user_id'] = user.id
 26 |     session['username'] = user.username
 27 |     flash('You are logged in as %s' % (user.username))
 28 | 
 29 | 
 30 | def get_current_user():
 31 |     if session.get('logged_in'):
 32 |         return User.get(User.id == session['user_id'])
 33 | 
 34 | 
 35 | def login_required(f):
 36 |     @wraps(f)
 37 |     def inner(*args, **kwargs):
 38 |         if not session.get('logged_in'):
 39 |             return redirect(url_for('login'))
 40 |         return f(*args, **kwargs)
 41 |     return inner
 42 | 
 43 | 
 44 | @web.before_first_request
 45 | def setup_logging():
 46 |     if not web.debug:
 47 |         web.logger.addHandler(logging.StreamHandler())
 48 |         web.logger.setLevel(logging.INFO)
 49 | 
 50 | 
 51 | # @web.before_request
 52 | # def before_request():
 53 | #     g.db = database
 54 | #     g.db.connect()
 55 | #
 56 | #
 57 | # @web.after_request
 58 | # def after_request(response):
 59 | #     g.db.close()
 60 | #     return response
 61 | 
 62 | 
 63 | @web.route('/login/', methods=['GET', 'POST'])
 64 | def login():
 65 |     if request.method == 'POST' and request.form['username']:
 66 |         try:
 67 |             pw_hash = md5(request.form['password'].encode('utf-8')).hexdigest()
 68 |             user = User.get(
 69 |                 (User.username == request.form['username']) &
 70 |                 (User.password == pw_hash))
 71 |         except User.DoesNotExist:
 72 |             web.logger.error('[DEBUG] pass: %s, hash: %s', request.form['password'], pw_hash)
 73 |             flash('The password entered is incorrect')
 74 |         else:
 75 |             auth_user(user)
 76 |             return redirect(url_for('task_list'))
 77 | 
 78 |     return render_template('login.html')
 79 | 
 80 | 
 81 | @web.route('/logout/')
 82 | def logout():
 83 |     session.pop('logged_in', None)
 84 |     flash('You were logged out')
 85 |     return redirect(url_for('task_list'))
 86 | 
 87 | 
 88 | @web.context_processor
 89 | def _inject_user():
 90 |     return {'current_user': get_current_user()}
 91 | 
 92 | 
 93 | @web.route('/')
 94 | @login_required
 95 | def task_list():
 96 |     user = get_current_user()
 97 |     available_projects = []
 98 |     for key, value in PROJECTS.items():
 99 |         # skip if not visible, or if user is not in only_user list (if not defined all users are allowed)
100 |         if value['visible'] != True or ('only_user' in value and user.username not in value['only_user']):
101 |             continue
102 | 
103 |         item = {'name': key, 'desc': value['desc'], 'stats': []}
104 | 
105 |         if 'instructions' in value:
106 |             help_path = Path('{}/{}'.format(DATA_DIR, value['instructions']))
107 |             if help_path.is_file():
108 |                 with help_path.open('r', encoding='utf8') as f:
109 |                     item['instructions'] = f.read()
110 |             else:
111 |                 item['instructions'] = value['instructions']
112 | 
113 |         available_projects.append(item)
114 | 
115 |     context = {'base_url': BASE_URL, 'projects': available_projects}
116 |     return render_template('task_list.html', **context)
117 | 
118 | 
@web.route('/login')
def web_login():
    """Serve the login page as text/html."""
    page = render_template('login.html', base_url=BASE_URL)
    return Response(page, mimetype='text/html')
124 | 
125 | 
@web.route('/static/<path:path>')
def send_static(path):
    """Serve a file from the 'static' directory."""
    static_dir = 'static'
    return send_from_directory(static_dir, path)
129 | 
130 | 
@web.route('/fonts/<path:path>')
def send_fonts(path):
    """Serve a font file from 'static/fonts'."""
    fonts_dir = 'static/fonts'
    return send_from_directory(fonts_dir, path)
134 | 
135 | 
@web.route('/prodigy/<job>/index.html')
def prodigy_index(job):
    """Serve the Prodigy front-end page for `job` as text/html."""
    page = render_template('prodigy/index.html', base_url=BASE_URL, job=job)
    return Response(page, mimetype='text/html')
141 | 
142 | 
@web.route('/prodigy/<job_id>/project')
def prodigy_get_project(job_id):
    """Fetch the meta data and configuration of the job's project.

    The request is sent to the job's own queue and this view blocks until
    the task result is available.
    RETURNS (dict): The configuration parameters and settings, as JSON.
    """
    pending = get_project.apply_async(args=(job_id,), queue=job_id)
    payload = json.dumps(pending.get())
    return Response(payload, mimetype='application/json')
153 | 
154 | 
@web.route('/prodigy/<job_id>/get_questions')
def prodigy_get_questions(job_id):
    """Fetch the next batch of tasks to annotate for the job.

    The request is sent to the job's own queue and this view blocks until
    the task result is available.
    RETURNS (dict): {'tasks': list, 'total': int, 'progress': float}, as JSON.
    """
    pending = get_questions.apply_async(args=(job_id,), queue=job_id)
    payload = json.dumps(pending.get())
    return Response(payload, mimetype='application/json')
165 | 
166 | 
@web.route('/prodigy/<job_id>/give_answers', methods=['POST'])
def prodigy_give_answers(job_id):
    """Receive annotated answers, e.g. from the web app.

    The JSON body must be an object with an "answers" key holding a list of
    task dictionaries with an added `"answer"` key. A request that is not
    JSON, or whose body lacks that key, is rejected with HTTP 400 instead of
    surfacing as a server error.
    RETURNS (dict): {'progress': float}
    """
    if not request.is_json:
        abort(400)

    data = request.get_json(force=True, cache=False)
    # The worker reads data['answers']; reject malformed payloads here rather
    # than letting the task fail.
    if not isinstance(data, dict) or 'answers' not in data:
        abort(400)

    reply = give_answers.apply_async(args=(job_id, data),
                                     queue=job_id)
    result = reply.get()
    return Response(json.dumps(result), mimetype='application/json')
182 | 
183 | 
@web.route('/prodigy/<path:path>')
def prodigy_static(path):
    """Serve a file from the directory reported by prodigy.app.serve_static()."""
    from prodigy.app import serve_static
    # The first element of the returned value is used as the directory.
    static_root = serve_static()[0]
    return send_from_directory(static_root, path)
189 | 
190 | 
@web.route("/api/project")
def project_list():
    """Return the visible projects as a JSON list.

    Projects restricted through 'only_user' are included only when the
    session belongs to one of the listed usernames; without a logged-in
    user they are left out.
    """
    # The original hard-coded `user = None` and then read `user.username`,
    # which raised AttributeError for any visible project with 'only_user'.
    user = get_current_user()
    username = user.username if user is not None else None

    content = []
    for key, value in PROJECTS.items():
        # skip if not visible, or if user is not in only_user list (if not defined all users are allowed)
        if value['visible'] != True or ('only_user' in value and username not in value['only_user']):
            continue

        item = {'name': key, 'desc': value['desc']}

        if 'instructions' in value:
            # Either a file relative to DATA_DIR or the instruction text.
            help_path = Path('{}/{}'.format(DATA_DIR, value['instructions']))
            if help_path.is_file():
                with help_path.open('r', encoding='utf8') as f:
                    item['instructions'] = f.read()
            else:
                item['instructions'] = value['instructions']

        content.append(item)

    return Response(json.dumps(content), mimetype='application/json')
213 | 
214 | 
@web.route("/api/project/<project_id>/stats/<user_id>")
def project_stat_for_user(project_id, user_id):
    """Return the statistics of the user's job in the project as JSON.

    Unlike the other job calls, this one is published to the 'prodigy'
    queue with the job id as routing key.
    """
    job_id = 'prodigy.{}.{}'.format(project_id, user_id)
    pending = get_stats.apply_async(args=(job_id,), queue='prodigy', routing_key=job_id)
    payload = json.dumps(pending.get())
    return Response(payload, mimetype='application/json')
226 | 
227 | 
@web.route("/api/project/<project>/comments/<username>", methods=['GET', 'POST'])
def project_comments_for_user(project, username):
    """Placeholder endpoint: always answers with an empty comments string."""
    payload = json.dumps({'comments': ''})
    return Response(payload, mimetype='application/json')
232 | 
233 | 
@web.route("/api/project/<project_id>/start_job/<user_id>")
def project_create_job(project_id, user_id):
    """Make sure a worker is serving the user's job and return its URL.

    If no worker consumes the job's queue yet, a random worker is told to
    consume it and the job is started there. Answers HTTP 503 when no worker
    replies to inspection.
    RETURNS (dict): {'url': '/prodigy/<job_id>/index.html'}, as JSON.
    """
    # The Celery app is not imported at module level in this file.
    from app.tasks import celery

    job_id = 'prodigy.{}.{}'.format(project_id, user_id)

    inspector = celery.control.inspect()

    workers = inspector.stats()
    web.logger.debug('[DEBUG] workers: %r', workers)

    queues = inspector.active_queues()
    web.logger.debug('[DEBUG] active_queues: %r', queues)

    # Inspection returns None when no worker replies; without this guard the
    # loops below would fail with a TypeError.
    if not workers:
        abort(503)

    active_queue = None
    for worker, worker_queues in (queues or {}).items():
        for queue in worker_queues:
            if queue['name'] == job_id:
                active_queue = (worker, queue)
                break

    if active_queue is None:
        worker, _ = random.choice(list(workers.items()))
        web.logger.info('[INFO] bind queue %s to worker %s', job_id, worker)
        reply = celery.control.add_consumer(
            destination=(worker,),
            queue=job_id,
            exchange='prodigy',
            exchange_type='direct',
            options={
                'queue_durable': True,
                'exchange_durable': True,
            },
            reply=True)
        web.logger.debug('[INFO] bind result: %r', reply)

        reply = start_job.apply_async((job_id, project_id, user_id), queue=job_id)
        hostname, job_id, job_uid = reply.get()
        web.logger.info('[INFO] response from control - hostname: %s, job_id: %s, job_uid: %s', hostname, job_id, job_uid)
    else:
        web.logger.info('[INFO] running in worker: %s', active_queue[0])

    result = {'url': '/prodigy/{}/index.html'.format(job_id)}
    return Response(json.dumps(result), mimetype='application/json')
286 | 
287 | 
@web.route("/api/user/login", methods=['POST'])
def user_login():
    """Check the posted username/password against USERS.

    RETURNS (dict): {'token': username, 'name': display name}, as JSON.
    RAISES (KeyError): when the username is unknown or the password differs.
    """
    username = request.form['username']
    if not (username and username in USERS):
        raise KeyError('username not valid!')

    account = USERS[username]
    password = request.form['password']
    if not (password and password == account['password']):
        raise KeyError('password not valid!')

    payload = json.dumps({'token': username, 'name': account['name']})
    return Response(payload, mimetype='application/json')
303 | 
304 | 
@web.route("/api/user/logout")
def user_logout():
    """Acknowledge a logout request with a constant JSON status."""
    payload = json.dumps({'status': 'ok'})
    return Response(payload, mimetype='application/json')
309 | 
310 | 
if __name__ == '__main__':
    # Entry point when the module is run directly as a script.
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes the
    # debug server on every interface -- do not run this way in production.
    web.run(debug=True, host='0.0.0.0', port=8080)
313 | 


--------------------------------------------------------------------------------
/src/recipes/.gitignore:
--------------------------------------------------------------------------------
  1 | tmp/
  2 | 
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .coverage
 43 | .coverage.*
 44 | .cache
 45 | nosetests.xml
 46 | coverage.xml
 47 | *.cover
 48 | .hypothesis/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | 
 58 | # Flask stuff:
 59 | instance/
 60 | .webassets-cache
 61 | 
 62 | # Scrapy stuff:
 63 | .scrapy
 64 | 
 65 | # Sphinx documentation
 66 | docs/_build/
 67 | 
 68 | # PyBuilder
 69 | target/
 70 | 
 71 | # Jupyter Notebook
 72 | .ipynb_checkpoints
 73 | 
 74 | # pyenv
 75 | .python-version
 76 | 
 77 | # celery beat schedule file
 78 | celerybeat-schedule
 79 | 
 80 | # SageMath parsed files
 81 | *.sage.py
 82 | 
 83 | # Environments
 84 | .env
 85 | .venv
 86 | env/
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/src/recipes/README.md:
--------------------------------------------------------------------------------
 1 | <a href="https://explosion.ai"><img src="https://explosion.ai/assets/img/logo.svg" width="125" height="125" align="right" /></a>
 2 | 
 3 | # Prodigy Recipes
 4 | 
 5 | This repository contains a collection of recipes for [Prodigy](https://prodi.gy),
 6 | our scriptable annotation tool for text, images and other data. In order to use
 7 | this repo, you'll need a license for Prodigy –
 8 | [see this page](https://prodi.gy/buy) for more details. For questions and bug
 9 | reports, please use the [Prodigy Support Forum](https://support.prodi.gy). If
10 | you've found a mistake or bug, feel free to submit a
11 | [pull request](https://github.com/explosion/prodigy-recipes/pulls).
12 | 
13 | > ✨ **Important note:** The recipes in this repository aren't 100% identical to
14 | > the built-in recipes shipped with Prodigy. They've been edited to include
15 | > comments and more information, and some of them have been simplified to make
16 | > it easier to follow what's going on, and to use them as the basis for a
17 | > custom recipe.
18 | 
19 | ## 📋 Usage
20 | 
21 | Once Prodigy is installed, you should be able to run the `prodigy` command from
22 | your terminal, either directly or via `python -m`:
23 | 
24 | ```bash
25 | python -m prodigy
26 | ```
27 | 
28 | The `prodigy` command lists the built-in recipes. To use a custom recipe script,
29 | simply pass the path to the file using the `-F` argument:
30 | 
31 | ```bash
32 | python -m prodigy -F prodigy-recipes/ner/ner_teach.py
33 | ```
34 | 
 35 | Now you can edit the code in your copy of `ner_teach.py` to customize how
 36 | Prodigy behaves.
37 | 
38 | ### Some things to try
39 | 
40 | * Try replacing `prefer_uncertain()` with `prefer_high_scores()`.
41 | * Try writing a custom sorting function. It just needs to be a generator that
42 |   yields a sequence of `example` dicts, given a sequence of `(score, example)` tuples.
43 | * Try adding a filter that drops some questions from the stream. For instance,
44 |   try writing a filter that only asks you questions where the entity is two
45 |   words long.
46 | * Try customizing the `update()` callback, to include extra logging or extra
47 |   functionality.
48 | 
49 | ## 🍳 Recipes
50 | 
51 | ### Named Entity Recognition
52 | 
53 | | Recipe | Description |
54 | | --- | --- |
55 | | [`ner.teach`](ner/ner_teach.py) | Collect the best possible training data for a named entity recognition model with the model in the loop. Based on your annotations, Prodigy will decide which questions to ask next. |
56 | | [`ner.match`](ner/ner_match.py) | Suggest phrases that match a given patterns file, and mark whether they are examples of the entity you're interested in. The patterns file can include exact strings or token patterns for use with spaCy's `Matcher`. |
57 | | [`ner.manual`](ner/ner_manual.py) | Mark spans manually by token. Requires only a tokenizer and no entity recognizer, and doesn't do any active learning. |
 58 | | [`ner.make-gold`](ner/ner_make_gold.py) | Create gold-standard data by correcting a model's predictions manually. |
59 | | [`ner.silver-to-gold`](ner/ner_silver_to_gold.py) | Take an existing "silver" dataset with binary accept/reject annotations, merge the annotations to find the best possible analysis given the constraints defined in the annotations, and manually edit it to create a perfect and complete "gold" dataset. |
60 | 
61 | ### Text Classification
62 | 
63 | | Recipe | Description |
64 | | --- | --- |
65 | | [`textcat.teach`](textcat/textcat_teach.py) | Collect the best possible training data for a text classification model with the model in the loop. Based on your annotations, Prodigy will decide which questions to ask next. |
66 | | [`textcat.custom-model`](textcat/textcat_custom_model.py) | Use active learning-powered text classification with a custom model. To demonstrate how it works, this demo recipe uses a simple dummy model that "predicts" random scores. But you can swap it out for any model of your choice, for example a text classification model implementation using PyTorch, TensorFlow or scikit-learn. |
67 | 
68 | ### Terminology
69 | 
70 | | Recipe | Description |
71 | | --- | --- |
 72 | | [`terms.teach`](terms/terms_teach.py) | Bootstrap a terminology list with word vectors and seed terms. Prodigy will suggest similar terms based on the word vectors, and update the target vector accordingly. |
73 | 
74 | ### Image
75 | 
76 | | Recipe | Description |
77 | | --- | --- |
78 | | [`image.manual`](image/image_manual.py) | Manually annotate images by drawing rectangular bounding boxes or polygon shapes on the image. |
79 | 
80 | ### Other
81 | 
82 | | Recipe | Description |
83 | | --- | --- |
84 | | [`mark`](other/mark.py) | Click through pre-prepared examples, with no model in the loop. |
85 | | [`choice`](other/choice.py) | Annotate data with multiple-choice options. The annotated examples will have an additional property `"accept": []` mapping to the ID(s) of the selected option(s). |
86 | 
87 | ## 📚 Example Datasets and Patterns
88 | 
89 | To make it even easier to get started, we've also included a few
90 | [`example-datasets`](example-datasets), both raw data as well as data containing
91 | annotations created with Prodigy. For examples of token-based match patterns to
92 | use with recipes like `ner.teach` or `ner.match`, see the
93 | [`example-patterns`](example-patterns) directory.
94 | 


--------------------------------------------------------------------------------
/src/recipes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/recipes/__init__.py


--------------------------------------------------------------------------------
/src/recipes/example-datasets/raw_news-headlines-nyt.jsonl:
--------------------------------------------------------------------------------
  1 | {"text":"Uber\u2019s Lesson: Silicon Valley\u2019s Start-Up Machine Needs Fixing","meta":{"source":"The New York Times"}}
  2 | {"text":"Pearl Automation, Founded by Apple Veterans, Shuts Down","meta":{"source":"The New York Times"}}
  3 | {"text":"How Silicon Valley Pushed Coding Into American Classrooms","meta":{"source":"The New York Times"}}
  4 | {"text":"Women in Tech Speak Frankly on Culture of Harassment","meta":{"source":"The New York Times"}}
  5 | {"text":"Silicon Valley Investors Flexed Their Muscles in Uber Fight","meta":{"source":"The New York Times"}}
  6 | {"text":"Uber is a Creature of an Industry Struggling to Grow Up","meta":{"source":"The New York Times"}}
  7 | {"text":"\u2018The Internet Is Broken\u2019: @ev Is Trying to Salvage It","meta":{"source":"The New York Times"}}
  8 | {"text":"The South Park Commons Fills a Hole in the Tech Landscape","meta":{"source":"The New York Times"}}
  9 | {"text":"The Closing of the Republican Mind","meta":{"source":"The New York Times"}}
 10 | {"text":"Writers From the Right and Left on Trump Jr., the Future of the F.B.I., Health Care and More","meta":{"source":"The New York Times"}}
 11 | {"text":"Daily Report: From Lean to Fat Start-Ups","meta":{"source":"The New York Times"}}
 12 | {"text":"How Uber\u2019s Chief Is Gaining Even More Clout in the Company","meta":{"source":"The New York Times"}}
 13 | {"text":"As New Zealand Courts Tech Talent, Isolation Becomes a Draw","meta":{"source":"The New York Times"}}
 14 | {"text":"One Thing Silicon Valley Can\u2019t Seem to Fix","meta":{"source":"The New York Times"}}
 15 | {"text":"In Silicon Valley, a Voice of Caution Guides a High-Flying Uber","meta":{"source":"The New York Times"}}
 16 | {"text":"Silicon Valley Writes a Protest Letter Against Trump","meta":{"source":"The New York Times"}}
 17 | {"text":"Warriors, Tech\u2019s Team, Are Soaring Out of Reach","meta":{"source":"The New York Times"}}
 18 | {"text":"Silicon Valley\u2019s Most Elusive Beast","meta":{"source":"The New York Times"}}
 19 | {"text":"Wall Street and Silicon Valley Form an Uneasy Alliance","meta":{"source":"The New York Times"}}
 20 | {"text":"Tim O'Reilly Explains the Internet of Things","meta":{"source":"The New York Times"}}
 21 | {"text":"Alibaba I.P.O. May Unleash Global Fight Over Users","meta":{"source":"The New York Times"}}
 22 | {"text":"Disruptions: Looking Beyond Silicon Valley's Bubble","meta":{"source":"The New York Times"}}
 23 | {"text":"Twitter Outages Linked to Glitches and Site Upgrade","meta":{"source":"The New York Times"}}
 24 | {"text":"Out of the Loop in Silicon Valley","meta":{"source":"The New York Times"}}
 25 | {"text":"A Determined Outpost of Tiny Technology","meta":{"source":"The New York Times"}}
 26 | {"text":"In Silicon Valley, Recruiters Are Sending Out Their Own R\u00e9sum\u00e9s","meta":{"source":"The New York Times"}}
 27 | {"text":"Credit Crisis Spreads a Pall Over Silicon Valley","meta":{"source":"The New York Times"}}
 28 | {"text":"As Department Stores Close, Stitch Fix Expands Online","meta":{"source":"The New York Times"}}
 29 | {"text":"Tech Incubators on a Mission of Diversity","meta":{"source":"The New York Times"}}
 30 | {"text":"Meal-Delivery Start-Ups Take Aim at Your Dinner","meta":{"source":"The New York Times"}}
 31 | {"text":"If a Bubble Bursts in Palo Alto, Does It Make a Sound?","meta":{"source":"The New York Times"}}
 32 | {"text":"Web\u2019s Reach Binds N.S.A. and Silicon Valley Leaders","meta":{"source":"The New York Times"}}
 33 | {"text":"Daily Report: Other Nations Offer Visas to Compete With Silicon Valley","meta":{"source":"The New York Times"}}
 34 | {"text":"Bill Maher Concert Will Introduce New Yahoo! Comedy Channel","meta":{"source":"The New York Times"}}
 35 | {"text":"A Silicon Valley Dream Grows in Guatemala, Despite the Risks","meta":{"source":"The New York Times"}}
 36 | {"text":"One Country, Two Revolutions","meta":{"source":"The New York Times"}}
 37 | {"text":"The Class That Built Apps, and Fortunes","meta":{"source":"The New York Times"}}
 38 | {"text":"E-Ties That Bind","meta":{"source":"The New York Times"}}
 39 | {"text":"Tech Recruiting Clashes With Immigration Rules","meta":{"source":"The New York Times"}}
 40 | {"text":"On Day Care, Google Makes a Rare Fumble","meta":{"source":"The New York Times"}}
 41 | {"text":"Where Computer Artifacts Come Alive","meta":{"source":"The New York Times"}}
 42 | {"text":"Can Green Make Green?","meta":{"source":"The New York Times"}}
 43 | {"text":"In Silicon Valley, the Crash Seems Like Just Yesterday","meta":{"source":"The New York Times"}}
 44 | {"text":"\u2018I\u2019m Here to Help,\u2019 Trump Tells Tech Executives at Meeting","meta":{"source":"The New York Times"}}
 45 | {"text":"Peter Thiel\u2019s Bet on Donald Trump Pays Off","meta":{"source":"The New York Times"}}
 46 | {"text":"Peter Thiel Defends His Most Contrarian Move Yet: Supporting Trump","meta":{"source":"The New York Times"}}
 47 | {"text":"What It\u2019s Like to Fight Online Hate","meta":{"source":"The New York Times"}}
 48 | {"text":"Start-Ups Once Showered With Cash Now Have to Work for It","meta":{"source":"The New York Times"}}
 49 | {"text":"European Tech Scene Begins to Feel Silicon Valley\u2019s Woes","meta":{"source":"The New York Times"}}
 50 | {"text":"Toyota Invests $1 Billion in Artificial Intelligence in U.S.","meta":{"source":"The New York Times"}}
 51 | {"text":"Farhad and Mike\u2019s Week in Review: Twitter Problems","meta":{"source":"The New York Times"}}
 52 | {"text":"Steep Discounts a Boon for Customers, but a Gamble for Start-Ups","meta":{"source":"The New York Times"}}
 53 | {"text":"Apple's Diversity Mirrors Other Tech Companies'   ","meta":{"source":"The New York Times"}}
 54 | {"text":"Building a Better Battery","meta":{"source":"The New York Times"}}
 55 | {"text":"Disruptions: If It Looks Like a Bubble and Floats Like a Bubble ...","meta":{"source":"The New York Times"}}
 56 | {"text":"Disruptions: Even the Tech Elites Leave Gadgets Behind","meta":{"source":"The New York Times"}}
 57 | {"text":"Latest Product From Tech Firms: An Immigration Bill","meta":{"source":"The New York Times"}}
 58 | {"text":"Migrant Bill Seems to Fit Tech Sector Wish List","meta":{"source":"The New York Times"}}
 59 | {"text":"The Internet Gets Physical","meta":{"source":"The New York Times"}}
 60 | {"text":"The War Between Apple and Google Has Just Begun","meta":{"source":"The New York Times"}}
 61 | {"text":"Site Lets Investors See and Copy Experts\u2019 Trades","meta":{"source":"The New York Times"}}
 62 | {"text":"Amid Conference Halls and Keynote Speakers, a Rivalry Forms","meta":{"source":"The New York Times"}}
 63 | {"text":"Economy Has Become a Drag on Silicon Valley","meta":{"source":"The New York Times"}}
 64 | {"text":"Yahoo Decides to Release a Rosy Forecast","meta":{"source":"The New York Times"}}
 65 | {"text":"Silicon Valley Losing Middle-Wage Jobs","meta":{"source":"The New York Times"}}
 66 | {"text":"The Working-Class Millionaire","meta":{"source":"The New York Times"}}
 67 | {"text":"Andy Grove\u2019s Warning to Silicon Valley","meta":{"source":"The New York Times"}}
 68 | {"text":"Caution Rebuffed, Unicorns and Other Start-Ups Fixate on Rainbows","meta":{"source":"The New York Times"}}
 69 | {"text":"Kleiner Perkins, Disrupted","meta":{"source":"The New York Times"}}
 70 | {"text":"When Uber and Airbnb Meet the Real World","meta":{"source":"The New York Times"}}
 71 | {"text":"Yahoo Reveals Work Force Data, Joining Tech's Small Diversity Parade","meta":{"source":"The New York Times"}}
 72 | {"text":"Disruptions: The Echo Chamber of Silicon Valley","meta":{"source":"The New York Times"}}
 73 | {"text":"TimesCast Tech: Career and Family in Silicon Valley","meta":{"source":"The New York Times"}}
 74 | {"text":"Steve Jobs: He Brought the Show to Business","meta":{"source":"The New York Times"}}
 75 | {"text":"For Start-Ups, Late Bursts of Private Cash","meta":{"source":"The New York Times"}}
 76 | {"text":"AOL, Seeking to Revitalize Its Internet Role, Turns to a Candid Ex-Yahoo Executive","meta":{"source":"The New York Times"}}
 77 | {"text":"Buy.com Deal With eBay Angers Sellers","meta":{"source":"The New York Times"}}
 78 | {"text":"Silicon Valley Starts to Turn Its Face to the Sun","meta":{"source":"The New York Times"}}
 79 | {"text":"Silicon Valley\u2019s High-Tech Hunt for Colleague","meta":{"source":"The New York Times"}}
 80 | {"text":"Twitter\u2019s Troubles and Snap\u2019s Appeal: It\u2019s All About the Mojo","meta":{"source":"The New York Times"}}
 81 | {"text":"A Rarity at a Republican Convention: \u2018I Am Proud to Be Gay\u2019","meta":{"source":"The New York Times"}}
 82 | {"text":"Pinterest Hires Its First Head of Diversity","meta":{"source":"The New York Times"}}
 83 | {"text":"Daily Report: Narendra Modi, Indian Prime Minister, Conquers Silicon Valley","meta":{"source":"The New York Times"}}
 84 | {"text":"Inside Amazon: Reporter\u2019s Notebook","meta":{"source":"The New York Times"}}
 85 | {"text":"Hot Tech Start-Ups May Face a Long and Bumpy Fall","meta":{"source":"The New York Times"}}
 86 | {"text":"White House Takes Cybersecurity Pitch to Silicon Valley ","meta":{"source":"The New York Times"}}
 87 | {"text":"The PayPal Mafia\u2019s Golden Touch","meta":{"source":"The New York Times"}}
 88 | {"text":"What We're Reading","meta":{"source":"The New York Times"}}
 89 | {"text":"Will Boom Lead to Bust in Silicon Valley? ","meta":{"source":"The New York Times"}}
 90 | {"text":"Tech Industry Flexes Muscle in California Race","meta":{"source":"The New York Times"}}
 91 | {"text":"Disruptions: A Blogger Mocks the Denizens of Silicon Valley ","meta":{"source":"The New York Times"}}
 92 | {"text":"How Big Data Is Playing Recruiter for Specialized Workers","meta":{"source":"The New York Times"}}
 93 | {"text":"Daily Report: Multinationals Push Into Venture Capital","meta":{"source":"The New York Times"}}
 94 | {"text":"As Silicon Valley Cheers Yahoo Chief, Wall Street's Reaction Is Muted","meta":{"source":"The New York Times"}}
 95 | {"text":"Old Techies Never Die; They Just Can\u2019t Get Hired as an Industry Moves On","meta":{"source":"The New York Times"}}
 96 | {"text":"A Silicon Bubble Shows Signs of Reinflating","meta":{"source":"The New York Times"}}
 97 | {"text":"A Start-Up Says It Can Predict Others\u2019 Fate","meta":{"source":"The New York Times"}}
 98 | {"text":"More Than Games, a Net to Snare Social Networkers","meta":{"source":"The New York Times"}}
 99 | {"text":"Silicon Valley Shaped by Technology and Traffic","meta":{"source":"The New York Times"}}
100 | {"text":"In Silicon Valley, Millionaires Who Don\u2019t Feel Rich","meta":{"source":"The New York Times"}}
101 | {"text":"Lots of Froth but No Bubble","meta":{"source":"The New York Times"}}
102 | {"text":"Start-Up Fervor Shifts to Energy in Silicon Valley","meta":{"source":"The New York Times"}}
103 | {"text":"VeriSign Moves to Address an Internet Security Problem","meta":{"source":"The New York Times"}}
104 | {"text":"China, Not Silicon Valley, Is Cutting Edge in Mobile Tech","meta":{"source":"The New York Times"}}
105 | {"text":"Women in Tech Band Together to Track Diversity, After Hours","meta":{"source":"The New York Times"}}
106 | {"text":"Facebook\u2019s Developer Conference Kicks Off","meta":{"source":"The New York Times"}}
107 | {"text":"An Anonymous Satire of Silicon Valley Now Has a Publisher","meta":{"source":"The New York Times"}}
108 | {"text":"Silicon Valley Bank Strengthens Its Roots","meta":{"source":"The New York Times"}}
109 | {"text":"Daily Report: Rivalry Brewing Between Uber and Google","meta":{"source":"The New York Times"}}
110 | {"text":"Behind the Cover Story: Yiren Lu on the Angst, Perks and Failures of the Tech Industry","meta":{"source":"The New York Times"}}
111 | {"text":"Thorny Side Effects in Silicon Valley Tactic to Keep Control","meta":{"source":"The New York Times"}}
112 | {"text":"Is New York's Tech Boom Sustainable?","meta":{"source":"The New York Times"}}
113 | {"text":"Internet Giants Foster, and Threaten, Innovation Economy","meta":{"source":"The New York Times"}}
114 | {"text":"Following Venture Capital for Signs of Tech to Come","meta":{"source":"The New York Times"}}
115 | {"text":"At Social Site, Only the Businesslike Need Apply","meta":{"source":"The New York Times"}}
116 | {"text":"When It Comes to Innovation, Geography Is Destiny","meta":{"source":"The New York Times"}}
117 | {"text":"Tech Billionaire Backing Trump Suggests Silicon Valley Is Out of Touch","meta":{"source":"The New York Times"}}
118 | {"text":"How Silicon Valley Treats a Trump Backer: Peter Thiel","meta":{"source":"The New York Times"}}
119 | {"text":"Parents Ready for Some Love From Silicon Valley Companies","meta":{"source":"The New York Times"}}
120 | {"text":"Scrutiny of Security Start-Ups May Signal Shift in Venture Funding","meta":{"source":"The New York Times"}}
121 | {"text":"Marissa Mayer of Yahoo Says She\u2019s Pregnant With Twins","meta":{"source":"The New York Times"}}
122 | {"text":"A Novel Prompts a Conversation About How We Use Technology","meta":{"source":"The New York Times"}}
123 | {"text":"Tech Industry Sets Its Sights on Gambling","meta":{"source":"The New York Times"}}
124 | {"text":"Techies Break a Fashion Taboo","meta":{"source":"The New York Times"}}
125 | {"text":"Back to the Future: The Netscape and Google I.P.O.'s","meta":{"source":"The New York Times"}}
126 | {"text":"Where Are the Women Executives in Silicon Valley?","meta":{"source":"The New York Times"}}
127 | {"text":"For Buyers of Web Start-Ups, Quest to Corral Young Talent","meta":{"source":"The New York Times"}}
128 | {"text":"Digital Muse for Beat Poet","meta":{"source":"The New York Times"}}
129 | {"text":"Amid Conference Halls and Keynote Speakers, a Rivalry Forms","meta":{"source":"The New York Times"}}
130 | {"text":"Hostility Has Its Rewards","meta":{"source":"The New York Times"}}
131 | {"text":"Going Public Caps Dream for a Maker of Software","meta":{"source":"The New York Times"}}
132 | {"text":"Next Job for Obama? Silicon Valley Is Hiring","meta":{"source":"The New York Times"}}
133 | {"text":"Airbnb and Others Set Terms for Employees to Cash Out","meta":{"source":"The New York Times"}}
134 | {"text":"Google and Apple: the High-Tech Hippies of Silicon Valley","meta":{"source":"The New York Times"}}
135 | {"text":"Silicon Valley, Seeking Diversity, Focuses on Blacks","meta":{"source":"The New York Times"}}
136 | {"text":"As More Tech Start-Ups Stay Private, So Does the Money","meta":{"source":"The New York Times"}}
137 | {"text":"Want a Steady Income? There\u2019s an App for That","meta":{"source":"The New York Times"}}
138 | {"text":"Investing Early On for Insights, Not Profits","meta":{"source":"The New York Times"}}
139 | {"text":"Angel Investors Lend Expertise as Well as Cash","meta":{"source":"The New York Times"}}
140 | {"text":"Deal Makers Invade CES, the Land of Geeks","meta":{"source":"The New York Times"}}
141 | {"text":"Techies Break a Fashion Taboo","meta":{"source":"The New York Times"}}
142 | {"text":"Back to the Future: The Netscape and Google I.P.O.'s","meta":{"source":"The New York Times"}}
143 | {"text":"Where Are the Women Executives in Silicon Valley?","meta":{"source":"The New York Times"}}
144 | {"text":"For Buyers of Web Start-Ups, Quest to Corral Young Talent","meta":{"source":"The New York Times"}}
145 | {"text":"Digital Muse for Beat Poet","meta":{"source":"The New York Times"}}
146 | {"text":"Amid Conference Halls and Keynote Speakers, a Rivalry Forms","meta":{"source":"The New York Times"}}
147 | {"text":"Hostility Has Its Rewards","meta":{"source":"The New York Times"}}
148 | {"text":"Going Public Caps Dream for a Maker of Software","meta":{"source":"The New York Times"}}
149 | {"text":"Next Job for Obama? Silicon Valley Is Hiring","meta":{"source":"The New York Times"}}
150 | {"text":"Airbnb and Others Set Terms for Employees to Cash Out","meta":{"source":"The New York Times"}}
151 | {"text":"In Surveillance Debate, White House Turns Its Focus to Silicon Valley","meta":{"source":"The New York Times"}}
152 | {"text":"Silicon Valley\u2019s Youth Problem","meta":{"source":"The New York Times"}}
153 | {"text":"Zuckerberg Remains the Undisputed Boss at Facebook","meta":{"source":"The New York Times"}}
154 | {"text":"A Corporate Campus Made to Mirror Facebook","meta":{"source":"The New York Times"}}
155 | {"text":"One on One: Tim Wu, Author of 'The Master Switch'","meta":{"source":"The New York Times"}}
156 | {"text":"Spinning the Web: P.R. in Silicon Valley","meta":{"source":"The New York Times"}}
157 | {"text":"To Survive, Net Start-Ups Slow Their Metabolism","meta":{"source":"The New York Times"}}
158 | {"text":"Attending to the Needs of the Too-Busy","meta":{"source":"The New York Times"}}
159 | {"text":"Silicon Valley Start-Ups Awash in Dollars, Again","meta":{"source":"The New York Times"}}
160 | {"text":"New Social Sites Cater to People of a Certain Age","meta":{"source":"The New York Times"}}
161 | {"text":"In Surveillance Debate, White House Turns Its Focus to Silicon Valley","meta":{"source":"The New York Times"}}
162 | {"text":"Silicon Valley\u2019s Youth Problem","meta":{"source":"The New York Times"}}
163 | {"text":"Zuckerberg Remains the Undisputed Boss at Facebook","meta":{"source":"The New York Times"}}
164 | {"text":"A Corporate Campus Made to Mirror Facebook","meta":{"source":"The New York Times"}}
165 | {"text":"One on One: Tim Wu, Author of 'The Master Switch'","meta":{"source":"The New York Times"}}
166 | {"text":"Spinning the Web: P.R. in Silicon Valley","meta":{"source":"The New York Times"}}
167 | {"text":"To Survive, Net Start-Ups Slow Their Metabolism","meta":{"source":"The New York Times"}}
168 | {"text":"Attending to the Needs of the Too-Busy","meta":{"source":"The New York Times"}}
169 | {"text":"Silicon Valley Start-Ups Awash in Dollars, Again","meta":{"source":"The New York Times"}}
170 | {"text":"New Social Sites Cater to People of a Certain Age","meta":{"source":"The New York Times"}}
171 | {"text":"Demand for Data Puts Engineers in Spotlight","meta":{"source":"The New York Times"}}
172 | {"text":"Yahoo Sale Could Be Bad for Minnows","meta":{"source":"The New York Times"}}
173 | {"text":"Silicon Valley Helped Create Trump, and That\u2019s Bad for It","meta":{"source":"The New York Times"}}
174 | {"text":"Peter Thiel\u2019s Embrace of Trump Has Silicon Valley Squirming","meta":{"source":"The New York Times"}}
175 | {"text":"In Silicon Valley, a Divide in Income and Politics","meta":{"source":"The New York Times"}}
176 | {"text":"The Art of Failing Upward","meta":{"source":"The New York Times"}}
177 | {"text":"Daily Report: The Tech News Cycle is About to Ramp Up","meta":{"source":"The New York Times"}}
178 | {"text":"Netromancy","meta":{"source":"The New York Times"}}
179 | {"text":"Unicorns Hunt for Talent Among Silicon Valley\u2019s Giants","meta":{"source":"The New York Times"}}
180 | {"text":"Overvalued in Silicon Valley, but Don\u2019t Say \u2018Tech Bubble\u2019","meta":{"source":"The New York Times"}}
181 | {"text":"Managers Turn to Computer Games, Aiming for More Efficient Employees","meta":{"source":"The New York Times"}}
182 | {"text":"Stanford Women and Silicon Valley","meta":{"source":"The New York Times"}}
183 | {"text":"What It Means to Be a \u2018Dad\u2019","meta":{"source":"The New York Times"}}
184 | {"text":"What We're Reading","meta":{"source":"The New York Times"}}
185 | {"text":"A Golden Age of Design","meta":{"source":"The New York Times"}}
186 | {"text":"Court Rejects Deal on Hiring in Silicon Valley","meta":{"source":"The New York Times"}}
187 | {"text":"What the Beats Deal Says About Apple: It Loves Tastemakers","meta":{"source":"The New York Times"}}
188 | {"text":"Search for a Market Niche, and You Might Find a Crowd","meta":{"source":"The New York Times"}}
189 | {"text":"Search for the 'Next Big Thing' Yields Soaring Valuations","meta":{"source":"The New York Times"}}
190 | {"text":"Shifting Tech Scene Unsettles Big Players","meta":{"source":"The New York Times"}}
191 | {"text":"Silicon Valley\u2019s Start-Up Machine","meta":{"source":"The New York Times"}}
192 | {"text":"Opening a Gateway for Girls to Enter the Computer Field","meta":{"source":"The New York Times"}}
193 | {"text":"Silicon Valley Says Step Away From the Device","meta":{"source":"The New York Times"}}
194 | {"text":"Like the Apple Store Nearby, but This One Has Windows","meta":{"source":"The New York Times"}}
195 | {"text":"A King of Connections Is Tech\u2019s Go-To Guy","meta":{"source":"The New York Times"}}
196 | {"text":"One on One: Jaron Lanier","meta":{"source":"The New York Times"}}
197 | {"text":"6 Months, $90,000 and (Maybe) a Great Idea","meta":{"source":"The New York Times"}}
198 | {"text":"Microsoft\u2019s Failed Yahoo Bid Risks Online Growth","meta":{"source":"The New York Times"}}
199 | {"text":"Why Old Technologies Are Still Kicking","meta":{"source":"The New York Times"}}
200 | {"text":"Trying to Add a Pulse to a World of Machines","meta":{"source":"The New York Times"}}
201 | 


--------------------------------------------------------------------------------
/src/recipes/example-patterns/patterns_countries-GPE.jsonl:
--------------------------------------------------------------------------------
   1 | {"label": "GPE", "pattern": [{"lower": "afghanistan"}]}
   2 | {"label": "GPE", "pattern": [{"lower": "af"}]}
   3 | {"label": "GPE", "pattern": [{"lower": "afġānistān"}]}
   4 | {"label": "GPE", "pattern": [{"lower": "åland"}, {"lower": "islands"}]}
   5 | {"label": "GPE", "pattern": [{"lower": "ax"}]}
   6 | {"label": "GPE", "pattern": [{"lower": "aaland"}]}
   7 | {"label": "GPE", "pattern": [{"lower": "aland"}]}
   8 | {"label": "GPE", "pattern": [{"lower": "ahvenanmaa"}]}
   9 | {"label": "GPE", "pattern": [{"lower": "albania"}]}
  10 | {"label": "GPE", "pattern": [{"lower": "al"}]}
  11 | {"label": "GPE", "pattern": [{"lower": "shqipëri"}]}
  12 | {"label": "GPE", "pattern": [{"lower": "shqipëria"}]}
  13 | {"label": "GPE", "pattern": [{"lower": "shqipnia"}]}
  14 | {"label": "GPE", "pattern": [{"lower": "algeria"}]}
  15 | {"label": "GPE", "pattern": [{"lower": "dz"}]}
  16 | {"label": "GPE", "pattern": [{"lower": "dzayer"}]}
  17 | {"label": "GPE", "pattern": [{"lower": "algérie"}]}
  18 | {"label": "GPE", "pattern": [{"lower": "american"}, {"lower": "samoa"}]}
  19 | {"label": "GPE", "pattern": [{"lower": "as"}]}
  20 | {"label": "GPE", "pattern": [{"lower": "amerika"}, {"lower": "sāmoa"}]}
  21 | {"label": "GPE", "pattern": [{"lower": "amelika"}, {"lower": "sāmoa"}]}
  22 | {"label": "GPE", "pattern": [{"lower": "sāmoa"}, {"lower": "amelika"}]}
  23 | {"label": "GPE", "pattern": [{"lower": "andorra"}]}
  24 | {"label": "GPE", "pattern": [{"lower": "ad"}]}
  25 | {"label": "GPE", "pattern": [{"lower": "principality"}, {"lower": "of"}, {"lower": "andorra"}]}
  26 | {"label": "GPE", "pattern": [{"lower": "principat"}, {"lower": "d'andorra"}]}
  27 | {"label": "GPE", "pattern": [{"lower": "angola"}]}
  28 | {"label": "GPE", "pattern": [{"lower": "ao"}]}
  29 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "angola"}]}
  30 | {"label": "GPE", "pattern": [{"lower": "ʁɛpublika"}, {"lower": "de"}, {"lower": "an'ɡɔla"}]}
  31 | {"label": "GPE", "pattern": [{"lower": "anguilla"}]}
  32 | {"label": "GPE", "pattern": [{"lower": "ai"}]}
  33 | {"label": "GPE", "pattern": [{"lower": "antarctica"}]}
  34 | {"label": "GPE", "pattern": [{"lower": "antigua"}, {"lower": "and"}, {"lower": "barbuda"}]}
  35 | {"label": "GPE", "pattern": [{"lower": "ag"}]}
  36 | {"label": "GPE", "pattern": [{"lower": "argentina"}]}
  37 | {"label": "GPE", "pattern": [{"lower": "ar"}]}
  38 | {"label": "GPE", "pattern": [{"lower": "argentine"}, {"lower": "republic"}]}
  39 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "argentina"}]}
  40 | {"label": "GPE", "pattern": [{"lower": "armenia"}]}
  41 | {"label": "GPE", "pattern": [{"lower": "am"}]}
  42 | {"label": "GPE", "pattern": [{"lower": "hayastan"}]}
  43 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "armenia"}]}
  44 | {"label": "GPE", "pattern": [{"lower": "հայաստանի"}, {"lower": "հանրապետություն"}]}
  45 | {"label": "GPE", "pattern": [{"lower": "aruba"}]}
  46 | {"label": "GPE", "pattern": [{"lower": "aw"}]}
  47 | {"label": "GPE", "pattern": [{"lower": "australia"}]}
  48 | {"label": "GPE", "pattern": [{"lower": "au"}]}
  49 | {"label": "GPE", "pattern": [{"lower": "austria"}]}
  50 | {"label": "GPE", "pattern": [{"lower": "at"}]}
  51 | {"label": "GPE", "pattern": [{"lower": "österreich"}]}
  52 | {"label": "GPE", "pattern": [{"lower": "osterreich"}]}
  53 | {"label": "GPE", "pattern": [{"lower": "oesterreich"}]}
  54 | {"label": "GPE", "pattern": [{"lower": "azerbaijan"}]}
  55 | {"label": "GPE", "pattern": [{"lower": "az"}]}
  56 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "azerbaijan"}]}
  57 | {"label": "GPE", "pattern": [{"lower": "azərbaycan"}, {"lower": "respublikası"}]}
  58 | {"label": "GPE", "pattern": [{"lower": "bahamas"}]}
  59 | {"label": "GPE", "pattern": [{"lower": "bs"}]}
  60 | {"label": "GPE", "pattern": [{"lower": "commonwealth"}, {"lower": "of"}, {"lower": "the"}, {"lower": "bahamas"}]}
  61 | {"label": "GPE", "pattern": [{"lower": "bahrain"}]}
  62 | {"label": "GPE", "pattern": [{"lower": "bh"}]}
  63 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "bahrain"}]}
  64 | {"label": "GPE", "pattern": [{"lower": "mamlakat"}, {"lower": "al"}, {"lower": "-"}, {"lower": "baḥrayn"}]}
  65 | {"label": "GPE", "pattern": [{"lower": "bangladesh"}]}
  66 | {"label": "GPE", "pattern": [{"lower": "bd"}]}
  67 | {"label": "GPE", "pattern": [{"lower": "people"}, {"lower": "'s"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "bangladesh"}]}
  68 | {"label": "GPE", "pattern": [{"lower": "gônôprôjatôntri"}, {"lower": "bangladesh"}]}
  69 | {"label": "GPE", "pattern": [{"lower": "barbados"}]}
  70 | {"label": "GPE", "pattern": [{"lower": "bb"}]}
  71 | {"label": "GPE", "pattern": [{"lower": "belarus"}]}
  72 | {"label": "GPE", "pattern": [{"lower": "by"}]}
  73 | {"label": "GPE", "pattern": [{"lower": "bielaruś"}]}
  74 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "belarus"}]}
  75 | {"label": "GPE", "pattern": [{"lower": "белоруссия"}]}
  76 | {"label": "GPE", "pattern": [{"lower": "республика"}, {"lower": "беларусь"}]}
  77 | {"label": "GPE", "pattern": [{"lower": "belorussiya"}]}
  78 | {"label": "GPE", "pattern": [{"lower": "respublika"}, {"lower": "belarus’"}]}
  79 | {"label": "GPE", "pattern": [{"lower": "belgium"}]}
  80 | {"label": "GPE", "pattern": [{"lower": "be"}]}
  81 | {"label": "GPE", "pattern": [{"lower": "belgië"}]}
  82 | {"label": "GPE", "pattern": [{"lower": "belgie"}]}
  83 | {"label": "GPE", "pattern": [{"lower": "belgien"}]}
  84 | {"label": "GPE", "pattern": [{"lower": "belgique"}]}
  85 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "belgium"}]}
  86 | {"label": "GPE", "pattern": [{"lower": "koninkrijk"}, {"lower": "belgië"}]}
  87 | {"label": "GPE", "pattern": [{"lower": "royaume"}, {"lower": "de"}, {"lower": "belgique"}]}
  88 | {"label": "GPE", "pattern": [{"lower": "königreich"}, {"lower": "belgien"}]}
  89 | {"label": "GPE", "pattern": [{"lower": "belize"}]}
  90 | {"label": "GPE", "pattern": [{"lower": "bz"}]}
  91 | {"label": "GPE", "pattern": [{"lower": "benin"}]}
  92 | {"label": "GPE", "pattern": [{"lower": "bj"}]}
  93 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "benin"}]}
  94 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "du"}, {"lower": "bénin"}]}
  95 | {"label": "GPE", "pattern": [{"lower": "bermuda"}]}
  96 | {"label": "GPE", "pattern": [{"lower": "bm"}]}
  97 | {"label": "GPE", "pattern": [{"lower": "the"}, {"lower": "islands"}, {"lower": "of"}, {"lower": "bermuda"}]}
  98 | {"label": "GPE", "pattern": [{"lower": "the"}, {"lower": "bermudas"}]}
  99 | {"label": "GPE", "pattern": [{"lower": "somers"}, {"lower": "isles"}]}
 100 | {"label": "GPE", "pattern": [{"lower": "bhutan"}]}
 101 | {"label": "GPE", "pattern": [{"lower": "bt"}]}
 102 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "bhutan"}]}
 103 | {"label": "GPE", "pattern": [{"lower": "bolivia"}, {"lower": "("}, {"lower": "plurinational"}, {"lower": "state"}, {"lower": "of"}, {"lower": ")"}]}
 104 | {"label": "GPE", "pattern": [{"lower": "bo"}]}
 105 | {"label": "GPE", "pattern": [{"lower": "buliwya"}]}
 106 | {"label": "GPE", "pattern": [{"lower": "wuliwya"}]}
 107 | {"label": "GPE", "pattern": [{"lower": "plurinational"}, {"lower": "state"}, {"lower": "of"}, {"lower": "bolivia"}]}
 108 | {"label": "GPE", "pattern": [{"lower": "estado"}, {"lower": "plurinacional"}, {"lower": "de"}, {"lower": "bolivia"}]}
 109 | {"label": "GPE", "pattern": [{"lower": "buliwya"}, {"lower": "mamallaqta"}]}
 110 | {"label": "GPE", "pattern": [{"lower": "wuliwya"}, {"lower": "suyu"}]}
 111 | {"label": "GPE", "pattern": [{"lower": "tetã"}, {"lower": "volívia"}]}
 112 | {"label": "GPE", "pattern": [{"lower": "bonaire"}, {"lower": ","}, {"lower": "sint"}, {"lower": "eustatius"}, {"lower": "and"}, {"lower": "saba"}]}
 113 | {"label": "GPE", "pattern": [{"lower": "bq"}]}
 114 | {"label": "GPE", "pattern": [{"lower": "boneiru"}]}
 115 | {"label": "GPE", "pattern": [{"lower": "bosnia"}, {"lower": "and"}, {"lower": "herzegovina"}]}
 116 | {"label": "GPE", "pattern": [{"lower": "ba"}]}
 117 | {"label": "GPE", "pattern": [{"lower": "bosnia"}, {"lower": "-"}, {"lower": "herzegovina"}]}
 118 | {"label": "GPE", "pattern": [{"lower": "босна"}, {"lower": "и"}, {"lower": "херцеговина"}]}
 119 | {"label": "GPE", "pattern": [{"lower": "botswana"}]}
 120 | {"label": "GPE", "pattern": [{"lower": "bw"}]}
 121 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "botswana"}]}
 122 | {"label": "GPE", "pattern": [{"lower": "lefatshe"}, {"lower": "la"}, {"lower": "botswana"}]}
 123 | {"label": "GPE", "pattern": [{"lower": "bouvet"}, {"lower": "island"}]}
 124 | {"label": "GPE", "pattern": [{"lower": "bv"}]}
 125 | {"label": "GPE", "pattern": [{"lower": "bouvetøya"}]}
 126 | {"label": "GPE", "pattern": [{"lower": "bouvet"}, {"lower": "-"}, {"lower": "øya"}]}
 127 | {"label": "GPE", "pattern": [{"lower": "brazil"}]}
 128 | {"label": "GPE", "pattern": [{"lower": "br"}]}
 129 | {"label": "GPE", "pattern": [{"lower": "brasil"}]}
 130 | {"label": "GPE", "pattern": [{"lower": "federative"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "brazil"}]}
 131 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "federativa"}, {"lower": "do"}, {"lower": "brasil"}]}
 132 | {"label": "GPE", "pattern": [{"lower": "british"}, {"lower": "indian"}, {"lower": "ocean"}, {"lower": "territory"}]}
 133 | {"label": "GPE", "pattern": [{"lower": "io"}]}
 134 | {"label": "GPE", "pattern": [{"lower": "united"}, {"lower": "states"}, {"lower": "minor"}, {"lower": "outlying"}, {"lower": "islands"}]}
 135 | {"label": "GPE", "pattern": [{"lower": "um"}]}
 136 | {"label": "GPE", "pattern": [{"lower": "virgin"}, {"lower": "islands"}, {"lower": "("}, {"lower": "british"}, {"lower": ")"}]}
 137 | {"label": "GPE", "pattern": [{"lower": "vg"}]}
 138 | {"label": "GPE", "pattern": [{"lower": "virgin"}, {"lower": "islands"}, {"lower": "("}, {"lower": "u.s."}, {"lower": ")"}]}
 139 | {"label": "GPE", "pattern": [{"lower": "vi"}]}
 140 | {"label": "GPE", "pattern": [{"lower": "usvi"}]}
 141 | {"label": "GPE", "pattern": [{"lower": "american"}, {"lower": "virgin"}, {"lower": "islands"}]}
 142 | {"label": "GPE", "pattern": [{"lower": "u.s."}, {"lower": "virgin"}, {"lower": "islands"}]}
 143 | {"label": "GPE", "pattern": [{"lower": "brunei"}, {"lower": "darussalam"}]}
 144 | {"label": "GPE", "pattern": [{"lower": "bn"}]}
 145 | {"label": "GPE", "pattern": [{"lower": "nation"}, {"lower": "of"}, {"lower": "brunei"}]}
 146 | {"label": "GPE", "pattern": [{"lower": " "}, {"lower": "the"}, {"lower": "abode"}, {"lower": "of"}, {"lower": "peace"}]}
 147 | {"label": "GPE", "pattern": [{"lower": "bulgaria"}]}
 148 | {"label": "GPE", "pattern": [{"lower": "bg"}]}
 149 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "bulgaria"}]}
 150 | {"label": "GPE", "pattern": [{"lower": "република"}, {"lower": "българия"}]}
 151 | {"label": "GPE", "pattern": [{"lower": "burkina"}, {"lower": "faso"}]}
 152 | {"label": "GPE", "pattern": [{"lower": "bf"}]}
 153 | {"label": "GPE", "pattern": [{"lower": "burundi"}]}
 154 | {"label": "GPE", "pattern": [{"lower": "bi"}]}
 155 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "burundi"}]}
 156 | {"label": "GPE", "pattern": [{"lower": "republika"}, {"lower": "y'uburundi"}]}
 157 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "du"}, {"lower": "burundi"}]}
 158 | {"label": "GPE", "pattern": [{"lower": "cambodia"}]}
 159 | {"label": "GPE", "pattern": [{"lower": "kh"}]}
 160 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "cambodia"}]}
 161 | {"label": "GPE", "pattern": [{"lower": "cameroon"}]}
 162 | {"label": "GPE", "pattern": [{"lower": "cm"}]}
 163 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "cameroon"}]}
 164 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "du"}, {"lower": "cameroun"}]}
 165 | {"label": "GPE", "pattern": [{"lower": "canada"}]}
 166 | {"label": "GPE", "pattern": [{"lower": "ca"}]}
 167 | {"label": "GPE", "pattern": [{"lower": "cabo"}, {"lower": "verde"}]}
 168 | {"label": "GPE", "pattern": [{"lower": "cv"}]}
 169 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "cabo"}, {"lower": "verde"}]}
 170 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "cabo"}, {"lower": "verde"}]}
 171 | {"label": "GPE", "pattern": [{"lower": "cayman"}, {"lower": "islands"}]}
 172 | {"label": "GPE", "pattern": [{"lower": "ky"}]}
 173 | {"label": "GPE", "pattern": [{"lower": "central"}, {"lower": "african"}, {"lower": "republic"}]}
 174 | {"label": "GPE", "pattern": [{"lower": "cf"}]}
 175 | {"label": "GPE", "pattern": [{"lower": "central"}, {"lower": "african"}, {"lower": "republic"}]}
 176 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "centrafricaine"}]}
 177 | {"label": "GPE", "pattern": [{"lower": "chad"}]}
 178 | {"label": "GPE", "pattern": [{"lower": "td"}]}
 179 | {"label": "GPE", "pattern": [{"lower": "tchad"}]}
 180 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "chad"}]}
 181 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "du"}, {"lower": "tchad"}]}
 182 | {"label": "GPE", "pattern": [{"lower": "chile"}]}
 183 | {"label": "GPE", "pattern": [{"lower": "cl"}]}
 184 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "chile"}]}
 185 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "chile"}]}
 186 | {"label": "GPE", "pattern": [{"lower": "china"}]}
 187 | {"label": "GPE", "pattern": [{"lower": "cn"}]}
 188 | {"label": "GPE", "pattern": [{"lower": "zhōngguó"}]}
 189 | {"label": "GPE", "pattern": [{"lower": "zhongguo"}]}
 190 | {"label": "GPE", "pattern": [{"lower": "zhonghua"}]}
 191 | {"label": "GPE", "pattern": [{"lower": "people"}, {"lower": "'s"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "china"}]}
 192 | {"label": "GPE", "pattern": [{"lower": "中华人民共和国"}]}
 193 | {"label": "GPE", "pattern": [{"lower": "zhōnghuá"}, {"lower": "rénmín"}, {"lower": "gònghéguó"}]}
 194 | {"label": "GPE", "pattern": [{"lower": "christmas"}, {"lower": "island"}]}
 195 | {"label": "GPE", "pattern": [{"lower": "cx"}]}
 196 | {"label": "GPE", "pattern": [{"lower": "territory"}, {"lower": "of"}, {"lower": "christmas"}, {"lower": "island"}]}
 197 | {"label": "GPE", "pattern": [{"lower": "cocos"}, {"lower": "("}, {"lower": "keeling"}, {"lower": ")"}, {"lower": "islands"}]}
 198 | {"label": "GPE", "pattern": [{"lower": "cc"}]}
 199 | {"label": "GPE", "pattern": [{"lower": "territory"}, {"lower": "of"}, {"lower": "the"}, {"lower": "cocos"}, {"lower": "("}, {"lower": "keeling"}, {"lower": ")"}, {"lower": "islands"}]}
 200 | {"label": "GPE", "pattern": [{"lower": "keeling"}, {"lower": "islands"}]}
 201 | {"label": "GPE", "pattern": [{"lower": "colombia"}]}
 202 | {"label": "GPE", "pattern": [{"lower": "co"}]}
 203 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "colombia"}]}
 204 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "colombia"}]}
 205 | {"label": "GPE", "pattern": [{"lower": "comoros"}]}
 206 | {"label": "GPE", "pattern": [{"lower": "km"}]}
 207 | {"label": "GPE", "pattern": [{"lower": "union"}, {"lower": "of"}, {"lower": "the"}, {"lower": "comoros"}]}
 208 | {"label": "GPE", "pattern": [{"lower": "union"}, {"lower": "des"}, {"lower": "comores"}]}
 209 | {"label": "GPE", "pattern": [{"lower": "udzima"}, {"lower": "wa"}, {"lower": "komori"}]}
 210 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "ittiḥād"}, {"lower": "al"}, {"lower": "-"}, {"lower": "qumurī"}]}
 211 | {"label": "GPE", "pattern": [{"lower": "congo"}]}
 212 | {"label": "GPE", "pattern": [{"lower": "cg"}]}
 213 | {"label": "GPE", "pattern": [{"lower": "congo"}, {"lower": "-"}, {"lower": "brazzaville"}]}
 214 | {"label": "GPE", "pattern": [{"lower": "congo"}, {"lower": "("}, {"lower": "democratic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "the"}, {"lower": ")"}]}
 215 | {"label": "GPE", "pattern": [{"lower": "cd"}]}
 216 | {"label": "GPE", "pattern": [{"lower": "dr"}, {"lower": "congo"}]}
 217 | {"label": "GPE", "pattern": [{"lower": "congo"}, {"lower": "-"}, {"lower": "kinshasa"}]}
 218 | {"label": "GPE", "pattern": [{"lower": "drc"}]}
 219 | {"label": "GPE", "pattern": [{"lower": "cook"}, {"lower": "islands"}]}
 220 | {"label": "GPE", "pattern": [{"lower": "ck"}]}
 221 | {"label": "GPE", "pattern": [{"lower": "kūki"}, {"lower": "'"}, {"lower": "āirani"}]}
 222 | {"label": "GPE", "pattern": [{"lower": "costa"}, {"lower": "rica"}]}
 223 | {"label": "GPE", "pattern": [{"lower": "cr"}]}
 224 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "costa"}, {"lower": "rica"}]}
 225 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "costa"}, {"lower": "rica"}]}
 226 | {"label": "GPE", "pattern": [{"lower": "croatia"}]}
 227 | {"label": "GPE", "pattern": [{"lower": "hr"}]}
 228 | {"label": "GPE", "pattern": [{"lower": "hrvatska"}]}
 229 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "croatia"}]}
 230 | {"label": "GPE", "pattern": [{"lower": "republika"}, {"lower": "hrvatska"}]}
 231 | {"label": "GPE", "pattern": [{"lower": "cuba"}]}
 232 | {"label": "GPE", "pattern": [{"lower": "cu"}]}
 233 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "cuba"}]}
 234 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "cuba"}]}
 235 | {"label": "GPE", "pattern": [{"lower": "curaçao"}]}
 236 | {"label": "GPE", "pattern": [{"lower": "cw"}]}
 237 | {"label": "GPE", "pattern": [{"lower": "curacao"}]}
 238 | {"label": "GPE", "pattern": [{"lower": "kòrsou"}]}
 239 | {"label": "GPE", "pattern": [{"lower": "country"}, {"lower": "of"}, {"lower": "curaçao"}]}
 240 | {"label": "GPE", "pattern": [{"lower": "land"}, {"lower": "curaçao"}]}
 241 | {"label": "GPE", "pattern": [{"lower": "pais"}, {"lower": "kòrsou"}]}
 242 | {"label": "GPE", "pattern": [{"lower": "cyprus"}]}
 243 | {"label": "GPE", "pattern": [{"lower": "cy"}]}
 244 | {"label": "GPE", "pattern": [{"lower": "kýpros"}]}
 245 | {"label": "GPE", "pattern": [{"lower": "kıbrıs"}]}
 246 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "cyprus"}]}
 247 | {"label": "GPE", "pattern": [{"lower": "κυπριακή"}, {"lower": "δημοκρατία"}]}
 248 | {"label": "GPE", "pattern": [{"lower": "kıbrıs"}, {"lower": "cumhuriyeti"}]}
 249 | {"label": "GPE", "pattern": [{"lower": "czech"}, {"lower": "republic"}]}
 250 | {"label": "GPE", "pattern": [{"lower": "cz"}]}
 251 | {"label": "GPE", "pattern": [{"lower": "česká"}, {"lower": "republika"}]}
 252 | {"label": "GPE", "pattern": [{"lower": "česko"}]}
 253 | {"label": "GPE", "pattern": [{"lower": "denmark"}]}
 254 | {"label": "GPE", "pattern": [{"lower": "dk"}]}
 255 | {"label": "GPE", "pattern": [{"lower": "danmark"}]}
 256 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "denmark"}]}
 257 | {"label": "GPE", "pattern": [{"lower": "kongeriget"}, {"lower": "danmark"}]}
 258 | {"label": "GPE", "pattern": [{"lower": "djibouti"}]}
 259 | {"label": "GPE", "pattern": [{"lower": "dj"}]}
 260 | {"label": "GPE", "pattern": [{"lower": "jabuuti"}]}
 261 | {"label": "GPE", "pattern": [{"lower": "gabuuti"}]}
 262 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "djibouti"}]}
 263 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "de"}, {"lower": "djibouti"}]}
 264 | {"label": "GPE", "pattern": [{"lower": "gabuutih"}, {"lower": "ummuuno"}]}
 265 | {"label": "GPE", "pattern": [{"lower": "jamhuuriyadda"}, {"lower": "jabuuti"}]}
 266 | {"label": "GPE", "pattern": [{"lower": "dominica"}]}
 267 | {"label": "GPE", "pattern": [{"lower": "dm"}]}
 268 | {"label": "GPE", "pattern": [{"lower": "dominique"}]}
 269 | {"label": "GPE", "pattern": [{"lower": "wai‘tu"}, {"lower": "kubuli"}]}
 270 | {"label": "GPE", "pattern": [{"lower": "commonwealth"}, {"lower": "of"}, {"lower": "dominica"}]}
 271 | {"label": "GPE", "pattern": [{"lower": "dominican"}, {"lower": "republic"}]}
 272 | {"label": "GPE", "pattern": [{"lower": "do"}]}
 273 | {"label": "GPE", "pattern": [{"lower": "ecuador"}]}
 274 | {"label": "GPE", "pattern": [{"lower": "ec"}]}
 275 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "ecuador"}]}
 276 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "del"}, {"lower": "ecuador"}]}
 277 | {"label": "GPE", "pattern": [{"lower": "egypt"}]}
 278 | {"label": "GPE", "pattern": [{"lower": "eg"}]}
 279 | {"label": "GPE", "pattern": [{"lower": "arab"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "egypt"}]}
 280 | {"label": "GPE", "pattern": [{"lower": "el"}, {"lower": "salvador"}]}
 281 | {"label": "GPE", "pattern": [{"lower": "sv"}]}
 282 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "el"}, {"lower": "salvador"}]}
 283 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "el"}, {"lower": "salvador"}]}
 284 | {"label": "GPE", "pattern": [{"lower": "equatorial"}, {"lower": "guinea"}]}
 285 | {"label": "GPE", "pattern": [{"lower": "gq"}]}
 286 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "equatorial"}, {"lower": "guinea"}]}
 287 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "guinea"}, {"lower": "ecuatorial"}]}
 288 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "de"}, {"lower": "guinée"}, {"lower": "équatoriale"}]}
 289 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "da"}, {"lower": "guiné"}, {"lower": "equatorial"}]}
 290 | {"label": "GPE", "pattern": [{"lower": "eritrea"}]}
 291 | {"label": "GPE", "pattern": [{"lower": "er"}]}
 292 | {"label": "GPE", "pattern": [{"lower": "state"}, {"lower": "of"}, {"lower": "eritrea"}]}
 293 | {"label": "GPE", "pattern": [{"lower": "ሃገረ"}, {"lower": "ኤርትራ"}]}
 294 | {"label": "GPE", "pattern": [{"lower": "dawlat"}, {"lower": "iritriyá"}]}
 295 | {"label": "GPE", "pattern": [{"lower": "ʾertrā"}]}
 296 | {"label": "GPE", "pattern": [{"lower": "iritriyā"}]}
 297 | {"label": "GPE", "pattern": [{"lower": "estonia"}]}
 298 | {"label": "GPE", "pattern": [{"lower": "ee"}]}
 299 | {"label": "GPE", "pattern": [{"lower": "eesti"}]}
 300 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "estonia"}]}
 301 | {"label": "GPE", "pattern": [{"lower": "eesti"}, {"lower": "vabariik"}]}
 302 | {"label": "GPE", "pattern": [{"lower": "ethiopia"}]}
 303 | {"label": "GPE", "pattern": [{"lower": "et"}]}
 304 | {"label": "GPE", "pattern": [{"lower": "ʾītyōṗṗyā"}]}
 305 | {"label": "GPE", "pattern": [{"lower": "federal"}, {"lower": "democratic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "ethiopia"}]}
 306 | {"label": "GPE", "pattern": [{"lower": "የኢትዮጵያ"}, {"lower": "ፌዴራላዊ"}, {"lower": "ዲሞክራሲያዊ"}, {"lower": "ሪፐብሊክ"}]}
 307 | {"label": "GPE", "pattern": [{"lower": "falkland"}, {"lower": "islands"}, {"lower": "("}, {"lower": "malvinas"}, {"lower": ")"}]}
 308 | {"label": "GPE", "pattern": [{"lower": "fk"}]}
 309 | {"label": "GPE", "pattern": [{"lower": "islas"}, {"lower": "malvinas"}]}
 310 | {"label": "GPE", "pattern": [{"lower": "faroe"}, {"lower": "islands"}]}
 311 | {"label": "GPE", "pattern": [{"lower": "fo"}]}
 312 | {"label": "GPE", "pattern": [{"lower": "føroyar"}]}
 313 | {"label": "GPE", "pattern": [{"lower": "færøerne"}]}
 314 | {"label": "GPE", "pattern": [{"lower": "fiji"}]}
 315 | {"label": "GPE", "pattern": [{"lower": "fj"}]}
 316 | {"label": "GPE", "pattern": [{"lower": "viti"}]}
 317 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "fiji"}]}
 318 | {"label": "GPE", "pattern": [{"lower": "matanitu"}, {"lower": "ko"}, {"lower": "viti"}]}
 319 | {"label": "GPE", "pattern": [{"lower": "fijī"}, {"lower": "gaṇarājya"}]}
 320 | {"label": "GPE", "pattern": [{"lower": "finland"}]}
 321 | {"label": "GPE", "pattern": [{"lower": "fi"}]}
 322 | {"label": "GPE", "pattern": [{"lower": "suomi"}]}
 323 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "finland"}]}
 324 | {"label": "GPE", "pattern": [{"lower": "suomen"}, {"lower": "tasavalta"}]}
 325 | {"label": "GPE", "pattern": [{"lower": "republiken"}, {"lower": "finland"}]}
 326 | {"label": "GPE", "pattern": [{"lower": "france"}]}
 327 | {"label": "GPE", "pattern": [{"lower": "fr"}]}
 328 | {"label": "GPE", "pattern": [{"lower": "french"}, {"lower": "republic"}]}
 329 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "française"}]}
 330 | {"label": "GPE", "pattern": [{"lower": "french"}, {"lower": "guiana"}]}
 331 | {"label": "GPE", "pattern": [{"lower": "gf"}]}
 332 | {"label": "GPE", "pattern": [{"lower": "guiana"}]}
 333 | {"label": "GPE", "pattern": [{"lower": "guyane"}]}
 334 | {"label": "GPE", "pattern": [{"lower": "french"}, {"lower": "polynesia"}]}
 335 | {"label": "GPE", "pattern": [{"lower": "pf"}]}
 336 | {"label": "GPE", "pattern": [{"lower": "polynésie"}, {"lower": "française"}]}
 337 | {"label": "GPE", "pattern": [{"lower": "french"}, {"lower": "polynesia"}]}
 338 | {"label": "GPE", "pattern": [{"lower": "pōrīnetia"}, {"lower": "farāni"}]}
 339 | {"label": "GPE", "pattern": [{"lower": "french"}, {"lower": "southern"}, {"lower": "territories"}]}
 340 | {"label": "GPE", "pattern": [{"lower": "tf"}]}
 341 | {"label": "GPE", "pattern": [{"lower": "gabon"}]}
 342 | {"label": "GPE", "pattern": [{"lower": "ga"}]}
 343 | {"label": "GPE", "pattern": [{"lower": "gabonese"}, {"lower": "republic"}]}
 344 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "gabonaise"}]}
 345 | {"label": "GPE", "pattern": [{"lower": "gambia"}]}
 346 | {"label": "GPE", "pattern": [{"lower": "gm"}]}
 347 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "the"}, {"lower": "gambia"}]}
 348 | {"label": "GPE", "pattern": [{"lower": "georgia"}]}
 349 | {"label": "GPE", "pattern": [{"lower": "ge"}]}
 350 | {"label": "GPE", "pattern": [{"lower": "sakartvelo"}]}
 351 | {"label": "GPE", "pattern": [{"lower": "germany"}]}
 352 | {"label": "GPE", "pattern": [{"lower": "de"}]}
 353 | {"label": "GPE", "pattern": [{"lower": "federal"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "germany"}]}
 354 | {"label": "GPE", "pattern": [{"lower": "bundesrepublik"}, {"lower": "deutschland"}]}
 355 | {"label": "GPE", "pattern": [{"lower": "ghana"}]}
 356 | {"label": "GPE", "pattern": [{"lower": "gh"}]}
 357 | {"label": "GPE", "pattern": [{"lower": "gibraltar"}]}
 358 | {"label": "GPE", "pattern": [{"lower": "gi"}]}
 359 | {"label": "GPE", "pattern": [{"lower": "greece"}]}
 360 | {"label": "GPE", "pattern": [{"lower": "gr"}]}
 361 | {"label": "GPE", "pattern": [{"lower": "elláda"}]}
 362 | {"label": "GPE", "pattern": [{"lower": "hellenic"}, {"lower": "republic"}]}
 363 | {"label": "GPE", "pattern": [{"lower": "ελληνική"}, {"lower": "δημοκρατία"}]}
 364 | {"label": "GPE", "pattern": [{"lower": "greenland"}]}
 365 | {"label": "GPE", "pattern": [{"lower": "gl"}]}
 366 | {"label": "GPE", "pattern": [{"lower": "grønland"}]}
 367 | {"label": "GPE", "pattern": [{"lower": "grenada"}]}
 368 | {"label": "GPE", "pattern": [{"lower": "gd"}]}
 369 | {"label": "GPE", "pattern": [{"lower": "guadeloupe"}]}
 370 | {"label": "GPE", "pattern": [{"lower": "gp"}]}
 371 | {"label": "GPE", "pattern": [{"lower": "gwadloup"}]}
 372 | {"label": "GPE", "pattern": [{"lower": "guam"}]}
 373 | {"label": "GPE", "pattern": [{"lower": "gu"}]}
 374 | {"label": "GPE", "pattern": [{"lower": "guåhån"}]}
 375 | {"label": "GPE", "pattern": [{"lower": "guatemala"}]}
 376 | {"label": "GPE", "pattern": [{"lower": "gt"}]}
 377 | {"label": "GPE", "pattern": [{"lower": "guernsey"}]}
 378 | {"label": "GPE", "pattern": [{"lower": "gg"}]}
 379 | {"label": "GPE", "pattern": [{"lower": "bailiwick"}, {"lower": "of"}, {"lower": "guernsey"}]}
 380 | {"label": "GPE", "pattern": [{"lower": "bailliage"}, {"lower": "de"}, {"lower": "guernesey"}]}
 381 | {"label": "GPE", "pattern": [{"lower": "guinea"}]}
 382 | {"label": "GPE", "pattern": [{"lower": "gn"}]}
 383 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "guinea"}]}
 384 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "de"}, {"lower": "guinée"}]}
 385 | {"label": "GPE", "pattern": [{"lower": "guinea"}, {"lower": "-"}, {"lower": "bissau"}]}
 386 | {"label": "GPE", "pattern": [{"lower": "gw"}]}
 387 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "guinea"}, {"lower": "-"}, {"lower": "bissau"}]}
 388 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "da"}, {"lower": "guiné"}, {"lower": "-"}, {"lower": "bissau"}]}
 389 | {"label": "GPE", "pattern": [{"lower": "guyana"}]}
 390 | {"label": "GPE", "pattern": [{"lower": "gy"}]}
 391 | {"label": "GPE", "pattern": [{"lower": "co"}, {"lower": "-"}, {"lower": "operative"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "guyana"}]}
 392 | {"label": "GPE", "pattern": [{"lower": "haiti"}]}
 393 | {"label": "GPE", "pattern": [{"lower": "ht"}]}
 394 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "haiti"}]}
 395 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "d'haïti"}]}
 396 | {"label": "GPE", "pattern": [{"lower": "repiblik"}, {"lower": "ayiti"}]}
 397 | {"label": "GPE", "pattern": [{"lower": "heard"}, {"lower": "island"}, {"lower": "and"}, {"lower": "mcdonald"}, {"lower": "islands"}]}
 398 | {"label": "GPE", "pattern": [{"lower": "hm"}]}
 399 | {"label": "GPE", "pattern": [{"lower": "holy"}, {"lower": "see"}]}
 400 | {"label": "GPE", "pattern": [{"lower": "sancta"}, {"lower": "sedes"}]}
 401 | {"label": "GPE", "pattern": [{"lower": "vatican"}]}
 402 | {"label": "GPE", "pattern": [{"lower": "the"}, {"lower": "vatican"}]}
 403 | {"label": "GPE", "pattern": [{"lower": "honduras"}]}
 404 | {"label": "GPE", "pattern": [{"lower": "hn"}]}
 405 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "honduras"}]}
 406 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "honduras"}]}
 407 | {"label": "GPE", "pattern": [{"lower": "hong"}, {"lower": "kong"}]}
 408 | {"label": "GPE", "pattern": [{"lower": "hk"}]}
 409 | {"label": "GPE", "pattern": [{"lower": "香港"}]}
 410 | {"label": "GPE", "pattern": [{"lower": "hungary"}]}
 411 | {"label": "GPE", "pattern": [{"lower": "hu"}]}
 412 | {"label": "GPE", "pattern": [{"lower": "iceland"}]}
 413 | {"label": "GPE", "pattern": [{"lower": "is"}]}
 414 | {"label": "GPE", "pattern": [{"lower": "island"}]}
 415 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "iceland"}]}
 416 | {"label": "GPE", "pattern": [{"lower": "lýðveldið"}, {"lower": "ísland"}]}
 417 | {"label": "GPE", "pattern": [{"lower": "india"}]}
 418 | {"label": "GPE", "pattern": [{"lower": "in"}]}
 419 | {"label": "GPE", "pattern": [{"lower": "bhārat"}]}
 420 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "india"}]}
 421 | {"label": "GPE", "pattern": [{"lower": "bharat"}, {"lower": "ganrajya"}]}
 422 | {"label": "GPE", "pattern": [{"lower": "indonesia"}]}
 423 | {"label": "GPE", "pattern": [{"lower": "id"}]}
 424 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "indonesia"}]}
 425 | {"label": "GPE", "pattern": [{"lower": "republik"}, {"lower": "indonesia"}]}
 426 | {"label": "GPE", "pattern": [{"lower": "côte"}, {"lower": "d'ivoire"}]}
 427 | {"label": "GPE", "pattern": [{"lower": "ci"}]}
 428 | {"label": "GPE", "pattern": [{"lower": "ivory"}, {"lower": "coast"}]}
 429 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "côte"}, {"lower": "d'ivoire"}]}
 430 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "de"}, {"lower": "côte"}, {"lower": "d'ivoire"}]}
 431 | {"label": "GPE", "pattern": [{"lower": "iran"}, {"lower": "("}, {"lower": "islamic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": ")"}]}
 432 | {"label": "GPE", "pattern": [{"lower": "ir"}]}
 433 | {"label": "GPE", "pattern": [{"lower": "islamic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "iran"}]}
 434 | {"label": "GPE", "pattern": [{"lower": "jomhuri"}, {"lower": "-"}, {"lower": "ye"}, {"lower": "eslāmi"}, {"lower": "-"}, {"lower": "ye"}, {"lower": "irān"}]}
 435 | {"label": "GPE", "pattern": [{"lower": "iraq"}]}
 436 | {"label": "GPE", "pattern": [{"lower": "iq"}]}
 437 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "iraq"}]}
 438 | {"label": "GPE", "pattern": [{"lower": "jumhūriyyat"}, {"lower": "al-‘irāq"}]}
 439 | {"label": "GPE", "pattern": [{"lower": "ireland"}]}
 440 | {"label": "GPE", "pattern": [{"lower": "ie"}]}
 441 | {"label": "GPE", "pattern": [{"lower": "éire"}]}
 442 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "ireland"}]}
 443 | {"label": "GPE", "pattern": [{"lower": "poblacht"}, {"lower": "na"}, {"lower": "héireann"}]}
 444 | {"label": "GPE", "pattern": [{"lower": "isle"}, {"lower": "of"}, {"lower": "man"}]}
 445 | {"label": "GPE", "pattern": [{"lower": "im"}]}
 446 | {"label": "GPE", "pattern": [{"lower": "ellan"}, {"lower": "vannin"}]}
 447 | {"label": "GPE", "pattern": [{"lower": "mann"}]}
 448 | {"label": "GPE", "pattern": [{"lower": "mannin"}]}
 449 | {"label": "GPE", "pattern": [{"lower": "israel"}]}
 450 | {"label": "GPE", "pattern": [{"lower": "il"}]}
 451 | {"label": "GPE", "pattern": [{"lower": "state"}, {"lower": "of"}, {"lower": "israel"}]}
 452 | {"label": "GPE", "pattern": [{"lower": "medīnat"}, {"lower": "yisrā'el"}]}
 453 | {"label": "GPE", "pattern": [{"lower": "italy"}]}
 454 | {"label": "GPE", "pattern": [{"lower": "it"}]}
 455 | {"label": "GPE", "pattern": [{"lower": "italian"}, {"lower": "republic"}]}
 456 | {"label": "GPE", "pattern": [{"lower": "repubblica"}, {"lower": "italiana"}]}
 457 | {"label": "GPE", "pattern": [{"lower": "jamaica"}]}
 458 | {"label": "GPE", "pattern": [{"lower": "jm"}]}
 459 | {"label": "GPE", "pattern": [{"lower": "japan"}]}
 460 | {"label": "GPE", "pattern": [{"lower": "jp"}]}
 461 | {"label": "GPE", "pattern": [{"lower": "nippon"}]}
 462 | {"label": "GPE", "pattern": [{"lower": "nihon"}]}
 463 | {"label": "GPE", "pattern": [{"lower": "jersey"}]}
 464 | {"label": "GPE", "pattern": [{"lower": "je"}]}
 465 | {"label": "GPE", "pattern": [{"lower": "bailiwick"}, {"lower": "of"}, {"lower": "jersey"}]}
 466 | {"label": "GPE", "pattern": [{"lower": "bailliage"}, {"lower": "de"}, {"lower": "jersey"}]}
 467 | {"label": "GPE", "pattern": [{"lower": "bailliage"}, {"lower": "dé"}, {"lower": "jèrri"}]}
 468 | {"label": "GPE", "pattern": [{"lower": "jordan"}]}
 469 | {"label": "GPE", "pattern": [{"lower": "jo"}]}
 470 | {"label": "GPE", "pattern": [{"lower": "hashemite"}, {"lower": "kingdom"}, {"lower": "of"}, {"lower": "jordan"}]}
 471 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "mamlakah"}, {"lower": "al"}, {"lower": "-"}, {"lower": "urdunīyah"}, {"lower": "al"}, {"lower": "-"}, {"lower": "hāshimīyah"}]}
 472 | {"label": "GPE", "pattern": [{"lower": "kazakhstan"}]}
 473 | {"label": "GPE", "pattern": [{"lower": "kz"}]}
 474 | {"label": "GPE", "pattern": [{"lower": "qazaqstan"}]}
 475 | {"label": "GPE", "pattern": [{"lower": "казахстан"}]}
 476 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "kazakhstan"}]}
 477 | {"label": "GPE", "pattern": [{"lower": "қазақстан"}, {"lower": "республикасы"}]}
 478 | {"label": "GPE", "pattern": [{"lower": "qazaqstan"}, {"lower": "respublïkası"}]}
 479 | {"label": "GPE", "pattern": [{"lower": "республика"}, {"lower": "казахстан"}]}
 480 | {"label": "GPE", "pattern": [{"lower": "respublika"}, {"lower": "kazakhstan"}]}
 481 | {"label": "GPE", "pattern": [{"lower": "kenya"}]}
 482 | {"label": "GPE", "pattern": [{"lower": "ke"}]}
 483 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "kenya"}]}
 484 | {"label": "GPE", "pattern": [{"lower": "jamhuri"}, {"lower": "ya"}, {"lower": "kenya"}]}
 485 | {"label": "GPE", "pattern": [{"lower": "kiribati"}]}
 486 | {"label": "GPE", "pattern": [{"lower": "ki"}]}
 487 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "kiribati"}]}
 488 | {"label": "GPE", "pattern": [{"lower": "ribaberiki"}, {"lower": "kiribati"}]}
 489 | {"label": "GPE", "pattern": [{"lower": "kuwait"}]}
 490 | {"label": "GPE", "pattern": [{"lower": "kw"}]}
 491 | {"label": "GPE", "pattern": [{"lower": "state"}, {"lower": "of"}, {"lower": "kuwait"}]}
 492 | {"label": "GPE", "pattern": [{"lower": "dawlat"}, {"lower": "al"}, {"lower": "-"}, {"lower": "kuwait"}]}
 493 | {"label": "GPE", "pattern": [{"lower": "kyrgyzstan"}]}
 494 | {"label": "GPE", "pattern": [{"lower": "kg"}]}
 495 | {"label": "GPE", "pattern": [{"lower": "киргизия"}]}
 496 | {"label": "GPE", "pattern": [{"lower": "kyrgyz"}, {"lower": "republic"}]}
 497 | {"label": "GPE", "pattern": [{"lower": "кыргыз"}, {"lower": "республикасы"}]}
 498 | {"label": "GPE", "pattern": [{"lower": "kyrgyz"}, {"lower": "respublikasy"}]}
 499 | {"label": "GPE", "pattern": [{"lower": "lao"}, {"lower": "people"}, {"lower": "'s"}, {"lower": "democratic"}, {"lower": "republic"}]}
 500 | {"label": "GPE", "pattern": [{"lower": "la"}]}
 501 | {"label": "GPE", "pattern": [{"lower": "lao"}]}
 502 | {"label": "GPE", "pattern": [{"lower": "laos"}]}
 503 | {"label": "GPE", "pattern": [{"lower": "lao"}, {"lower": "people"}, {"lower": "'s"}, {"lower": "democratic"}, {"lower": "republic"}]}
 504 | {"label": "GPE", "pattern": [{"lower": "sathalanalat"}, {"lower": "paxathipatai"}, {"lower": "paxaxon"}, {"lower": "lao"}]}
 505 | {"label": "GPE", "pattern": [{"lower": "latvia"}]}
 506 | {"label": "GPE", "pattern": [{"lower": "lv"}]}
 507 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "latvia"}]}
 508 | {"label": "GPE", "pattern": [{"lower": "latvijas"}, {"lower": "republika"}]}
 509 | {"label": "GPE", "pattern": [{"lower": "lebanon"}]}
 510 | {"label": "GPE", "pattern": [{"lower": "lb"}]}
 511 | {"label": "GPE", "pattern": [{"lower": "lebanese"}, {"lower": "republic"}]}
 512 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "jumhūrīyah"}, {"lower": "al"}, {"lower": "-"}, {"lower": "libnānīyah"}]}
 513 | {"label": "GPE", "pattern": [{"lower": "lesotho"}]}
 514 | {"label": "GPE", "pattern": [{"lower": "ls"}]}
 515 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "lesotho"}]}
 516 | {"label": "GPE", "pattern": [{"lower": "muso"}, {"lower": "oa"}, {"lower": "lesotho"}]}
 517 | {"label": "GPE", "pattern": [{"lower": "liberia"}]}
 518 | {"label": "GPE", "pattern": [{"lower": "lr"}]}
 519 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "liberia"}]}
 520 | {"label": "GPE", "pattern": [{"lower": "libya"}]}
 521 | {"label": "GPE", "pattern": [{"lower": "ly"}]}
 522 | {"label": "GPE", "pattern": [{"lower": "state"}, {"lower": "of"}, {"lower": "libya"}]}
 523 | {"label": "GPE", "pattern": [{"lower": "dawlat"}, {"lower": "libya"}]}
 524 | {"label": "GPE", "pattern": [{"lower": "liechtenstein"}]}
 525 | {"label": "GPE", "pattern": [{"lower": "li"}]}
 526 | {"label": "GPE", "pattern": [{"lower": "principality"}, {"lower": "of"}, {"lower": "liechtenstein"}]}
 527 | {"label": "GPE", "pattern": [{"lower": "fürstentum"}, {"lower": "liechtenstein"}]}
 528 | {"label": "GPE", "pattern": [{"lower": "lithuania"}]}
 529 | {"label": "GPE", "pattern": [{"lower": "lt"}]}
 530 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "lithuania"}]}
 531 | {"label": "GPE", "pattern": [{"lower": "lietuvos"}, {"lower": "respublika"}]}
 532 | {"label": "GPE", "pattern": [{"lower": "luxembourg"}]}
 533 | {"label": "GPE", "pattern": [{"lower": "lu"}]}
 534 | {"label": "GPE", "pattern": [{"lower": "grand"}, {"lower": "duchy"}, {"lower": "of"}, {"lower": "luxembourg"}]}
 535 | {"label": "GPE", "pattern": [{"lower": "grand"}, {"lower": "-"}, {"lower": "duché"}, {"lower": "de"}, {"lower": "luxembourg"}]}
 536 | {"label": "GPE", "pattern": [{"lower": "großherzogtum"}, {"lower": "luxemburg"}]}
 537 | {"label": "GPE", "pattern": [{"lower": "groussherzogtum"}, {"lower": "lëtzebuerg"}]}
 538 | {"label": "GPE", "pattern": [{"lower": "macao"}]}
 539 | {"label": "GPE", "pattern": [{"lower": "mo"}]}
 540 | {"label": "GPE", "pattern": [{"lower": "澳门"}]}
 541 | {"label": "GPE", "pattern": [{"lower": "macao"}, {"lower": "special"}, {"lower": "administrative"}, {"lower": "region"}, {"lower": "of"}, {"lower": "the"}, {"lower": "people"}, {"lower": "'s"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "china"}]}
 542 | {"label": "GPE", "pattern": [{"lower": "中華人民共和國澳門特別行政區"}]}
 543 | {"label": "GPE", "pattern": [{"lower": "região"}, {"lower": "administrativa"}, {"lower": "especial"}, {"lower": "de"}, {"lower": "macau"}, {"lower": "da"}, {"lower": "república"}, {"lower": "popular"}, {"lower": "da"}, {"lower": "china"}]}
 544 | {"label": "GPE", "pattern": [{"lower": "macedonia"}, {"lower": "("}, {"lower": "the"}, {"lower": "former"}, {"lower": "yugoslav"}, {"lower": "republic"}, {"lower": "of"}, {"lower": ")"}]}
 545 | {"label": "GPE", "pattern": [{"lower": "mk"}]}
 546 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "macedonia"}]}
 547 | {"label": "GPE", "pattern": [{"lower": "република"}, {"lower": "македонија"}]}
 548 | {"label": "GPE", "pattern": [{"lower": "madagascar"}]}
 549 | {"label": "GPE", "pattern": [{"lower": "mg"}]}
 550 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "madagascar"}]}
 551 | {"label": "GPE", "pattern": [{"lower": "repoblikan'i"}, {"lower": "madagasikara"}]}
 552 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "de"}, {"lower": "madagascar"}]}
 553 | {"label": "GPE", "pattern": [{"lower": "malawi"}]}
 554 | {"label": "GPE", "pattern": [{"lower": "mw"}]}
 555 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "malawi"}]}
 556 | {"label": "GPE", "pattern": [{"lower": "malaysia"}]}
 557 | {"label": "GPE", "pattern": [{"lower": "my"}]}
 558 | {"label": "GPE", "pattern": [{"lower": "maldives"}]}
 559 | {"label": "GPE", "pattern": [{"lower": "mv"}]}
 560 | {"label": "GPE", "pattern": [{"lower": "maldive"}, {"lower": "islands"}]}
 561 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "the"}, {"lower": "maldives"}]}
 562 | {"label": "GPE", "pattern": [{"lower": "dhivehi"}, {"lower": "raajjeyge"}, {"lower": "jumhooriyya"}]}
 563 | {"label": "GPE", "pattern": [{"lower": "mali"}]}
 564 | {"label": "GPE", "pattern": [{"lower": "ml"}]}
 565 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "mali"}]}
 566 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "du"}, {"lower": "mali"}]}
 567 | {"label": "GPE", "pattern": [{"lower": "malta"}]}
 568 | {"label": "GPE", "pattern": [{"lower": "mt"}]}
 569 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "malta"}]}
 570 | {"label": "GPE", "pattern": [{"lower": "repubblika"}, {"lower": "ta"}, {"lower": "'"}, {"lower": "malta"}]}
 571 | {"label": "GPE", "pattern": [{"lower": "marshall"}, {"lower": "islands"}]}
 572 | {"label": "GPE", "pattern": [{"lower": "mh"}]}
 573 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "the"}, {"lower": "marshall"}, {"lower": "islands"}]}
 574 | {"label": "GPE", "pattern": [{"lower": "aolepān"}, {"lower": "aorōkin"}, {"lower": "m̧ajeļ"}]}
 575 | {"label": "GPE", "pattern": [{"lower": "martinique"}]}
 576 | {"label": "GPE", "pattern": [{"lower": "mq"}]}
 577 | {"label": "GPE", "pattern": [{"lower": "mauritania"}]}
 578 | {"label": "GPE", "pattern": [{"lower": "mr"}]}
 579 | {"label": "GPE", "pattern": [{"lower": "islamic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "mauritania"}]}
 580 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "jumhūriyyah"}, {"lower": "al-ʾislāmiyyah"}, {"lower": "al"}, {"lower": "-"}, {"lower": "mūrītāniyyah"}]}
 581 | {"label": "GPE", "pattern": [{"lower": "mauritius"}]}
 582 | {"label": "GPE", "pattern": [{"lower": "mu"}]}
 583 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "mauritius"}]}
 584 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "de"}, {"lower": "maurice"}]}
 585 | {"label": "GPE", "pattern": [{"lower": "mayotte"}]}
 586 | {"label": "GPE", "pattern": [{"lower": "yt"}]}
 587 | {"label": "GPE", "pattern": [{"lower": "department"}, {"lower": "of"}, {"lower": "mayotte"}]}
 588 | {"label": "GPE", "pattern": [{"lower": "département"}, {"lower": "de"}, {"lower": "mayotte"}]}
 589 | {"label": "GPE", "pattern": [{"lower": "mexico"}]}
 590 | {"label": "GPE", "pattern": [{"lower": "mx"}]}
 591 | {"label": "GPE", "pattern": [{"lower": "mexicanos"}]}
 592 | {"label": "GPE", "pattern": [{"lower": "united"}, {"lower": "mexican"}, {"lower": "states"}]}
 593 | {"label": "GPE", "pattern": [{"lower": "estados"}, {"lower": "unidos"}, {"lower": "mexicanos"}]}
 594 | {"label": "GPE", "pattern": [{"lower": "micronesia"}, {"lower": "("}, {"lower": "federated"}, {"lower": "states"}, {"lower": "of"}, {"lower": ")"}]}
 595 | {"label": "GPE", "pattern": [{"lower": "fm"}]}
 596 | {"label": "GPE", "pattern": [{"lower": "federated"}, {"lower": "states"}, {"lower": "of"}, {"lower": "micronesia"}]}
 597 | {"label": "GPE", "pattern": [{"lower": "moldova"}, {"lower": "("}, {"lower": "republic"}, {"lower": "of"}, {"lower": ")"}]}
 598 | {"label": "GPE", "pattern": [{"lower": "md"}]}
 599 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "moldova"}]}
 600 | {"label": "GPE", "pattern": [{"lower": "republica"}, {"lower": "moldova"}]}
 601 | {"label": "GPE", "pattern": [{"lower": "monaco"}]}
 602 | {"label": "GPE", "pattern": [{"lower": "mc"}]}
 603 | {"label": "GPE", "pattern": [{"lower": "principality"}, {"lower": "of"}, {"lower": "monaco"}]}
 604 | {"label": "GPE", "pattern": [{"lower": "principauté"}, {"lower": "de"}, {"lower": "monaco"}]}
 605 | {"label": "GPE", "pattern": [{"lower": "mongolia"}]}
 606 | {"label": "GPE", "pattern": [{"lower": "mn"}]}
 607 | {"label": "GPE", "pattern": [{"lower": "montenegro"}]}
 608 | {"label": "GPE", "pattern": [{"lower": "me"}]}
 609 | {"label": "GPE", "pattern": [{"lower": "crna"}, {"lower": "gora"}]}
 610 | {"label": "GPE", "pattern": [{"lower": "montserrat"}]}
 611 | {"label": "GPE", "pattern": [{"lower": "ms"}]}
 612 | {"label": "GPE", "pattern": [{"lower": "morocco"}]}
 613 | {"label": "GPE", "pattern": [{"lower": "ma"}]}
 614 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "morocco"}]}
 615 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "mamlakah"}, {"lower": "al"}, {"lower": "-"}, {"lower": "maġribiyah"}]}
 616 | {"label": "GPE", "pattern": [{"lower": "mozambique"}]}
 617 | {"label": "GPE", "pattern": [{"lower": "mz"}]}
 618 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "mozambique"}]}
 619 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "moçambique"}]}
 620 | {"label": "GPE", "pattern": [{"lower": "myanmar"}]}
 621 | {"label": "GPE", "pattern": [{"lower": "mm"}]}
 622 | {"label": "GPE", "pattern": [{"lower": "burma"}]}
 623 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "the"}, {"lower": "union"}, {"lower": "of"}, {"lower": "myanmar"}]}
 624 | {"label": "GPE", "pattern": [{"lower": "pyidaunzu"}, {"lower": "thanmăda"}, {"lower": "myăma"}, {"lower": "nainngandaw"}]}
 625 | {"label": "GPE", "pattern": [{"lower": "namibia"}]}
 626 | {"label": "GPE", "pattern": [{"lower": "na"}]}
 627 | {"label": "GPE", "pattern": [{"lower": "namibië"}]}
 628 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "namibia"}]}
 629 | {"label": "GPE", "pattern": [{"lower": "nauru"}]}
 630 | {"label": "GPE", "pattern": [{"lower": "nr"}]}
 631 | {"label": "GPE", "pattern": [{"lower": "naoero"}]}
 632 | {"label": "GPE", "pattern": [{"lower": "pleasant"}, {"lower": "island"}]}
 633 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "nauru"}]}
 634 | {"label": "GPE", "pattern": [{"lower": "ripublik"}, {"lower": "naoero"}]}
 635 | {"label": "GPE", "pattern": [{"lower": "nepal"}]}
 636 | {"label": "GPE", "pattern": [{"lower": "np"}]}
 637 | {"label": "GPE", "pattern": [{"lower": "federal"}, {"lower": "democratic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "nepal"}]}
 638 | {"label": "GPE", "pattern": [{"lower": "loktāntrik"}, {"lower": "ganatantra"}, {"lower": "nepāl"}]}
 639 | {"label": "GPE", "pattern": [{"lower": "netherlands"}]}
 640 | {"label": "GPE", "pattern": [{"lower": "nl"}]}
 641 | {"label": "GPE", "pattern": [{"lower": "holland"}]}
 642 | {"label": "GPE", "pattern": [{"lower": "nederland"}]}
 643 | {"label": "GPE", "pattern": [{"lower": "new"}, {"lower": "caledonia"}]}
 644 | {"label": "GPE", "pattern": [{"lower": "nc"}]}
 645 | {"label": "GPE", "pattern": [{"lower": "new"}, {"lower": "zealand"}]}
 646 | {"label": "GPE", "pattern": [{"lower": "nz"}]}
 647 | {"label": "GPE", "pattern": [{"lower": "aotearoa"}]}
 648 | {"label": "GPE", "pattern": [{"lower": "nicaragua"}]}
 649 | {"label": "GPE", "pattern": [{"lower": "ni"}]}
 650 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "nicaragua"}]}
 651 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "nicaragua"}]}
 652 | {"label": "GPE", "pattern": [{"lower": "niger"}]}
 653 | {"label": "GPE", "pattern": [{"lower": "ne"}]}
 654 | {"label": "GPE", "pattern": [{"lower": "nijar"}]}
 655 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "niger"}]}
 656 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "du"}, {"lower": "niger"}]}
 657 | {"label": "GPE", "pattern": [{"lower": "nigeria"}]}
 658 | {"label": "GPE", "pattern": [{"lower": "ng"}]}
 659 | {"label": "GPE", "pattern": [{"lower": "nijeriya"}]}
 660 | {"label": "GPE", "pattern": [{"lower": "naíjíríà"}]}
 661 | {"label": "GPE", "pattern": [{"lower": "federal"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "nigeria"}]}
 662 | {"label": "GPE", "pattern": [{"lower": "niue"}]}
 663 | {"label": "GPE", "pattern": [{"lower": "nu"}]}
 664 | {"label": "GPE", "pattern": [{"lower": "norfolk"}, {"lower": "island"}]}
 665 | {"label": "GPE", "pattern": [{"lower": "nf"}]}
 666 | {"label": "GPE", "pattern": [{"lower": "territory"}, {"lower": "of"}, {"lower": "norfolk"}, {"lower": "island"}]}
 667 | {"label": "GPE", "pattern": [{"lower": "teratri"}, {"lower": "of"}, {"lower": "norf'k"}, {"lower": "ailen"}]}
 668 | {"label": "GPE", "pattern": [{"lower": "korea"}, {"lower": "("}, {"lower": "democratic"}, {"lower": "people"}, {"lower": "'s"}, {"lower": "republic"}, {"lower": "of"}, {"lower": ")"}]}
 669 | {"label": "GPE", "pattern": [{"lower": "kp"}]}
 670 | {"label": "GPE", "pattern": [{"lower": "democratic"}, {"lower": "people"}, {"lower": "'s"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "korea"}]}
 671 | {"label": "GPE", "pattern": [{"lower": "조선민주주의인민공화국"}]}
 672 | {"label": "GPE", "pattern": [{"lower": "chosŏn"}, {"lower": "minjujuŭi"}, {"lower": "inmin"}, {"lower": "konghwaguk"}]}
 673 | {"label": "GPE", "pattern": [{"lower": "northern"}, {"lower": "mariana"}, {"lower": "islands"}]}
 674 | {"label": "GPE", "pattern": [{"lower": "mp"}]}
 675 | {"label": "GPE", "pattern": [{"lower": "commonwealth"}, {"lower": "of"}, {"lower": "the"}, {"lower": "northern"}, {"lower": "mariana"}, {"lower": "islands"}]}
 676 | {"label": "GPE", "pattern": [{"lower": "sankattan"}, {"lower": "siha"}, {"lower": "na"}, {"lower": "islas"}, {"lower": "mariånas"}]}
 677 | {"label": "GPE", "pattern": [{"lower": "norway"}]}
 678 | {"label": "GPE", "pattern": [{"lower": "no"}]}
 679 | {"label": "GPE", "pattern": [{"lower": "norge"}]}
 680 | {"label": "GPE", "pattern": [{"lower": "noreg"}]}
 681 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "norway"}]}
 682 | {"label": "GPE", "pattern": [{"lower": "kongeriket"}, {"lower": "norge"}]}
 683 | {"label": "GPE", "pattern": [{"lower": "kongeriket"}, {"lower": "noreg"}]}
 684 | {"label": "GPE", "pattern": [{"lower": "oman"}]}
 685 | {"label": "GPE", "pattern": [{"lower": "om"}]}
 686 | {"label": "GPE", "pattern": [{"lower": "sultanate"}, {"lower": "of"}, {"lower": "oman"}]}
 687 | {"label": "GPE", "pattern": [{"lower": "salṭanat"}, {"lower": "ʻumān"}]}
 688 | {"label": "GPE", "pattern": [{"lower": "pakistan"}]}
 689 | {"label": "GPE", "pattern": [{"lower": "pk"}]}
 690 | {"label": "GPE", "pattern": [{"lower": "pākistān"}]}
 691 | {"label": "GPE", "pattern": [{"lower": "islamic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "pakistan"}]}
 692 | {"label": "GPE", "pattern": [{"lower": "islāmī"}, {"lower": "jumhūriya'eh"}, {"lower": "pākistān"}]}
 693 | {"label": "GPE", "pattern": [{"lower": "palau"}]}
 694 | {"label": "GPE", "pattern": [{"lower": "pw"}]}
 695 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "palau"}]}
 696 | {"label": "GPE", "pattern": [{"lower": "beluu"}, {"lower": "er"}, {"lower": "a"}, {"lower": "belau"}]}
 697 | {"label": "GPE", "pattern": [{"lower": "palestine"}, {"lower": ","}, {"lower": "state"}, {"lower": "of"}]}
 698 | {"label": "GPE", "pattern": [{"lower": "ps"}]}
 699 | {"label": "GPE", "pattern": [{"lower": "state"}, {"lower": "of"}, {"lower": "palestine"}]}
 700 | {"label": "GPE", "pattern": [{"lower": "dawlat"}, {"lower": "filasṭin"}]}
 701 | {"label": "GPE", "pattern": [{"lower": "panama"}]}
 702 | {"label": "GPE", "pattern": [{"lower": "pa"}]}
 703 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "panama"}]}
 704 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "de"}, {"lower": "panamá"}]}
 705 | {"label": "GPE", "pattern": [{"lower": "papua"}, {"lower": "new"}, {"lower": "guinea"}]}
 706 | {"label": "GPE", "pattern": [{"lower": "pg"}]}
 707 | {"label": "GPE", "pattern": [{"lower": "independent"}, {"lower": "state"}, {"lower": "of"}, {"lower": "papua"}, {"lower": "new"}, {"lower": "guinea"}]}
 708 | {"label": "GPE", "pattern": [{"lower": "independen"}, {"lower": "stet"}, {"lower": "bilong"}, {"lower": "papua"}, {"lower": "niugini"}]}
 709 | {"label": "GPE", "pattern": [{"lower": "paraguay"}]}
 710 | {"label": "GPE", "pattern": [{"lower": "py"}]}
 711 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "paraguay"}]}
 712 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "del"}, {"lower": "paraguay"}]}
 713 | {"label": "GPE", "pattern": [{"lower": "tetã"}, {"lower": "paraguái"}]}
 714 | {"label": "GPE", "pattern": [{"lower": "peru"}]}
 715 | {"label": "GPE", "pattern": [{"lower": "pe"}]}
 716 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "peru"}]}
 717 | {"label": "GPE", "pattern": [{"lower": " "}, {"lower": "república"}, {"lower": "del"}, {"lower": "perú"}]}
 718 | {"label": "GPE", "pattern": [{"lower": "philippines"}]}
 719 | {"label": "GPE", "pattern": [{"lower": "ph"}]}
 720 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "the"}, {"lower": "philippines"}]}
 721 | {"label": "GPE", "pattern": [{"lower": "repúblika"}, {"lower": "ng"}, {"lower": "pilipinas"}]}
 722 | {"label": "GPE", "pattern": [{"lower": "pitcairn"}]}
 723 | {"label": "GPE", "pattern": [{"lower": "pn"}]}
 724 | {"label": "GPE", "pattern": [{"lower": "pitcairn"}, {"lower": "henderson"}, {"lower": "ducie"}, {"lower": "and"}, {"lower": "oeno"}, {"lower": "islands"}]}
 725 | {"label": "GPE", "pattern": [{"lower": "poland"}]}
 726 | {"label": "GPE", "pattern": [{"lower": "pl"}]}
 727 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "poland"}]}
 728 | {"label": "GPE", "pattern": [{"lower": "rzeczpospolita"}, {"lower": "polska"}]}
 729 | {"label": "GPE", "pattern": [{"lower": "portugal"}]}
 730 | {"label": "GPE", "pattern": [{"lower": "pt"}]}
 731 | {"label": "GPE", "pattern": [{"lower": "portuguesa"}]}
 732 | {"label": "GPE", "pattern": [{"lower": "portuguese"}, {"lower": "republic"}]}
 733 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "portuguesa"}]}
 734 | {"label": "GPE", "pattern": [{"lower": "puerto"}, {"lower": "rico"}]}
 735 | {"label": "GPE", "pattern": [{"lower": "pr"}]}
 736 | {"label": "GPE", "pattern": [{"lower": "commonwealth"}, {"lower": "of"}, {"lower": "puerto"}, {"lower": "rico"}]}
 737 | {"label": "GPE", "pattern": [{"lower": "estado"}, {"lower": "libre"}, {"lower": "asociado"}, {"lower": "de"}, {"lower": "puerto"}, {"lower": "rico"}]}
 738 | {"label": "GPE", "pattern": [{"lower": "qatar"}]}
 739 | {"label": "GPE", "pattern": [{"lower": "qa"}]}
 740 | {"label": "GPE", "pattern": [{"lower": "state"}, {"lower": "of"}, {"lower": "qatar"}]}
 741 | {"label": "GPE", "pattern": [{"lower": "dawlat"}, {"lower": "qaṭar"}]}
 742 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "kosovo"}]}
 743 | {"label": "GPE", "pattern": [{"lower": "xk"}]}
 744 | {"label": "GPE", "pattern": [{"lower": "република"}, {"lower": "косово"}]}
 745 | {"label": "GPE", "pattern": [{"lower": "réunion"}]}
 746 | {"label": "GPE", "pattern": [{"lower": "re"}]}
 747 | {"label": "GPE", "pattern": [{"lower": "reunion"}]}
 748 | {"label": "GPE", "pattern": [{"lower": "romania"}]}
 749 | {"label": "GPE", "pattern": [{"lower": "ro"}]}
 750 | {"label": "GPE", "pattern": [{"lower": "rumania"}]}
 751 | {"label": "GPE", "pattern": [{"lower": "roumania"}]}
 752 | {"label": "GPE", "pattern": [{"lower": "românia"}]}
 753 | {"label": "GPE", "pattern": [{"lower": "russian"}, {"lower": "federation"}]}
 754 | {"label": "GPE", "pattern": [{"lower": "ru"}]}
 755 | {"label": "GPE", "pattern": [{"lower": "rossiya"}]}
 756 | {"label": "GPE", "pattern": [{"lower": "russian"}, {"lower": "federation"}]}
 757 | {"label": "GPE", "pattern": [{"lower": "российская"}, {"lower": "федерация"}]}
 758 | {"label": "GPE", "pattern": [{"lower": "rossiyskaya"}, {"lower": "federatsiya"}]}
 759 | {"label": "GPE", "pattern": [{"lower": "rwanda"}]}
 760 | {"label": "GPE", "pattern": [{"lower": "rw"}]}
 761 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "rwanda"}]}
 762 | {"label": "GPE", "pattern": [{"lower": "repubulika"}, {"lower": "y'u"}, {"lower": "rwanda"}]}
 763 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "du"}, {"lower": "rwanda"}]}
 764 | {"label": "GPE", "pattern": [{"lower": "saint"}, {"lower": "barthélemy"}]}
 765 | {"label": "GPE", "pattern": [{"lower": "bl"}]}
 766 | {"label": "GPE", "pattern": [{"lower": "st."}, {"lower": "barthelemy"}]}
 767 | {"label": "GPE", "pattern": [{"lower": "collectivity"}, {"lower": "of"}, {"lower": "saint"}, {"lower": "barthélemy"}]}
 768 | {"label": "GPE", "pattern": [{"lower": "collectivité"}, {"lower": "de"}, {"lower": "saint"}, {"lower": "-"}, {"lower": "barthélemy"}]}
 769 | {"label": "GPE", "pattern": [{"lower": "saint"}, {"lower": "helena"}, {"lower": ","}, {"lower": "ascension"}, {"lower": "and"}, {"lower": "tristan"}, {"lower": "da"}, {"lower": "cunha"}]}
 770 | {"label": "GPE", "pattern": [{"lower": "sh"}]}
 771 | {"label": "GPE", "pattern": [{"lower": "saint"}, {"lower": "kitts"}, {"lower": "and"}, {"lower": "nevis"}]}
 772 | {"label": "GPE", "pattern": [{"lower": "kn"}]}
 773 | {"label": "GPE", "pattern": [{"lower": "federation"}, {"lower": "of"}, {"lower": "saint"}, {"lower": "christopher"}, {"lower": "and"}, {"lower": "nevis"}]}
 774 | {"label": "GPE", "pattern": [{"lower": "saint"}, {"lower": "lucia"}]}
 775 | {"label": "GPE", "pattern": [{"lower": "lc"}]}
 776 | {"label": "GPE", "pattern": [{"lower": "saint"}, {"lower": "martin"}, {"lower": "("}, {"lower": "french"}, {"lower": "part"}, {"lower": ")"}]}
 777 | {"label": "GPE", "pattern": [{"lower": "mf"}]}
 778 | {"label": "GPE", "pattern": [{"lower": "collectivity"}, {"lower": "of"}, {"lower": "saint"}, {"lower": "martin"}]}
 779 | {"label": "GPE", "pattern": [{"lower": "collectivité"}, {"lower": "de"}, {"lower": "saint"}, {"lower": "-"}, {"lower": "martin"}]}
 780 | {"label": "GPE", "pattern": [{"lower": "saint"}, {"lower": "pierre"}, {"lower": "and"}, {"lower": "miquelon"}]}
 781 | {"label": "GPE", "pattern": [{"lower": "pm"}]}
 782 | {"label": "GPE", "pattern": [{"lower": "collectivité"}, {"lower": "territoriale"}, {"lower": "de"}, {"lower": "saint"}, {"lower": "-"}, {"lower": "pierre"}, {"lower": "-"}, {"lower": "et"}, {"lower": "-"}, {"lower": "miquelon"}]}
 783 | {"label": "GPE", "pattern": [{"lower": "saint"}, {"lower": "vincent"}, {"lower": "and"}, {"lower": "the"}, {"lower": "grenadines"}]}
 784 | {"label": "GPE", "pattern": [{"lower": "vc"}]}
 785 | {"label": "GPE", "pattern": [{"lower": "samoa"}]}
 786 | {"label": "GPE", "pattern": [{"lower": "ws"}]}
 787 | {"label": "GPE", "pattern": [{"lower": "independent"}, {"lower": "state"}, {"lower": "of"}, {"lower": "samoa"}]}
 788 | {"label": "GPE", "pattern": [{"lower": "malo"}, {"lower": "saʻoloto"}, {"lower": "tutoʻatasi"}, {"lower": "o"}, {"lower": "sāmoa"}]}
 789 | {"label": "GPE", "pattern": [{"lower": "san"}, {"lower": "marino"}]}
 790 | {"label": "GPE", "pattern": [{"lower": "sm"}]}
 791 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "san"}, {"lower": "marino"}]}
 792 | {"label": "GPE", "pattern": [{"lower": "repubblica"}, {"lower": "di"}, {"lower": "san"}, {"lower": "marino"}]}
 793 | {"label": "GPE", "pattern": [{"lower": "sao"}, {"lower": "tome"}, {"lower": "and"}, {"lower": "principe"}]}
 794 | {"label": "GPE", "pattern": [{"lower": "st"}]}
 795 | {"label": "GPE", "pattern": [{"lower": "democratic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "são"}, {"lower": "tomé"}, {"lower": "and"}, {"lower": "príncipe"}]}
 796 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "democrática"}, {"lower": "de"}, {"lower": "são"}, {"lower": "tomé"}, {"lower": "e"}, {"lower": "príncipe"}]}
 797 | {"label": "GPE", "pattern": [{"lower": "saudi"}, {"lower": "arabia"}]}
 798 | {"label": "GPE", "pattern": [{"lower": "sa"}]}
 799 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "saudi"}, {"lower": "arabia"}]}
 800 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "mamlakah"}, {"lower": "al-‘arabiyyah"}, {"lower": "as"}, {"lower": "-"}, {"lower": "su‘ūdiyyah"}]}
 801 | {"label": "GPE", "pattern": [{"lower": "senegal"}]}
 802 | {"label": "GPE", "pattern": [{"lower": "sn"}]}
 803 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "senegal"}]}
 804 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "du"}, {"lower": "sénégal"}]}
 805 | {"label": "GPE", "pattern": [{"lower": "serbia"}]}
 806 | {"label": "GPE", "pattern": [{"lower": "rs"}]}
 807 | {"label": "GPE", "pattern": [{"lower": "srbija"}]}
 808 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "serbia"}]}
 809 | {"label": "GPE", "pattern": [{"lower": "република"}, {"lower": "србија"}]}
 810 | {"label": "GPE", "pattern": [{"lower": "republika"}, {"lower": "srbija"}]}
 811 | {"label": "GPE", "pattern": [{"lower": "seychelles"}]}
 812 | {"label": "GPE", "pattern": [{"lower": "sc"}]}
 813 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "seychelles"}]}
 814 | {"label": "GPE", "pattern": [{"lower": "repiblik"}, {"lower": "sesel"}]}
 815 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "des"}, {"lower": "seychelles"}]}
 816 | {"label": "GPE", "pattern": [{"lower": "sierra"}, {"lower": "leone"}]}
 817 | {"label": "GPE", "pattern": [{"lower": "sl"}]}
 818 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "sierra"}, {"lower": "leone"}]}
 819 | {"label": "GPE", "pattern": [{"lower": "singapore"}]}
 820 | {"label": "GPE", "pattern": [{"lower": "sg"}]}
 821 | {"label": "GPE", "pattern": [{"lower": "singapura"}]}
 822 | {"label": "GPE", "pattern": [{"lower": "republik"}, {"lower": "singapura"}]}
 823 | {"label": "GPE", "pattern": [{"lower": "新加坡共和国"}]}
 824 | {"label": "GPE", "pattern": [{"lower": "sint"}, {"lower": "maarten"}, {"lower": "("}, {"lower": "dutch"}, {"lower": "part"}, {"lower": ")"}]}
 825 | {"label": "GPE", "pattern": [{"lower": "sx"}]}
 826 | {"label": "GPE", "pattern": [{"lower": "slovakia"}]}
 827 | {"label": "GPE", "pattern": [{"lower": "sk"}]}
 828 | {"label": "GPE", "pattern": [{"lower": "slovak"}, {"lower": "republic"}]}
 829 | {"label": "GPE", "pattern": [{"lower": "slovenská"}, {"lower": "republika"}]}
 830 | {"label": "GPE", "pattern": [{"lower": "slovenia"}]}
 831 | {"label": "GPE", "pattern": [{"lower": "si"}]}
 832 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "slovenia"}]}
 833 | {"label": "GPE", "pattern": [{"lower": "republika"}, {"lower": "slovenija"}]}
 834 | {"label": "GPE", "pattern": [{"lower": "solomon"}, {"lower": "islands"}]}
 835 | {"label": "GPE", "pattern": [{"lower": "sb"}]}
 836 | {"label": "GPE", "pattern": [{"lower": "somalia"}]}
 837 | {"label": "GPE", "pattern": [{"lower": "so"}]}
 838 | {"label": "GPE", "pattern": [{"lower": "aṣ"}, {"lower": "-"}, {"lower": "ṣūmāl"}]}
 839 | {"label": "GPE", "pattern": [{"lower": "federal"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "somalia"}]}
 840 | {"label": "GPE", "pattern": [{"lower": "jamhuuriyadda"}, {"lower": "federaalka"}, {"lower": "soomaaliya"}]}
 841 | {"label": "GPE", "pattern": [{"lower": "jumhūriyyat"}, {"lower": "aṣ"}, {"lower": "-"}, {"lower": "ṣūmāl"}, {"lower": "al"}, {"lower": "-"}, {"lower": "fiderāliyya"}]}
 842 | {"label": "GPE", "pattern": [{"lower": "south"}, {"lower": "africa"}]}
 843 | {"label": "GPE", "pattern": [{"lower": "za"}]}
 844 | {"label": "GPE", "pattern": [{"lower": "rsa"}]}
 845 | {"label": "GPE", "pattern": [{"lower": "suid"}, {"lower": "-"}, {"lower": "afrika"}]}
 846 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "south"}, {"lower": "africa"}]}
 847 | {"label": "GPE", "pattern": [{"lower": "south"}, {"lower": "georgia"}, {"lower": "and"}, {"lower": "the"}, {"lower": "south"}, {"lower": "sandwich"}, {"lower": "islands"}]}
 848 | {"label": "GPE", "pattern": [{"lower": "gs"}]}
 849 | {"label": "GPE", "pattern": [{"lower": "south"}, {"lower": "georgia"}, {"lower": "and"}, {"lower": "the"}, {"lower": "south"}, {"lower": "sandwich"}, {"lower": "islands"}]}
 850 | {"label": "GPE", "pattern": [{"lower": "korea"}, {"lower": "("}, {"lower": "republic"}, {"lower": "of"}, {"lower": ")"}]}
 851 | {"label": "GPE", "pattern": [{"lower": "kr"}]}
 852 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "korea"}]}
 853 | {"label": "GPE", "pattern": [{"lower": "south"}, {"lower": "sudan"}]}
 854 | {"label": "GPE", "pattern": [{"lower": "ss"}]}
 855 | {"label": "GPE", "pattern": [{"lower": "spain"}]}
 856 | {"label": "GPE", "pattern": [{"lower": "es"}]}
 857 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "spain"}]}
 858 | {"label": "GPE", "pattern": [{"lower": "reino"}, {"lower": "de"}, {"lower": "españa"}]}
 859 | {"label": "GPE", "pattern": [{"lower": "sri"}, {"lower": "lanka"}]}
 860 | {"label": "GPE", "pattern": [{"lower": "lk"}]}
 861 | {"label": "GPE", "pattern": [{"lower": "ilaṅkai"}]}
 862 | {"label": "GPE", "pattern": [{"lower": "democratic"}, {"lower": "socialist"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "sri"}, {"lower": "lanka"}]}
 863 | {"label": "GPE", "pattern": [{"lower": "sudan"}]}
 864 | {"label": "GPE", "pattern": [{"lower": "sd"}]}
 865 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "the"}, {"lower": "sudan"}]}
 866 | {"label": "GPE", "pattern": [{"lower": "jumhūrīyat"}, {"lower": "as"}, {"lower": "-"}, {"lower": "sūdān"}]}
 867 | {"label": "GPE", "pattern": [{"lower": "suriname"}]}
 868 | {"label": "GPE", "pattern": [{"lower": "sr"}]}
 869 | {"label": "GPE", "pattern": [{"lower": "sarnam"}]}
 870 | {"label": "GPE", "pattern": [{"lower": "sranangron"}]}
 871 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "suriname"}]}
 872 | {"label": "GPE", "pattern": [{"lower": "republiek"}, {"lower": "suriname"}]}
 873 | {"label": "GPE", "pattern": [{"lower": "svalbard"}, {"lower": "and"}, {"lower": "jan"}, {"lower": "mayen"}]}
 874 | {"label": "GPE", "pattern": [{"lower": "sj"}]}
 875 | {"label": "GPE", "pattern": [{"lower": "svalbard"}, {"lower": "and"}, {"lower": "jan"}, {"lower": "mayen"}, {"lower": "islands"}]}
 876 | {"label": "GPE", "pattern": [{"lower": "swaziland"}]}
 877 | {"label": "GPE", "pattern": [{"lower": "sz"}]}
 878 | {"label": "GPE", "pattern": [{"lower": "weswatini"}]}
 879 | {"label": "GPE", "pattern": [{"lower": "swatini"}]}
 880 | {"label": "GPE", "pattern": [{"lower": "ngwane"}]}
 881 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "swaziland"}]}
 882 | {"label": "GPE", "pattern": [{"lower": "umbuso"}, {"lower": "waseswatini"}]}
 883 | {"label": "GPE", "pattern": [{"lower": "sweden"}]}
 884 | {"label": "GPE", "pattern": [{"lower": "se"}]}
 885 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "sweden"}]}
 886 | {"label": "GPE", "pattern": [{"lower": "konungariket"}, {"lower": "sverige"}]}
 887 | {"label": "GPE", "pattern": [{"lower": "switzerland"}]}
 888 | {"label": "GPE", "pattern": [{"lower": "ch"}]}
 889 | {"label": "GPE", "pattern": [{"lower": "swiss"}, {"lower": "confederation"}]}
 890 | {"label": "GPE", "pattern": [{"lower": "schweiz"}]}
 891 | {"label": "GPE", "pattern": [{"lower": "suisse"}]}
 892 | {"label": "GPE", "pattern": [{"lower": "svizzera"}]}
 893 | {"label": "GPE", "pattern": [{"lower": "svizra"}]}
 894 | {"label": "GPE", "pattern": [{"lower": "syrian"}, {"lower": "arab"}, {"lower": "republic"}]}
 895 | {"label": "GPE", "pattern": [{"lower": "sy"}]}
 896 | {"label": "GPE", "pattern": [{"lower": "syrian"}, {"lower": "arab"}, {"lower": "republic"}]}
 897 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "jumhūrīyah"}, {"lower": "al-ʻarabīyah"}, {"lower": "as"}, {"lower": "-"}, {"lower": "sūrīyah"}]}
 898 | {"label": "GPE", "pattern": [{"lower": "taiwan"}]}
 899 | {"label": "GPE", "pattern": [{"lower": "tw"}]}
 900 | {"label": "GPE", "pattern": [{"lower": "táiwān"}]}
 901 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "china"}]}
 902 | {"label": "GPE", "pattern": [{"lower": "中華民國"}]}
 903 | {"label": "GPE", "pattern": [{"lower": "zhōnghuá"}, {"lower": "mínguó"}]}
 904 | {"label": "GPE", "pattern": [{"lower": "tajikistan"}]}
 905 | {"label": "GPE", "pattern": [{"lower": "tj"}]}
 906 | {"label": "GPE", "pattern": [{"lower": "toçikiston"}]}
 907 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "tajikistan"}]}
 908 | {"label": "GPE", "pattern": [{"lower": "ҷумҳурии"}, {"lower": "тоҷикистон"}]}
 909 | {"label": "GPE", "pattern": [{"lower": "çumhuriyi"}, {"lower": "toçikiston"}]}
 910 | {"label": "GPE", "pattern": [{"lower": "tanzania"}, {"lower": ","}, {"lower": "united"}, {"lower": "republic"}, {"lower": "of"}]}
 911 | {"label": "GPE", "pattern": [{"lower": "tz"}]}
 912 | {"label": "GPE", "pattern": [{"lower": "united"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "tanzania"}]}
 913 | {"label": "GPE", "pattern": [{"lower": "jamhuri"}, {"lower": "ya"}, {"lower": "muungano"}, {"lower": "wa"}, {"lower": "tanzania"}]}
 914 | {"label": "GPE", "pattern": [{"lower": "thailand"}]}
 915 | {"label": "GPE", "pattern": [{"lower": "th"}]}
 916 | {"label": "GPE", "pattern": [{"lower": "prathet"}]}
 917 | {"label": "GPE", "pattern": [{"lower": "thai"}]}
 918 | {"label": "GPE", "pattern": [{"lower": "kingdom"}, {"lower": "of"}, {"lower": "thailand"}]}
 919 | {"label": "GPE", "pattern": [{"lower": "ราชอาณาจักรไทย"}]}
 920 | {"label": "GPE", "pattern": [{"lower": "ratcha"}, {"lower": "anachak"}, {"lower": "thai"}]}
 921 | {"label": "GPE", "pattern": [{"lower": "timor"}, {"lower": "-"}, {"lower": "leste"}]}
 922 | {"label": "GPE", "pattern": [{"lower": "tl"}]}
 923 | {"label": "GPE", "pattern": [{"lower": "east"}, {"lower": "timor"}]}
 924 | {"label": "GPE", "pattern": [{"lower": "democratic"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "timor"}, {"lower": "-"}, {"lower": "leste"}]}
 925 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "democrática"}, {"lower": "de"}, {"lower": "timor"}, {"lower": "-"}, {"lower": "leste"}]}
 926 | {"label": "GPE", "pattern": [{"lower": "repúblika"}, {"lower": "demokrátika"}, {"lower": "timór"}, {"lower": "-"}, {"lower": "leste"}]}
 927 | {"label": "GPE", "pattern": [{"lower": "togo"}]}
 928 | {"label": "GPE", "pattern": [{"lower": "tg"}]}
 929 | {"label": "GPE", "pattern": [{"lower": "togolese"}]}
 930 | {"label": "GPE", "pattern": [{"lower": "togolese"}, {"lower": "republic"}]}
 931 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "togolaise"}]}
 932 | {"label": "GPE", "pattern": [{"lower": "tokelau"}]}
 933 | {"label": "GPE", "pattern": [{"lower": "tk"}]}
 934 | {"label": "GPE", "pattern": [{"lower": "tonga"}]}
 935 | {"label": "GPE", "pattern": [{"lower": "to"}]}
 936 | {"label": "GPE", "pattern": [{"lower": "trinidad"}, {"lower": "and"}, {"lower": "tobago"}]}
 937 | {"label": "GPE", "pattern": [{"lower": "tt"}]}
 938 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "trinidad"}, {"lower": "and"}, {"lower": "tobago"}]}
 939 | {"label": "GPE", "pattern": [{"lower": "tunisia"}]}
 940 | {"label": "GPE", "pattern": [{"lower": "tn"}]}
 941 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "tunisia"}]}
 942 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "jumhūriyyah"}, {"lower": "at"}, {"lower": "-"}, {"lower": "tūnisiyyah"}]}
 943 | {"label": "GPE", "pattern": [{"lower": "turkey"}]}
 944 | {"label": "GPE", "pattern": [{"lower": "tr"}]}
 945 | {"label": "GPE", "pattern": [{"lower": "turkiye"}]}
 946 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "turkey"}]}
 947 | {"label": "GPE", "pattern": [{"lower": "türkiye"}, {"lower": "cumhuriyeti"}]}
 948 | {"label": "GPE", "pattern": [{"lower": "turkmenistan"}]}
 949 | {"label": "GPE", "pattern": [{"lower": "tm"}]}
 950 | {"label": "GPE", "pattern": [{"lower": "turks"}, {"lower": "and"}, {"lower": "caicos"}, {"lower": "islands"}]}
 951 | {"label": "GPE", "pattern": [{"lower": "tc"}]}
 952 | {"label": "GPE", "pattern": [{"lower": "tuvalu"}]}
 953 | {"label": "GPE", "pattern": [{"lower": "tv"}]}
 954 | {"label": "GPE", "pattern": [{"lower": "uganda"}]}
 955 | {"label": "GPE", "pattern": [{"lower": "ug"}]}
 956 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "uganda"}]}
 957 | {"label": "GPE", "pattern": [{"lower": "jamhuri"}, {"lower": "ya"}, {"lower": "uganda"}]}
 958 | {"label": "GPE", "pattern": [{"lower": "ukraine"}]}
 959 | {"label": "GPE", "pattern": [{"lower": "ua"}]}
 960 | {"label": "GPE", "pattern": [{"lower": "ukrayina"}]}
 961 | {"label": "GPE", "pattern": [{"lower": "united"}, {"lower": "arab"}, {"lower": "emirates"}]}
 962 | {"label": "GPE", "pattern": [{"lower": "ae"}]}
 963 | {"label": "GPE", "pattern": [{"lower": "uae"}]}
 964 | {"label": "GPE", "pattern": [{"lower": "united"}, {"lower": "kingdom"}, {"lower": "of"}, {"lower": "great"}, {"lower": "britain"}, {"lower": "and"}, {"lower": "northern"}, {"lower": "ireland"}]}
 965 | {"label": "GPE", "pattern": [{"lower": "gb"}]}
 966 | {"label": "GPE", "pattern": [{"lower": "uk"}]}
 967 | {"label": "GPE", "pattern": [{"lower": "great"}, {"lower": "britain"}]}
 968 | {"label": "GPE", "pattern": [{"lower": "united"}, {"lower": "states"}, {"lower": "of"}, {"lower": "america"}]}
 969 | {"label": "GPE", "pattern": [{"lower": "us"}]}
 970 | {"label": "GPE", "pattern": [{"lower": "usa"}]}
 971 | {"label": "GPE", "pattern": [{"lower": "united"}, {"lower": "states"}, {"lower": "of"}, {"lower": "america"}]}
 972 | {"label": "GPE", "pattern": [{"lower": "uruguay"}]}
 973 | {"label": "GPE", "pattern": [{"lower": "uy"}]}
 974 | {"label": "GPE", "pattern": [{"lower": "oriental"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "uruguay"}]}
 975 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "oriental"}, {"lower": "del"}, {"lower": "uruguay"}]}
 976 | {"label": "GPE", "pattern": [{"lower": "uzbekistan"}]}
 977 | {"label": "GPE", "pattern": [{"lower": "uz"}]}
 978 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "uzbekistan"}]}
 979 | {"label": "GPE", "pattern": [{"lower": "o‘zbekiston"}, {"lower": "respublikasi"}]}
 980 | {"label": "GPE", "pattern": [{"lower": "ўзбекистон"}, {"lower": "республикаси"}]}
 981 | {"label": "GPE", "pattern": [{"lower": "vanuatu"}]}
 982 | {"label": "GPE", "pattern": [{"lower": "vu"}]}
 983 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "vanuatu"}]}
 984 | {"label": "GPE", "pattern": [{"lower": "ripablik"}, {"lower": "blong"}, {"lower": "vanuatu"}]}
 985 | {"label": "GPE", "pattern": [{"lower": "république"}, {"lower": "de"}, {"lower": "vanuatu"}]}
 986 | {"label": "GPE", "pattern": [{"lower": "venezuela"}, {"lower": "("}, {"lower": "bolivarian"}, {"lower": "republic"}, {"lower": "of"}, {"lower": ")"}]}
 987 | {"label": "GPE", "pattern": [{"lower": "ve"}]}
 988 | {"label": "GPE", "pattern": [{"lower": "bolivarian"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "venezuela"}]}
 989 | {"label": "GPE", "pattern": [{"lower": "república"}, {"lower": "bolivariana"}, {"lower": "de"}, {"lower": "venezuela"}]}
 990 | {"label": "GPE", "pattern": [{"lower": "viet"}, {"lower": "nam"}]}
 991 | {"label": "GPE", "pattern": [{"lower": "vn"}]}
 992 | {"label": "GPE", "pattern": [{"lower": "socialist"}, {"lower": "republic"}, {"lower": "of"}, {"lower": "vietnam"}]}
 993 | {"label": "GPE", "pattern": [{"lower": "cộng"}, {"lower": "hòa"}, {"lower": "xã"}, {"lower": "hội"}, {"lower": "chủ"}, {"lower": "nghĩa"}, {"lower": "việt"}, {"lower": "nam"}]}
 994 | {"label": "GPE", "pattern": [{"lower": "wallis"}, {"lower": "and"}, {"lower": "futuna"}]}
 995 | {"label": "GPE", "pattern": [{"lower": "wf"}]}
 996 | {"label": "GPE", "pattern": [{"lower": "territory"}, {"lower": "of"}, {"lower": "the"}, {"lower": "wallis"}, {"lower": "and"}, {"lower": "futuna"}, {"lower": "islands"}]}
 997 | {"label": "GPE", "pattern": [{"lower": "territoire"}, {"lower": "des"}, {"lower": "îles"}, {"lower": "wallis"}, {"lower": "et"}, {"lower": "futuna"}]}
 998 | {"label": "GPE", "pattern": [{"lower": "western"}, {"lower": "sahara"}]}
 999 | {"label": "GPE", "pattern": [{"lower": "eh"}]}
1000 | {"label": "GPE", "pattern": [{"lower": "taneẓroft"}, {"lower": "tutrimt"}]}
1001 | {"label": "GPE", "pattern": [{"lower": "yemen"}]}
1002 | {"label": "GPE", "pattern": [{"lower": "ye"}]}
1003 | {"label": "GPE", "pattern": [{"lower": "yemeni"}, {"lower": "republic"}]}
1004 | {"label": "GPE", "pattern": [{"lower": "al"}, {"lower": "-"}, {"lower": "jumhūriyyah"}, {"lower": "al"}, {"lower": "-"}, {"lower": "yamaniyyah"}]}
1005 | {"label": "GPE", "pattern": [{"lower": "zambia"}]}
1006 | {"label": "GPE", "pattern": [{"lower": "zm"}]}
1007 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "zambia"}]}
1008 | {"label": "GPE", "pattern": [{"lower": "zimbabwe"}]}
1009 | {"label": "GPE", "pattern": [{"lower": "zw"}]}
1010 | {"label": "GPE", "pattern": [{"lower": "republic"}, {"lower": "of"}, {"lower": "zimbabwe"}]}
1011 | 


--------------------------------------------------------------------------------
/src/recipes/example-patterns/patterns_drugs-DRUG.jsonl:
--------------------------------------------------------------------------------
  1 | {"label":"DRUG","pattern":[{"lower":"opiates"}]}
  2 | {"label":"DRUG","pattern":[{"lower":"cocaine"}]}
  3 | {"label":"DRUG","pattern":[{"lower":"methadone"}]}
  4 | {"label":"DRUG","pattern":[{"lower":"amphetamines"}]}
  5 | {"label":"DRUG","pattern":[{"lower":"meth"}]}
  6 | {"label":"DRUG","pattern":[{"lower":"morphine"}]}
  7 | {"label":"DRUG","pattern":[{"lower":"opium"}]}
  8 | {"label":"DRUG","pattern":[{"lower":"suboxone"}]}
  9 | {"label":"DRUG","pattern":[{"lower":"methamphetamine"}]}
 10 | {"label":"DRUG","pattern":[{"lower":"herion"}]}
 11 | {"label":"DRUG","pattern":[{"lower":"opioids"}]}
 12 | {"label":"DRUG","pattern":[{"lower":"oxycontin"}]}
 13 | {"label":"DRUG","pattern":[{"lower":"amphetamine"}]}
 14 | {"label":"DRUG","pattern":[{"lower":"hashish"}]}
 15 | {"label":"DRUG","pattern":[{"lower":"barbiturates"}]}
 16 | {"label":"DRUG","pattern":[{"lower":"nicotine"}]}
 17 | {"label":"DRUG","pattern":[{"lower":"alchohol"}]}
 18 | {"label":"DRUG","pattern":[{"lower":"cocain"}]}
 19 | {"label":"DRUG","pattern":[{"lower":"shrooms"}]}
 20 | {"label":"DRUG","pattern":[{"lower":"codeine"}]}
 21 | {"label":"DRUG","pattern":[{"lower":"marihuana"}]}
 22 | {"label":"DRUG","pattern":[{"lower":"hydrocodone"}]}
 23 | {"label":"DRUG","pattern":[{"lower":"xanax"}]}
 24 | {"label":"DRUG","pattern":[{"lower":"ketamine"}]}
 25 | {"label":"DRUG","pattern":[{"lower":"adderall"}]}
 26 | {"label":"DRUG","pattern":[{"lower":"ritalin"}]}
 27 | {"label":"DRUG","pattern":[{"lower":"ganja"}]}
 28 | {"label":"DRUG","pattern":[{"lower":"steroids"}]}
 29 | {"label":"DRUG","pattern":[{"lower":"mdma"}]}
 30 | {"label":"DRUG","pattern":[{"lower":"prozac"}]}
 31 | {"label":"DRUG","pattern":[{"lower":"coke"}]}
 32 | {"label":"DRUG","pattern":[{"lower":"benzodiazepine"}]}
 33 | {"label":"DRUG","pattern":[{"lower":"ambien"}]}
 34 | {"label":"DRUG","pattern":[{"lower":"ativan"}]}
 35 | {"label":"DRUG","pattern":[{"lower":"clonazepam"}]}
 36 | {"label":"DRUG","pattern":[{"lower":"subutex"}]}
 37 | {"label":"DRUG","pattern":[{"lower":"tylenol"}]}
 38 | {"label":"DRUG","pattern":[{"lower":"lorazepam"}]}
 39 | {"label":"DRUG","pattern":[{"lower":"laudanum"}]}
 40 | {"label":"DRUG","pattern":[{"lower":"paxil"}]}
 41 | {"label":"DRUG","pattern":[{"lower":"lortab"}]}
 42 | {"label":"DRUG","pattern":[{"lower":"opiods"}]}
 43 | {"label":"DRUG","pattern":[{"lower":"alprazolam"}]}
 44 | {"label":"DRUG","pattern":[{"lower":"alchol"}]}
 45 | {"label":"DRUG","pattern":[{"lower":"lexapro"}]}
 46 | {"label":"DRUG","pattern":[{"lower":"fentanyl"}]}
 47 | {"label":"DRUG","pattern":[{"lower":"effexor"}]}
 48 | {"label":"DRUG","pattern":[{"lower":"ghb"}]}
 49 | {"label":"DRUG","pattern":[{"lower":"cannabinoids"}]}
 50 | {"label":"DRUG","pattern":[{"lower":"steriods"}]}
 51 | {"label":"DRUG","pattern":[{"lower":"barbiturate"}]}
 52 | {"label":"DRUG","pattern":[{"lower":"bupropion"}]}
 53 | {"label":"DRUG","pattern":[{"lower":"kava"}]}
 54 | {"label":"DRUG","pattern":[{"lower":"khat"}]}
 55 | {"label":"DRUG","pattern":[{"lower":"flexeril"}]}
 56 | {"label":"DRUG","pattern":[{"lower":"oxycotin"}]}
 57 | {"label":"DRUG","pattern":[{"lower":"phencyclidine"}]}
 58 | {"label":"DRUG","pattern":[{"lower":"methodone"}]}
 59 | {"label":"DRUG","pattern":[{"lower":"paracetamol"}]}
 60 | {"label":"DRUG","pattern":[{"lower":"ibuprofen"}]}
 61 | {"label":"DRUG","pattern":[{"lower":"marijuanna"}]}
 62 | {"label":"DRUG","pattern":[{"lower":"zolpidem"}]}
 63 | {"label":"DRUG","pattern":[{"lower":"dilaudid"}]}
 64 | {"label":"DRUG","pattern":[{"lower":"kush"}]}
 65 | {"label":"DRUG","pattern":[{"lower":"phentermine"}]}
 66 | {"label":"DRUG","pattern":[{"lower":"dexedrine"}]}
 67 | {"label":"DRUG","pattern":[{"lower":"soma"}]}
 68 | {"label":"DRUG","pattern":[{"lower":"temazepam"}]}
 69 | {"label":"DRUG","pattern":[{"lower":"pcp"}]}
 70 | {"label":"DRUG","pattern":[{"lower":"naloxone"}]}
 71 | {"label":"DRUG","pattern":[{"lower":"ephedra"}]}
 72 | {"label":"DRUG","pattern":[{"lower":"darvocet"}]}
 73 | {"label":"DRUG","pattern":[{"lower":"sertraline"}]}
 74 | {"label":"DRUG","pattern":[{"lower":"oxys"}]}
 75 | {"label":"DRUG","pattern":[{"lower":"xanex"}]}
 76 | {"label":"DRUG","pattern":[{"lower":"promethazine"}]}
 77 | {"label":"DRUG","pattern":[{"lower":"diamorphine"}]}
 78 | {"label":"DRUG","pattern":[{"lower":"lamictal"}]}
 79 | {"label":"DRUG","pattern":[{"lower":"buspirone"}]}
 80 | {"label":"DRUG","pattern":[{"lower":"valerian"}]}
 81 | {"label":"DRUG","pattern":[{"lower":"alcahol"}]}
 82 | {"label":"DRUG","pattern":[{"lower":"neurontin"}]}
 83 | {"label":"DRUG","pattern":[{"lower":"pot"}]}
 84 | {"label":"DRUG","pattern":[{"lower":"percoset"}]}
 85 | {"label":"DRUG","pattern":[{"lower":"divinorum"}]}
 86 | {"label":"DRUG","pattern":[{"lower":"vicoden"}]}
 87 | {"label":"DRUG","pattern":[{"lower":"coccaine"}]}
 88 | {"label":"DRUG","pattern":[{"lower":"diflucan"}]}
 89 | {"label":"DRUG","pattern":[{"lower":"lortabs"}]}
 90 | {"label":"DRUG","pattern":[{"lower":"zopiclone"}]}
 91 | {"label":"DRUG","pattern":[{"lower":"diethylamide"}]}
 92 | {"label":"DRUG","pattern":[{"lower":"datura"}]}
 93 | {"label":"DRUG","pattern":[{"lower":"imitrex"}]}
 94 | {"label":"DRUG","pattern":[{"lower":"naproxen"}]}
 95 | {"label":"DRUG","pattern":[{"lower":"zyprexa"}]}
 96 | {"label":"DRUG","pattern":[{"lower":"ssris"}]}
 97 | {"label":"DRUG","pattern":[{"lower":"modafinil"}]}
 98 | {"label":"DRUG","pattern":[{"lower":"zanax"}]}
 99 | {"label":"DRUG","pattern":[{"lower":"oxycotton"}]}
100 | {"label":"DRUG","pattern":[{"lower":"scopolamine"}]}
101 | {"label":"DRUG","pattern":[{"lower":"baclofen"}]}
102 | {"label":"DRUG","pattern":[{"lower":"gabapentin"}]}
103 | {"label":"DRUG","pattern":[{"lower":"disulfiram"}]}
104 | {"label":"DRUG","pattern":[{"lower":"canabis"}]}
105 | {"label":"DRUG","pattern":[{"lower":"vicadin"}]}
106 | {"label":"DRUG","pattern":[{"lower":"poppers"}]}
107 | {"label":"DRUG","pattern":[{"lower":"nortriptyline"}]}
108 | {"label":"DRUG","pattern":[{"lower":"welbutrin"}]}
109 | {"label":"DRUG","pattern":[{"lower":"benedryl"}]}
110 | {"label":"DRUG","pattern":[{"lower":"nicotene"}]}
111 | {"label":"DRUG","pattern":[{"lower":"oxazepam"}]}
112 | {"label":"DRUG","pattern":[{"lower":"celebrex"}]}
113 | {"label":"DRUG","pattern":[{"lower":"caffein"}]}
114 | {"label":"DRUG","pattern":[{"lower":"azithromycin"}]}
115 | {"label":"DRUG","pattern":[{"lower":"advil"}]}
116 | {"label":"DRUG","pattern":[{"lower":"verapamil"}]}
117 | {"label":"DRUG","pattern":[{"lower":"sildenafil"}]}
118 | {"label":"DRUG","pattern":[{"lower":"sativa"}]}


--------------------------------------------------------------------------------
/src/recipes/example-patterns/patterns_insults-INSULT.jsonl:
--------------------------------------------------------------------------------
 1 | {"label":"INSULT","pattern":[{"lower":"prick"}]}
 2 | {"label":"INSULT","pattern":[{"lower":"arse"}]}
 3 | {"label":"INSULT","pattern":[{"lower":"fucker"}]}
 4 | {"label":"INSULT","pattern":[{"lower":"assholes"}]}
 5 | {"label":"INSULT","pattern":[{"lower":"moron"}]}
 6 | {"label":"INSULT","pattern":[{"lower":"bitch"}]}
 7 | {"label":"INSULT","pattern":[{"lower":"wanker"}]}
 8 | {"label":"INSULT","pattern":[{"lower":"bastard"}]}
 9 | {"label":"INSULT","pattern":[{"lower":"faggot"}]}
10 | {"label":"INSULT","pattern":[{"lower":"pussy"}]}
11 | {"label":"INSULT","pattern":[{"lower":"dumbass"}]}
12 | {"label":"INSULT","pattern":[{"lower":"jerk"}]}
13 | {"label":"INSULT","pattern":[{"lower":"pricks"}]}
14 | {"label":"INSULT","pattern":[{"lower":"fuckers"}]}
15 | {"label":"INSULT","pattern":[{"lower":"shithead"}]}
16 | {"label":"INSULT","pattern":[{"lower":"jerks"}]}
17 | {"label":"INSULT","pattern":[{"lower":"scumbag"}]}
18 | {"label":"INSULT","pattern":[{"lower":"asshat"}]}
19 | {"label":"INSULT","pattern":[{"lower":"slut"}]}
20 | {"label":"INSULT","pattern":[{"lower":"motherfucker"}]}
21 | {"label":"INSULT","pattern":[{"lower":"twats"}]}
22 | {"label":"INSULT","pattern":[{"lower":"nigger"}]}
23 | {"label":"INSULT","pattern":[{"lower":"jackass"}]}
24 | {"label":"INSULT","pattern":[{"lower":"skank"}]}
25 | {"label":"INSULT","pattern":[{"lower":"imbecile"}]}
26 | {"label":"INSULT","pattern":[{"lower":"wankers"}]}
27 | {"label":"INSULT","pattern":[{"lower":"morons"}]}
28 | {"label":"INSULT","pattern":[{"lower":"fool"}]}
29 | {"label":"INSULT","pattern":[{"lower":"dumbfuck"}]}
30 | {"label":"INSULT","pattern":[{"lower":"fatass"}]}
31 | {"label":"INSULT","pattern":[{"lower":"fuckface"}]}
32 | {"label":"INSULT","pattern":[{"lower":"mofo"}]}
33 | {"label":"INSULT","pattern":[{"lower":"faggots"}]}
34 | {"label":"INSULT","pattern":[{"lower":"twit"}]}
35 | {"label":"INSULT","pattern":[{"lower":"dumbshit"}]}
36 | {"label":"INSULT","pattern":[{"lower":"fagget"}]}
37 | {"label":"INSULT","pattern":[{"lower":"ahole"}]}
38 | {"label":"INSULT","pattern":[{"lower":"dimwit"}]}
39 | {"label":"INSULT","pattern":[{"lower":"cretin"}]}
40 | {"label":"INSULT","pattern":[{"lower":"bugger"}]}
41 | {"label":"INSULT","pattern":[{"lower":"douchbag"}]}
42 | {"label":"INSULT","pattern":[{"lower":"fags"}]}
43 | {"label":"INSULT","pattern":[{"lower":"douchebags"}]}
44 | {"label":"INSULT","pattern":[{"lower":"lowlife"}]}
45 | {"label":"INSULT","pattern":[{"lower":"wimp"}]}
46 | {"label":"INSULT","pattern":[{"lower":"crybaby"}]}
47 | {"label":"INSULT","pattern":[{"lower":"motherfuckers"}]}
48 | {"label":"INSULT","pattern":[{"lower":"shitbag"}]}
49 | {"label":"INSULT","pattern":[{"lower":"basterd"}]}
50 | {"label":"INSULT","pattern":[{"lower":"smartass"}]}
51 | {"label":"INSULT","pattern":[{"lower":"sissy"}]}
52 | {"label":"INSULT","pattern":[{"lower":"buffoon"}]}
53 | {"label":"INSULT","pattern":[{"lower":"hussy"}]}
54 | {"label":"INSULT","pattern":[{"lower":"nutcase"}]}
55 | {"label":"INSULT","pattern":[{"lower":"dirtbag"}]}
56 | {"label":"INSULT","pattern":[{"lower":"fuckwad"}]}
57 | {"label":"INSULT","pattern":[{"lower":"hick"}]}
58 | {"label":"INSULT","pattern":[{"lower":"bunghole"}]}
59 | {"label":"INSULT","pattern":[{"lower":"shitheads"}]}


--------------------------------------------------------------------------------
/src/recipes/image/image_manual.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import Images
 6 | from prodigy.util import split_string
 7 | 
 8 | 
 9 | # Recipe decorator with argument annotations: (description, argument type,
10 | # shortcut, type / converter function called on value before it's passed to
11 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe(
    'image.manual',
    dataset=("The dataset to use", "positional", None, str),
    source=("Path to a directory of images", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
    darken=("Darken image to make boxes stand out more", "flag", "D", bool),
)
def image_manual(dataset, source, label=None, exclude=None, darken=False):
    """
    Annotate images by hand: draw rectangular bounding boxes or polygon
    shapes on each image.

    dataset: name of the dataset the annotations are saved to.
    source: path handed to the `Images` loader.
    label: list of selectable label names, or None.
    exclude: list of dataset names to exclude, or None.
    darken: if set, 'darken_image' is 0.3 instead of 0.

    Returns the dictionary of recipe components.
    """
    # Build the example stream first; the loader is given the source path
    # exactly as it was passed in.
    image_stream = Images(source)

    # Text describing the active label set: the names joined with commas,
    # or 'all' when no labels were supplied.
    if label is None:
        label_title = 'all'
    else:
        label_title = ', '.join(label)

    # Extra settings, mostly consumed by the app UI.
    ui_config = {
        'label': label_title,
        'labels': label,
        'darken_image': 0.3 if darken else 0,
    }

    components = {
        'view_id': 'image_manual',  # annotation interface to use
        'dataset': dataset,         # where annotations are stored
        'stream': image_stream,     # incoming examples
        'exclude': exclude,         # dataset names to exclude
        'config': ui_config,
    }
    return components
40 | 


--------------------------------------------------------------------------------
/src/recipes/ner/ner_make_gold.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import JSONL
 6 | from prodigy.components.preprocess import add_tokens
 7 | from prodigy.util import split_string, set_hashes
 8 | import spacy
 9 | import copy
10 | 
11 | 
def make_tasks(nlp, stream, labels):
    """Yield a copy of every example with the predicted entities under 'spans'.

    nlp: object whose `pipe` method is called with (text, context) tuples.
    stream: iterable of example dicts, each with a 'text' key.
    labels: collection of entity labels to keep; when falsy, every predicted
        entity is kept.
    """
    def span_from_entity(ent):
        # 'token_end' is the index of the entity's last token, hence the -1
        # applied to the exclusive end offset.
        return {
            'token_start': ent.start,
            'token_end': ent.end - 1,
            'start': ent.start_char,
            'end': ent.end_char,
            'text': ent.text,
            'label': ent.label_
        }

    # With as_tuples=True, nlp.pipe takes (text, context) pairs and hands
    # back (doc, context) pairs, so each doc stays attached to its example.
    paired = ((example['text'], example) for example in stream)
    for doc, example in nlp.pipe(paired, as_tuples=True):
        # Work on a deep copy so the incoming example is left untouched.
        annotated = copy.deepcopy(example)
        annotated['spans'] = [
            span_from_entity(ent)
            for ent in doc.ents
            # Keep the entity unless a label filter is set and excludes it.
            if not labels or ent.label_ in labels
        ]
        # Set the hashes on the new task after the spans were added.
        yield set_hashes(annotated)
37 | 
38 | 
39 | # Recipe decorator with argument annotations: (description, argument type,
40 | # shortcut, type / converter function called on value before it's passed to
41 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('ner.make-gold',
    dataset=("The dataset to use", "positional", None, str),
    spacy_model=("The base model", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    # The argument kind must be "option" (singular), as in the other recipes.
    label=("One or more comma-separated labels", "option", "l", split_string),
    exclude=("Names of datasets to exclude", "option", "e", split_string)
)
def ner_make_gold(dataset, spacy_model, source, label=None, exclude=None):
    """
    Create gold-standard data by correcting a model's predictions manually.

    dataset: name of the dataset to save annotations to.
    spacy_model: name or path passed to `spacy.load`.
    source: path of the JSONL file to load examples from.
    label: list of labels; restricts which predicted entities are
        pre-highlighted and which labels are selectable. None keeps all
        predicted entities.
    exclude: list of dataset names to exclude, or None.

    Returns the dictionary of recipe components.
    """
    # Load the spaCy model
    nlp = spacy.load(spacy_model)

    # Load the stream from a JSONL file and return a generator that yields a
    # dictionary for each example in the data.
    stream = JSONL(source)

    # Tokenize the incoming examples and add a "tokens" property to each
    # example. Also handles pre-defined selected spans. Tokenization allows
    # faster highlighting, because the selection can "snap" to token boundaries.
    stream = add_tokens(nlp, stream)

    # Add the entities predicted by the model to the tasks in the stream
    stream = make_tasks(nlp, stream, label)

    return {
        'view_id': 'ner_manual', # Annotation interface to use
        'dataset': dataset,      # Name of dataset to save annotations
        'stream': stream,        # Incoming stream of examples
        'exclude': exclude,      # List of dataset names to exclude
        'config': {              # Additional config settings, mostly for app UI
            'lang': nlp.lang,
            'label': ', '.join(label) if label is not None else 'all',
            'labels': label      # Selectable label options
        }
    }
79 | 


--------------------------------------------------------------------------------
/src/recipes/ner/ner_manual.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import JSONL
 6 | from prodigy.components.preprocess import add_tokens
 7 | from prodigy.util import split_string
 8 | import spacy
 9 | 
10 | 
11 | # Recipe decorator with argument annotations: (description, argument type,
12 | # shortcut, type / converter function called on value before it's passed to
13 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe(
    'ner.manual',
    dataset=("The dataset to use", "positional", None, str),
    spacy_model=("The base model", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
)
def ner_manual(dataset, spacy_model, source, label=None, exclude=None):
    """
    Highlight spans by hand, token by token. Only the model's tokenizer is
    needed: no entity recognizer is used and no active learning takes place.

    dataset: name of the dataset to save annotations to.
    spacy_model: name or path passed to `spacy.load` (used for tokenization).
    source: path of the JSONL file to load examples from.
    label: list of selectable label names, or None.
    exclude: list of dataset names to exclude, or None.

    Returns the dictionary of recipe components.
    """
    nlp = spacy.load(spacy_model)

    # Read the examples from the JSONL file and add a "tokens" property to
    # each one, so that selections in the UI can snap to token boundaries.
    tokenized_stream = add_tokens(nlp, JSONL(source))

    # Text describing the active label set: the names joined with commas,
    # or 'all' when no labels were supplied.
    if label is None:
        label_title = 'all'
    else:
        label_title = ', '.join(label)

    # Extra settings, mostly consumed by the app UI.
    ui_config = {
        'lang': nlp.lang,
        'label': label_title,
        'labels': label,
    }

    components = {
        'view_id': 'ner_manual',     # annotation interface to use
        'dataset': dataset,          # where annotations are stored
        'stream': tokenized_stream,  # incoming examples
        'exclude': exclude,          # dataset names to exclude
        'config': ui_config,
    }
    return components
49 | 


--------------------------------------------------------------------------------
/src/recipes/ner/ner_match.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import JSONL
 6 | from prodigy.models.matcher import PatternMatcher
 7 | from prodigy.components.db import connect
 8 | from prodigy.util import split_string
 9 | import spacy
10 | 
11 | 
12 | # Recipe decorator with argument annotations: (description, argument type,
13 | # shortcut, type / converter function called on value before it's passed to
14 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('ner.match',
    dataset=("The dataset to use", "positional", None, str),
    spacy_model=("The base model", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    patterns=("Optional match patterns", "option", "p", str),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
    resume=("Resume from existing dataset and update matcher accordingly", "flag", "R", bool)
)
def ner_match(dataset, spacy_model, source, patterns=None, exclude=None,
              resume=False):
    """
    Suggest phrases that match a given patterns file, and mark whether they
    are examples of the entity you're interested in. The patterns file can
    include exact strings or token patterns for use with spaCy's `Matcher`.

    dataset: name of the dataset to save annotations to.
    spacy_model: name or path passed to `spacy.load`.
    source: path of the JSONL file to load examples from.
    patterns: path of the JSONL patterns file. Declared as an option on the
        command line, but the matcher cannot be built without it.
    exclude: list of dataset names to exclude, or None.
    resume: if set and the dataset already exists, its annotations are used
        to update the matcher before streaming.

    Returns the dictionary of recipe components.
    Raises ValueError if no patterns file was given.
    """
    # The patterns path is handed straight to the matcher's loader, so fail
    # early with a clear message instead of passing None through.
    if patterns is None:
        raise ValueError("ner.match requires a patterns file. Provide one "
                         "with --patterns (-p).")

    # Load the spaCy model
    nlp = spacy.load(spacy_model)

    # Initialize the pattern matcher and load in the JSONL patterns
    matcher = PatternMatcher(nlp).from_disk(patterns)

    if resume:
        # Connect to the database using the settings from prodigy.json
        DB = connect()
        if dataset and dataset in DB:
            # Get the existing annotations and update the matcher
            existing = DB.get_dataset(dataset)
            matcher.update(existing)

    # Load the stream from a JSONL file and return a generator that yields a
    # dictionary for each example in the data.
    stream = JSONL(source)

    # Apply the matcher to the stream, which returns (score, example) tuples.
    # Filter out the scores to only yield the examples for annotations.
    stream = (eg for score, eg in matcher(stream))

    return {
        'view_id': 'ner',       # Annotation interface to use
        'dataset': dataset,     # Name of dataset to save annotations
        'stream': stream,       # Incoming stream of examples
        'exclude': exclude,     # List of dataset names to exclude
        'config': {             # Additional config settings, mostly for app UI
            'lang': nlp.lang
        }
    }
61 | 


--------------------------------------------------------------------------------
/src/recipes/ner/ner_silver_to_gold.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.models.ner import EntityRecognizer
 6 | from prodigy.components.preprocess import add_tokens
 7 | from prodigy.components.db import connect
 8 | from prodigy.util import split_string
 9 | import spacy
10 | 
11 | 
12 | # Recipe decorator with argument annotations: (description, argument type,
13 | # shortcut, type / converter function called on value before it's passed to
14 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('ner.silver-to-gold',
    silver_dataset=("Existing dataset with binary annotations", "positional", None, str),
    gold_dataset=("Name of dataset to save new annotations", "positional", None, str),
    spacy_model=("The base model", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string)
)
def ner_silver_to_gold(silver_dataset, gold_dataset, spacy_model, label=None):
    """
    Take an existing "silver" dataset with binary accept/reject annotations,
    merge the annotations to find the best possible analysis given the
    constraints defined in the annotations, and manually edit it to create
    a perfect and complete "gold" dataset.

    silver_dataset: name of the existing dataset to read annotations from.
    gold_dataset: name of the dataset the corrected annotations are saved to.
    spacy_model: name or path passed to `spacy.load`.
    label: list of labels to use. When empty or None, the labels are derived
        from the move names of the model's 'ner' pipe.

    Returns the dictionary of recipe components.
    Raises ValueError if the silver dataset is not in the database.
    """
    # Connect to the database using the settings from prodigy.json, check
    # that the silver dataset exists and load it
    DB = connect()
    if silver_dataset not in DB:
        raise ValueError("Can't find dataset '{}'.".format(silver_dataset))
    silver_data = DB.get_dataset(silver_dataset)

    # Load the spaCy model
    nlp = spacy.load(spacy_model)
    if not label:
        # Get the labels from the model by looking at the available moves, e.g.
        # B-PERSON, I-PERSON, L-PERSON, U-PERSON
        ner = nlp.get_pipe('ner')
        # Split on the first hyphen only, so that labels which themselves
        # contain a hyphen (e.g. B-MY-LABEL -> MY-LABEL) are kept whole.
        # Empty or hyphen-less move names are skipped.
        label = sorted({
            move.split('-', 1)[1]
            for move in ner.move_names
            if move[:1] in ('B', 'I', 'L', 'U') and '-' in move
        })

    # Initialize Prodigy's entity recognizer model, which uses beam search to
    # find all possible analyses and outputs (score, example) tuples
    model = EntityRecognizer(nlp, label=label)

    # Merge all annotations and find the best possible analyses
    stream = model.make_best(silver_data)

    # Tokenize the incoming examples and add a "tokens" property to each
    # example. Also handles pre-defined selected spans. Tokenization allows
    # faster highlighting, because the selection can "snap" to token boundaries.
    stream = add_tokens(nlp, stream)

    return {
        'view_id': 'ner_manual', # Annotation interface to use
        'dataset': gold_dataset, # Name of dataset to save annotations
        'stream': stream,        # Incoming stream of examples
        'config': {              # Additional config settings, mostly for app UI
            'lang': nlp.lang,
            'labels': label      # Selectable label options
        }
    }
66 | 


--------------------------------------------------------------------------------
/src/recipes/ner/ner_teach.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import JSONL
 6 | from prodigy.models.ner import EntityRecognizer
 7 | from prodigy.models.matcher import PatternMatcher
 8 | from prodigy.components.preprocess import split_sentences
 9 | from prodigy.components.sorters import prefer_uncertain
10 | from prodigy.util import combine_models, split_string
11 | import spacy
12 | 
13 | 
14 | # Recipe decorator with argument annotations: (description, argument type,
15 | # shortcut, type / converter function called on value before it's passed to
16 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('ner.teach',
    dataset=("The dataset to use", "positional", None, str),
    spacy_model=("The base model", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string),
    patterns=("Optional match patterns", "option", "p", str),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
    unsegmented=("Don't split sentences", "flag", "U", bool)
)
def ner_teach(dataset, spacy_model, source=None, label=None, patterns=None,
              exclude=None, unsegmented=False):
    """
    Annotate named entities with a model in the loop: the entity recognizer
    (optionally combined with a pattern matcher) scores the incoming examples
    and the most uncertain suggestions are presented first.

    RETURNS (dict): The recipe components ('view_id', 'dataset', 'stream',
        'update', 'exclude', 'config').
    """
    # Generator of example dicts read from the JSONL source file
    stream = JSONL(source)

    # The spaCy pipeline that backs both the recognizer and the matcher
    nlp = spacy.load(spacy_model)

    # Prodigy's beam-search entity recognizer; it outputs (score, example)
    # tuples
    model = EntityRecognizer(nlp, label=label)

    if patterns is not None:
        # Load the JSONL patterns into a matcher, then interleave the
        # suggestions of model and matcher and update both at the same time
        matcher = PatternMatcher(nlp).from_disk(patterns)
        predict, update = combine_models(model, matcher)
    else:
        # Without patterns the NER model alone suggests examples and only
        # its own update method is used as the update callback
        predict, update = model, model.update

    # Unless disabled by the flag, let spaCy split the texts into sentences
    stream = stream if unsegmented else split_sentences(nlp, stream)

    # prefer_uncertain focuses on the suggestions with a score closest to
    # 0.5; it consumes (score, example) tuples and yields just the example
    stream = prefer_uncertain(predict(stream))

    label_text = 'all' if label is None else ', '.join(label)
    ui_config = {'lang': nlp.lang, 'label': label_text}

    return {
        'view_id': 'ner',       # Annotation interface to use
        'dataset': dataset,     # Name of dataset to save annotations
        'stream': stream,       # Incoming stream of examples
        'update': update,       # Update callback, called with batch of answers
        'exclude': exclude,     # List of dataset names to exclude
        'config': ui_config     # Additional config settings, mostly for app UI
    }
76 | 


--------------------------------------------------------------------------------
/src/recipes/other/choice.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import JSONL
 6 | from prodigy.util import split_string
 7 | 
 8 | 
 9 | def add_options(stream, options):
10 |     """Helper function to add options to every task in a stream."""
11 |     options = [{'id': option, 'text': option} for option in options]
12 |     for task in stream:
13 |         task['options'] = options
14 |         yield task
15 | 
16 | 
17 | # Recipe decorator with argument annotations: (description, argument type,
18 | # shortcut, type / converter function called on value before it's passed to
19 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('choice',
    dataset=("The dataset to use", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    options=("One or more comma-separated options", "option", "o", split_string),
    multiple=("Allow multiple choice", "flag", "M", bool)
)
def choice(dataset, source=None, options=None, multiple=False):
    """
    Multiple-choice annotation recipe. Each annotated example gets an extra
    `"accept": []` property that maps to the ID(s) of the option(s) the
    annotator selected.

    RETURNS (dict): The recipe components ('view_id', 'dataset', 'stream',
        'config').
    """
    # Read the examples from the JSONL file (a generator of dicts) and attach
    # the options to each of them
    stream = add_options(JSONL(source), options)

    if multiple:
        # Several options may be selected, so answers are not locked in
        # automatically
        style, auto_accept = 'multiple', False
    else:
        # Single choice: automatically accept and "lock in" the selection
        style, auto_accept = 'single', True

    return {
        'view_id': 'choice',    # Annotation interface to use
        'dataset': dataset,     # Name of dataset to save annotations
        'stream': stream,       # Incoming stream of examples
        'config': {             # Additional config settings
            'choice_style': style,
            'choice_auto_accept': auto_accept
        }
    }
51 | 


--------------------------------------------------------------------------------
/src/recipes/other/mark.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import JSONL
 6 | from prodigy.util import split_string
 7 | from collections import Counter
 8 | 
 9 | 
10 | # Recipe decorator with argument annotations: (description, argument type,
11 | # shortcut, type / converter function called on value before it's passed to
12 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('mark',
    dataset=("The dataset to use", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    view_id=("ID of annotation interface", "option", "o", str),
    exclude=("Names of datasets to exclude", "option", "e", split_string)
)
def mark(dataset, source, view_id, exclude=None):
    """
    Click through pre-prepared examples, with no model in the loop. The
    recipe keeps a running tally of the answers and prints it on exit.

    RETURNS (dict): The recipe components ('view_id', 'dataset', 'stream',
        'update', 'on_load', 'on_exit').
    """
    # Tally of answers, keyed by the value of each example's 'answer'
    counts = Counter()

    # Generator that yields one dictionary per example in the JSONL file
    stream = JSONL(source)

    def on_load(controller):
        # The controller exposes the database via controller.db. Nothing to
        # count if the current dataset is not in the database yet.
        if dataset not in controller.db:
            return
        # Seed the tally with the answers that were already saved
        counts.update(eg['answer']
                      for eg in controller.db.get_dataset(dataset))

    def receive_answers(answers):
        # Add the newly received answers to the tally
        counts.update(eg['answer'] for eg in answers)

    def on_exit(controller):
        # Report the totals per answer type, then the overall total
        for title, key in (('Accept:', 'accept'),
                           ('Reject:', 'reject'),
                           ('Ignore:', 'ignore')):
            print(title, counts[key])
        print('Total: ', sum(counts.values()))

    return {
        'view_id': view_id,         # Annotation interface to use
        'dataset': dataset,         # Name of dataset to save annotations
        'stream': stream,           # Incoming stream of examples
        'update': receive_answers,  # Update callback, called with answers
        'on_load': on_load,         # Called on first load
        'on_exit': on_exit          # Called when Prodigy server is stopped
    }
59 | 


--------------------------------------------------------------------------------
/src/recipes/terms/terms_teach.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.db import connect
 6 | from prodigy.components.sorters import Probability
 7 | from prodigy.util import split_string, set_hashes
 8 | import spacy
 9 | from spacy.tokens import Doc
10 | 
11 | 
12 | # Recipe decorator with argument annotations: (description, argument type,
13 | # shortcut, type / converter function called on value before it's passed to
14 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('terms.teach',
    dataset=("The dataset to use", "positional", None, str),
    vectors=("Loadable spaCy model with word vectors", "positional", None, str),
    seeds=("One or more comma-separated seed terms", "option", "o", split_string)
)
def terms_teach(dataset, vectors, seeds):
    """
    Bootstrap a terminology list with word vectors and seeds terms. Prodigy
    will suggest similar terms based on the word vectors, and update the
    target vector accordingly.

    dataset (str): Name of the dataset to save annotations to.
    vectors (str): Name or path passed to spacy.load.
    seeds (list): Seed terms that count as accepted from the start. None or
        an empty list means "no seeds".
    RETURNS (dict): The recipe components ('view_id', 'dataset', 'stream',
        'update'). The stream ends once every candidate term has been
        accepted or rejected.
    """
    # Treat a missing seeds value as "no seeds" instead of failing when the
    # seeds are iterated below
    seeds = list(seeds) if seeds else []

    # Connect to the database using the settings from prodigy.json and add the
    # seed terms to the dataset (only if the dataset already exists)
    DB = connect()
    if dataset and dataset in DB:
        seed_tasks = [set_hashes({'text': s, 'answer': 'accept'}) for s in seeds]
        DB.add_examples(seed_tasks, datasets=[dataset])

    # Load the spaCy model with vectors
    nlp = spacy.load(vectors)

    # Create two Doc objects for the accepted and rejected terms
    accept_doc = Doc(nlp.vocab, words=seeds)
    reject_doc = Doc(nlp.vocab, words=[])
    # Running accept-minus-reject counter maintained by update(); nothing in
    # this recipe reads it back
    score = 0

    def predict(term):
        """Score a term given the current accept_doc and reject_doc."""
        if len(accept_doc) == 0 and len(reject_doc) == 0:
            return 0.5
        # Use spaCy's .similarity() method to compare the term to the
        # accepted and rejected Doc
        accept_score = max(term.similarity(accept_doc), 0.0)
        reject_score = max(term.similarity(reject_doc), 0.0)
        # The constant in the denominator keeps it non-zero when both
        # similarities are 0
        ratio = accept_score / (accept_score + reject_score + 0.2)
        return max(ratio, 0.0)

    def update(answers):
        # Called whenever Prodigy receives new annotations
        nonlocal accept_doc, reject_doc, score
        accept_words = [t.text for t in accept_doc]
        reject_words = [t.text for t in reject_doc]
        for answer in answers:
            # Increase or decrease score depending on answer and update
            # list of accepted and rejected terms
            if answer['answer'] == 'accept':
                score += 1
                accept_words.append(answer['text'])
            elif answer['answer'] == 'reject':
                score -= 1
                reject_words.append(answer['text'])
        # Rebuild the target documents and rebind the enclosing names, so
        # that predict() and score_stream() see the new terms
        accept_doc = Doc(nlp.vocab, words=accept_words)
        reject_doc = Doc(nlp.vocab, words=reject_words)

    def score_stream(stream):
        # Get all lexemes in the vocab and score them
        lexemes = [lex for lex in stream if lex.is_alpha and lex.is_lower]
        while True:
            # Drop everything that has been accepted or rejected so far
            seen = set(w.orth for w in accept_doc)
            seen.update(set(w.orth for w in reject_doc))
            lexemes = [w for w in lexemes if w.orth not in seen]
            if not lexemes:
                # Nothing left to suggest. Without this check the loop would
                # spin forever without ever yielding another example.
                return
            by_score = [(predict(lex), lex) for lex in lexemes]
            by_score.sort(reverse=True)
            for _, term in by_score:
                # Re-score at yield time: the target docs may have been
                # replaced by update() since the list was sorted
                term_score = predict(term)
                # Return (score, example) tuples for the scored terms
                yield term_score, {'text': term.text,
                                   'meta': {'score': term_score}}

    # Sort the scored vocab by probability and return examples
    stream = Probability(score_stream(nlp.vocab))

    return {
        'view_id': 'text',          # Annotation interface to use
        'dataset': dataset,         # Name of dataset to save annotations
        'stream': stream,           # Incoming stream of examples
        'update': update,           # Update callback, called with answers
    }
93 | 


--------------------------------------------------------------------------------
/src/recipes/tests.py:
--------------------------------------------------------------------------------
  1 | # coding: utf8
  2 | from __future__ import unicode_literals
  3 | 
  4 | import pytest
  5 | import tempfile
  6 | from contextlib import contextmanager
  7 | from prodigy.components.db import connect
  8 | from prodigy.util import write_jsonl, INPUT_HASH_ATTR, TASK_HASH_ATTR
  9 | from prodigy.models.ner import merge_spans
 10 | 
 11 | from ner.ner_teach import ner_teach
 12 | from ner.ner_match import ner_match
 13 | from ner.ner_manual import ner_manual
 14 | from ner.ner_make_gold import ner_make_gold
 15 | from ner.ner_silver_to_gold import ner_silver_to_gold
 16 | from textcat.textcat_teach import textcat_teach
 17 | from textcat.textcat_custom_model import textcat_custom_model
 18 | from terms.terms_teach import terms_teach
 19 | from image.image_manual import image_manual
 20 | from other.mark import mark
 21 | from other.choice import choice
 22 | 
 23 | 
@pytest.fixture()
def dataset():
    """Dataset value handed to the recipes under test.

    It is False rather than a dataset name; e.g. terms_teach's
    `if dataset and dataset in DB` check short-circuits on it.
    """
    return False
 27 | 
 28 | 
@pytest.fixture
def spacy_model():
    """Name of the spaCy model passed to the recipes as their base model."""
    return 'en_core_web_sm'
 32 | 
 33 | 
@pytest.fixture
def vectors():
    """Name of the spaCy model passed to terms_teach as its vectors model."""
    return 'en_core_web_md'
 37 | 
 38 | 
@pytest.fixture
def labels():
    """Label list passed to the recipes that accept labels."""
    return ['PERSON', 'ORG']
 42 | 
 43 | 
 44 | @pytest.fixture()
 45 | def source():
 46 |     texts = ['This is a text about David Bowie', 'Apple makes iPhones']
 47 |     examples = [{'text': text} for text in texts]
 48 |     _, tmp_file = tempfile.mkstemp()
 49 |     write_jsonl(tmp_file, examples)
 50 |     return tmp_file
 51 | 
 52 | 
 53 | @pytest.fixture()
 54 | def patterns():
 55 |     examples = [{'label': 'PERSON', 'pattern': 'David Bowie'},
 56 |                 {'label': 'ORG', 'pattern': [{'lower': 'apple'}]}]
 57 |     _, tmp_file = tempfile.mkstemp()
 58 |     write_jsonl(tmp_file, examples)
 59 |     return tmp_file
 60 | 
 61 | 
 62 | @contextmanager
 63 | def tmp_dataset(name, examples=[]):
 64 |     DB = connect()
 65 |     DB.add_dataset(name)
 66 |     DB.add_examples(examples, datasets=[name])
 67 |     yield examples
 68 |     DB.drop_dataset(name)
 69 | 
 70 | 
 71 | def test_ner_teach(dataset, spacy_model, source, labels, patterns):
 72 |     recipe = ner_teach(dataset, spacy_model, source, labels, patterns)
 73 |     stream = list(recipe['stream'])
 74 |     assert recipe['view_id'] == 'ner'
 75 |     assert recipe['dataset'] == dataset
 76 |     assert len(stream) == 5
 77 |     assert 'spans' in stream[0]
 78 |     assert 'tokens' in stream[0]
 79 |     assert 'meta' in stream[0]
 80 |     assert 'score' in stream[0]['meta']
 81 | 
 82 | 
 83 | def test_ner_match(dataset, spacy_model, source, patterns):
 84 |     recipe = ner_match(dataset, spacy_model, source, patterns)
 85 |     stream = list(recipe['stream'])
 86 |     assert recipe['view_id'] == 'ner'
 87 |     assert recipe['dataset'] == dataset
 88 |     assert len(stream) == 2
 89 |     assert 'spans' in stream[0]
 90 |     assert len(stream[0]['spans']) == 1
 91 |     assert stream[0]['spans'][0]['label'] == 'PERSON'
 92 |     assert 'spans' in stream[1]
 93 |     assert len(stream[1]['spans']) == 1
 94 |     assert stream[1]['spans'][0]['label'] == 'ORG'
 95 | 
 96 | 
 97 | def test_ner_manual(dataset, spacy_model, source, labels):
 98 |     recipe = ner_manual(dataset, spacy_model, source, labels)
 99 |     stream = list(recipe['stream'])
100 |     assert recipe['view_id'] == 'ner_manual'
101 |     assert recipe['dataset'] == dataset
102 |     assert len(stream) == 2
103 |     assert 'tokens' in stream[0]
104 |     assert 'tokens' in stream[1]
105 | 
106 | 
def test_ner_make_gold(dataset, spacy_model, source, labels):
    """ner_make_gold returns tasks that carry both spans and tokens."""
    components = ner_make_gold(dataset, spacy_model, source, labels)
    tasks = list(components['stream'])
    assert components['view_id'] == 'ner_manual'
    assert components['dataset'] == dataset
    assert len(tasks) == 2
    first = tasks[0]
    for key in ('spans', 'tokens'):
        assert key in first
115 | 
116 | 
def test_ner_silver_to_gold(dataset, spacy_model):
    """ner_silver_to_gold merges binary annotations into one task per text."""

    def annotation(input_hash, task_hash, text, answer, start, end, label):
        # One binary (accept / reject) annotation of a single span
        return {
            INPUT_HASH_ATTR: input_hash,
            TASK_HASH_ATTR: task_hash,
            'text': text,
            'answer': answer,
            'spans': [{'start': start, 'end': end, 'label': label}]
        }

    silver_name = '__test_ner_silver_to_gold__'
    # Two annotations share the first input hash, the third has its own
    silver_annotations = [
        annotation(1, 11, 'Hello world', 'accept', 0, 5, 'PERSON'),
        annotation(1, 12, 'Hello world', 'reject', 6, 11, 'PERSON'),
        annotation(2, 21, 'This is a test', 'reject', 5, 7, 'ORG'),
    ]
    # The stream is consumed while the temporary silver dataset still exists
    with tmp_dataset(silver_name, silver_annotations):
        components = ner_silver_to_gold(silver_name, dataset, spacy_model)
        tasks = list(components['stream'])
    assert components['view_id'] == 'ner_manual'
    assert components['dataset'] == dataset
    assert len(tasks) == 2
    for index, expected_text in enumerate(('Hello world', 'This is a test')):
        assert tasks[index]['text'] == expected_text
        assert 'tokens' in tasks[index]
152 | 
153 | 
def test_textcat_teach(dataset, spacy_model, source, labels, patterns):
    """textcat_teach returns labelled, scored classification tasks."""
    components = textcat_teach(dataset, spacy_model, source, labels, patterns)
    tasks = list(components['stream'])
    assert components['view_id'] == 'classification'
    assert components['dataset'] == dataset
    assert len(tasks) >= 2
    first = tasks[0]
    for key in ('label', 'meta'):
        assert key in first
    assert 'score' in first['meta']
163 | 
164 | 
def test_textcat_custom_model(dataset, source, labels):
    """textcat_custom_model returns at least one labelled task."""
    components = textcat_custom_model(dataset, source, labels)
    tasks = list(components['stream'])
    assert components['view_id'] == 'classification'
    assert components['dataset'] == dataset
    assert len(tasks) >= 1
    first = tasks[0]
    assert 'label' in first
172 | 
173 | 
def test_terms_teach(dataset, vectors):
    """terms_teach returns the 'text' interface for the given dataset."""
    seed_terms = ['cat', 'dog', 'mouse']
    components = terms_teach(dataset, vectors, seed_terms)
    # Only the static recipe components are checked; the stream is not consumed
    assert components['view_id'] == 'text'
    assert components['dataset'] == dataset
179 | 
180 | 
def test_image_manual(dataset):
    """image_manual streams the two image files of a directory, not the .txt."""
    import shutil

    img_dir = tempfile.mkdtemp()
    try:
        # The files only have to exist while the stream is consumed; leaving
        # the with-block closes (and thereby deletes) them deterministically
        with tempfile.NamedTemporaryFile(dir=img_dir, prefix='1', suffix='.jpg'), \
                tempfile.NamedTemporaryFile(dir=img_dir, prefix='2', suffix='.png'), \
                tempfile.NamedTemporaryFile(dir=img_dir, prefix='3', suffix='.txt'):
            recipe = image_manual(dataset, img_dir, ['PERSON', 'DOG', 'CAT'])
            stream = list(recipe['stream'])
    finally:
        # Remove the temporary directory, which was previously left behind
        shutil.rmtree(img_dir, ignore_errors=True)
    assert recipe['view_id'] == 'image_manual'
    assert recipe['dataset'] == dataset
    assert len(stream) == 2
191 | 
192 | 
def test_mark(dataset, source):
    """mark passes the view ID through and exposes its three callbacks."""
    interface = 'text'
    components = mark(dataset, source, interface)
    tasks = list(components['stream'])
    assert components['view_id'] == interface
    assert components['dataset'] == dataset
    assert len(tasks) == 2
    for callback_name in ('update', 'on_load', 'on_exit'):
        assert hasattr(components[callback_name], '__call__')
203 | 
204 | 
def test_choice(dataset, source):
    """choice attaches the options to each task and defaults to single choice."""
    option_ids = ['OPTION_A', 'OPTION_B', 'OPTION_C']
    components = choice(dataset, source, option_ids)
    tasks = list(components['stream'])
    assert components['view_id'] == 'choice'
    assert components['dataset'] == dataset
    assert len(tasks) == 2
    first = tasks[0]
    assert 'options' in first
    assert len(first['options']) == 3
    assert first['options'][0]['id'] == 'OPTION_A'
    config = components['config']
    assert config['choice_style'] == 'single'
    assert config['choice_auto_accept']
217 | 


--------------------------------------------------------------------------------
/src/recipes/textcat/textcat_custom_model.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import JSONL
 6 | from prodigy.components.sorters import prefer_uncertain
 7 | from prodigy.util import split_string
 8 | import random
 9 | 
10 | 
class DummyModel(object):
    """Stand-in for a real model, used to illustrate a model in the loop.

    It "predicts" random numbers. Swap it out for any model of your choice,
    for example a text classification model implemented with PyTorch,
    TensorFlow or scikit-learn.
    """

    def __init__(self, labels=None):
        # Arbitrary model state: a single random float stands in for the
        # current weights
        self.weights = random.random()
        self.labels = labels

    def __call__(self, stream):
        for example in stream:
            # Assign one of the labels at random, then score the example
            # with respect to the current weights
            example['label'] = random.choice(self.labels)
            noise = random.random()
            yield ((noise + self.weights) / 2, example)

    def update(self, answers):
        # Receives the examples with an added "answer" key that maps to
        # "accept", "reject" or "ignore". The dummy does not look at them and
        # simply draws new random weights.
        self.weights = random.random()
36 | 
37 | 
38 | # Recipe decorator with argument annotations: (description, argument type,
39 | # shortcut, type / converter function called on value before it's passed to
40 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('textcat.custom-model',
    dataset=("The dataset to use", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string)
)
def textcat_custom_model(dataset, source, label=[]):
    """
    Use active learning-powered text classification with a custom model. To
    demonstrate how it works, this demo recipe uses a simple dummy model that
    "predicts" random scores. But you can swap it out for any model of your
    choice, for example a text classification model implementation using
    PyTorch, TensorFlow or scikit-learn.

    RETURNS (dict): The recipe components ('view_id', 'dataset', 'stream',
        'update', 'config').
    """
    # Generator that yields one dictionary per example in the JSONL file
    examples = JSONL(source)

    # The dummy model stands in for a real classifier
    model = DummyModel(labels=label)

    # The model yields (score, example) tuples; prefer_uncertain focuses on
    # the suggestions with a score closest to 0.5 and yields just the example
    scored = model(examples)
    stream = prefer_uncertain(scored)

    ui_config = {'label': ', '.join(label)}

    return {
        'view_id': 'classification', # Annotation interface to use
        'dataset': dataset,          # Name of dataset to save annotations
        'stream': stream,            # Incoming stream of examples
        # Called every time Prodigy receives new answers from the web app;
        # can be used to update the model in the loop
        'update': model.update,
        'config': ui_config          # Additional config settings, mostly for app UI
    }
79 | 


--------------------------------------------------------------------------------
/src/recipes/textcat/textcat_teach.py:
--------------------------------------------------------------------------------
 1 | # coding: utf8
 2 | from __future__ import unicode_literals
 3 | 
 4 | import prodigy
 5 | from prodigy.components.loaders import JSONL
 6 | from prodigy.models.textcat import TextClassifier
 7 | from prodigy.models.matcher import PatternMatcher
 8 | from prodigy.components.sorters import prefer_uncertain
 9 | from prodigy.util import combine_models, split_string
10 | import spacy
11 | 
12 | 
13 | # Recipe decorator with argument annotations: (description, argument type,
14 | # shortcut, type / converter function called on value before it's passed to
15 | # the function). Descriptions are also shown when typing --help.
@prodigy.recipe('textcat.teach',
    dataset=("The dataset to use", "positional", None, str),
    spacy_model=("The base model", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    label=("One or more comma-separated labels", "option", "l", split_string),
    patterns=("Optional match patterns", "option", "p", str),
    exclude=("Names of datasets to exclude", "option", "e", split_string),
    long_text=("Enable long-text classification mode", "flag", "L", bool)
)
def textcat_teach(dataset, spacy_model, source, label=None, patterns=None,
                  exclude=None, long_text=False):
    """
    Annotate text categories with a model in the loop: the text classifier
    (optionally combined with a pattern matcher) scores the incoming examples
    and the most uncertain suggestions are presented first.

    RETURNS (dict): The recipe components ('view_id', 'dataset', 'stream',
        'update', 'exclude', 'config').
    """
    # Generator of example dicts read from the JSONL source file
    stream = JSONL(source)

    # The spaCy pipeline that backs both the classifier and the matcher
    nlp = spacy.load(spacy_model)

    # Prodigy's text classifier model; it outputs (score, example) tuples
    model = TextClassifier(nlp, label, long_text=long_text)

    if patterns is not None:
        # Load the JSONL patterns into a matcher that labels the task as a
        # whole rather than the highlighted spans
        matcher = PatternMatcher(nlp, prior_correct=5., prior_incorrect=5.,
                                 label_span=False, label_task=True)
        matcher = matcher.from_disk(patterns)
        # Interleave the suggestions of the text classifier and the matcher
        # and update both at the same time
        predict, update = combine_models(model, matcher)
    else:
        # Without patterns the model alone suggests examples and only its
        # own update method is used as the update callback
        predict, update = model, model.update

    # prefer_uncertain focuses on the suggestions with a score closest to
    # 0.5; it consumes (score, example) tuples and yields just the example
    stream = prefer_uncertain(predict(stream))

    label_text = 'n/a' if label is None else ', '.join(label)
    ui_config = {'lang': nlp.lang, 'label': label_text}

    return {
        'view_id': 'classification', # Annotation interface to use
        'dataset': dataset,          # Name of dataset to save annotations
        'stream': stream,            # Incoming stream of examples
        'update': update,            # Update callback, called with batch of answers
        'exclude': exclude,          # List of dataset names to exclude
        'config': ui_config          # Additional config settings, mostly for app UI
    }
74 | 


--------------------------------------------------------------------------------
/src/static/fonts/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/lato-bold.woff


--------------------------------------------------------------------------------
/src/static/fonts/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/lato-bold.woff2


--------------------------------------------------------------------------------
/src/static/fonts/lato-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/lato-regular.woff


--------------------------------------------------------------------------------
/src/static/fonts/lato-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/lato-regular.woff2


--------------------------------------------------------------------------------
/src/static/fonts/robotocondensed-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/robotocondensed-bold.woff


--------------------------------------------------------------------------------
/src/static/fonts/robotocondensed-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/robotocondensed-bold.woff2


--------------------------------------------------------------------------------
/src/static/fonts/robotocondensed-bolditalic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/robotocondensed-bolditalic.woff


--------------------------------------------------------------------------------
/src/static/fonts/robotocondensed-bolditalic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/robotocondensed-bolditalic.woff2


--------------------------------------------------------------------------------
/src/static/fonts/sharetechmono-regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/sharetechmono-regular.woff


--------------------------------------------------------------------------------
/src/static/fonts/sharetechmono-regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/fonts/sharetechmono-regular.woff2


--------------------------------------------------------------------------------
/src/static/img/botonA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/img/botonA.png


--------------------------------------------------------------------------------
/src/static/img/botonS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/img/botonS.png


--------------------------------------------------------------------------------
/src/static/img/botonX.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/img/botonX.png


--------------------------------------------------------------------------------
/src/static/img/interfaz1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/img/interfaz1.png


--------------------------------------------------------------------------------
/src/static/img/interfaz2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bastiancy/prodigy-multi-annotator/69fbe523dac6b7d7abc72e55932568e228ac3b5c/src/static/img/interfaz2.png


--------------------------------------------------------------------------------
/src/templates/home.html:
--------------------------------------------------------------------------------
  1 | {% extends 'layout.html' %}
  2 | 
  3 | {% block content %}
  4 | <div class="container-fluid">
  5 |     <div class="row">
  6 |         <main role="main" class="ml-sm-auto col-lg-10 col-lg-offset-1 pt-3 px-4">
  7 |             <div class="d-flex justify-content-between flex-wrap flex-md-nowrap align-items-center pb-2 mb-3 border-bottom">
  8 |                 <h1 class="h2">Tareas de anotacion</h1>
  9 |                 <div class="btn-toolbar mb-2 mb-md-0">
 10 |                     <div class="btn-group mr-2">
 11 |                     </div>
 12 |                 </div>
 13 |             </div>
 14 |             <div class="my-3 p-3 bg-white rounded shadow-sm"
 15 |                 v-for="project in projects">
 16 |                 <div class="row">
 17 |                     <div class="col">
 18 |                         <p><b>[[ project.name ]]</b></p>
 19 |                         <p>[[ project.desc ]]</p>
 20 |                     </div>
 21 |                     <div class="col">
 22 |                         <div v-if="typeof stats[project.name] != 'undefined'" style="margin-bottom: 5px;">
 23 |                             <small class="text-muted">Estadisticas:</small>
 24 |                             <br>Meta: [[ stats[project.name].meta ]]
 25 |                             <br>Accept: [[ stats[project.name].accept ]]
 26 |                             <br>Reject: [[ stats[project.name].reject ]]
 27 |                             <br>Ignore: [[ stats[project.name].ignore ]]
 28 |                         </div>
 29 |                         <a v-if="current_task != project.name" @click="startJob(project)" class="btn btn-sm btn-primary" href="#" role="button">Comenzar anotacion</a>
 30 |                         <a v-if="current_task == project.name && loading_task" class="btn btn-sm btn-warning disabled" href="#" role="button"><i class="fa fa-spin fa-spinner"></i> Cargando</a>
 31 |                         <a v-if="current_task == project.name && !loading_task" @click="startJob(project)" class="btn btn-sm btn-success" href="#" role="button">Continuar anotacion</a>
 32 |                         <a href="#" @click="showInstructions(project)" class="btn btn-sm btn-outline-secondary">Ver Instrucciones</a>
 33 |                         <a href="#" @click="showComments(project)" class="btn btn-sm btn-outline-secondary">Comentar</a>
 34 |                     </div>
 35 |                 </div>
 36 |             </div>
 37 | 
 38 |         </main>
 39 |     </div>
 40 | </div>
 41 | <b-modal ref="instructionsModal" hide-footer size="lg" title="Instrucciones de la tarea">
 42 |     <div v-html="instructions"></div>
 43 | </b-modal>
 44 | <b-modal ref="commentsModal" hide-footer title="Comentar sobre esta tarea">
 45 |     <b-form-textarea id="textarea1"
 46 |                      v-model="comments.text"
 47 |                      placeholder="Ingrese sus comentarios aqui.."
 48 |                      :rows="5">
 49 |     </b-form-textarea>
 50 |     <br>
 51 |     <b-button variant="primary" @click="saveComments()">Guardar</b-button>
 52 | </b-modal>
 53 | {% endblock %}
 54 | 
 55 | {% block scripts %}
 56 | <script>
 57 |     var base_url = '{{ base_url }}';
 58 |     var app = new Vue({
 59 |         delimiters: ['[[',']]'],
 60 |         el: '#app',
 61 |         data: {
 62 |             logged_in: false,
 63 |             user: {token: '', name: ''},
 64 |             projects: [],
 65 |             stats: {},
 66 |             instructions: '',
 67 |             comments: {
 68 |                 project: null,
 69 |                 text: '',
 70 |             },
 71 |             current_task: null,
 72 |             loading_task: false,
 73 |         },
 74 |         mounted: function() {
 75 |             var that = this;
 76 |             fetch(base_url + '/api/project')
 77 |             .then(function(response) {
 78 |                 return response.json();
 79 |             })
 80 |             .then(function(data) {
 81 |                 that.projects = data;
 82 | //                for (var item of data) {
 83 | //                    that.getStatPerProjectAndUser(item);
 84 | //                }
 85 |             });
 86 |         },
 87 |         methods: {
 88 |             logout: function() {
 89 |                 this.$cookies.remove('logged_user');
 90 |                 window.location = '/login';
 91 |                 return false;
 92 |             },
 93 |             showInstructions: function(project) {
 94 |                 this.instructions = project.instructions;
 95 |                 this.$refs.instructionsModal.show();
 96 |             },
 97 |             showComments: function(project) {
 98 |                 var that = this;
 99 |                 fetch(base_url + '/api/project/' + project.name + '/comments/' + this.user.token)
100 |                 .then(function(response) {
101 |                     return response.json();
102 |                 })
103 |                 .then(function(data) {
104 |                     that.comments.project = project;
105 |                     that.comments.text = data.comments;
106 |                     that.$refs.commentsModal.show();
107 |                 });
108 |             },
109 |             saveComments: function() {
110 |                 var that = this;
111 |                 var formData = new FormData();
112 |                 formData.append('comments', this.comments.text);
113 | 
114 |                 fetch(base_url + '/api/project/' + this.comments.project.name + '/comments/' + this.user.token, {
115 |                     method: "POST",
116 |                     body: formData
117 |                 })
118 |                 .then(function(response) {
119 |                     that.$refs.commentsModal.hide();
120 |                 });
121 |             },
122 |             getStatPerProjectAndUser: function(project) {
123 |                 var that = this;
124 |                 fetch(base_url + '/api/project/' + project.name + '/stats/' + this.user.token)
125 |                 .then(function(response) {
126 |                     return response.json();
127 |                 })
128 |                 .then(function(data) {
129 |                     that.$set(that.stats, project.name, data);
130 |                 });
131 |             },
132 |             startJob: function (project) {
133 |                 var that = this;
134 |                 this.current_task = project.name;
135 |                 this.loading_task = true;
136 | 
137 |                 fetch(base_url + '/api/project/' + project.name + '/start_job/' + this.user.token)
138 |                 .then(function(response) {
139 |                     return response.json();
140 |                 })
141 |                 .then(function(data) {
142 |                     that.loading_task = false;
143 |                     window.open(base_url + data.url, '_blank');
144 |                 });
145 |             }
146 |         }
147 |     });
148 | </script>
149 | {% endblock %}
150 | 


--------------------------------------------------------------------------------
/src/templates/layout.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en" xmlns:v-bind="http://www.w3.org/1999/xhtml">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
 6 |     <meta http-equiv="content-type" content="text/html; charset=UTF-8">
 7 |     <title>{% block title %}Sistema de Anotacion{% endblock %}</title>
 8 | 
 9 |     <link type="text/css" rel="stylesheet" href="https://unpkg.com/bootstrap/dist/css/bootstrap.min.css"/>
10 |     <link type="text/css" rel="stylesheet" href="https://unpkg.com/bootstrap-vue@latest/dist/bootstrap-vue.css"/>
11 |     <link type="text/css" rel="stylesheet"
12 |           href="https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css"/>
13 |     {% block styles %}{% endblock %}
14 | </head>
15 | <body>
16 | <div id="app">
17 |     <nav class="navbar navbar-dark sticky-top navbar-expand-lg bg-dark">
18 |         <a class="navbar-brand" href="#"> Multi-annotator</a>
19 |         <div class="collapse navbar-collapse" id="navbarSupportedContent">
20 |             <ul class="navbar-nav mr-auto">
21 |                 <li class="nav-item active">
22 |                     <a class="nav-link" href="#">Annotation Tasks <span class="sr-only">(current)</span></a>
23 |                 </li>
24 |                 <li class="nav-item">
25 |                     <a class="nav-link" href="#">Corpus Analisis</a>
26 |                 </li>
27 |                 <li class="nav-item">
28 |                     <a class="nav-link" href="#">Test Models</a>
29 |                 </li>
30 |                 <li class="nav-item dropdown">
31 |                     <a class="nav-link dropdown-toggle" href="#" id="navbarDropdownMenuLink" data-toggle="dropdown"
32 |                        aria-haspopup="true" aria-expanded="false">
33 |                         Settings
34 |                     </a>
35 |                     <div class="dropdown-menu" aria-labelledby="navbarDropdownMenuLink">
36 |                         <a class="dropdown-item" href="#">User management</a>
37 |                     </div>
38 |                 </li>
39 |             </ul>
40 |             <span class="navbar-text text-nowrap">
41 |                 Logged as <b>{{ current_user.username }}</b>
42 |             </span>
43 |             <div class="nav-item">
44 |                 <a class="nav-link" href="/logout">Logout</a>
45 |             </div>
46 |         </div>
47 |     </nav>
48 |     {% block content %}{% endblock %}
49 | </div>
50 | 
51 | <script src="https://unpkg.com/vue"></script>
52 | <script src="https://unpkg.com/vue-cookies@1.5.5/vue-cookies.js"></script>
53 | <script src="https://unpkg.com/babel-polyfill@latest/dist/polyfill.min.js"></script>
54 | <script src="https://unpkg.com/bootstrap-vue@latest/dist/bootstrap-vue.js"></script>
55 | 
56 | {% block scripts %}{% endblock %}
57 | </body>
58 | </html>


--------------------------------------------------------------------------------
/src/templates/login.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en" xmlns:v-bind="http://www.w3.org/1999/xhtml">
 3 | <head>
 4 |     <!-- Required meta tags -->
 5 |     <meta charset="utf-8">
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
 7 |     <meta http-equiv="content-type" content="text/html; charset=UTF-8">
 8 |     <title>Prodigy multi-annotator</title>
 9 | 
10 |     <!-- Required Stylesheets -->
11 |     <link type="text/css" rel="stylesheet" href="https://unpkg.com/bootstrap/dist/css/bootstrap.min.css"/>
12 |     <link type="text/css" rel="stylesheet" href="https://unpkg.com/bootstrap-vue@latest/dist/bootstrap-vue.css"/>
13 |     <link type="text/css" rel="stylesheet" href="https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css"/>
14 |     <style type="text/css">
15 |         html,
16 |         body {
17 |           height: 100%;
18 |         }
19 | 
20 |         #app {
21 |           height: 100%;
22 |           display: -ms-flexbox;
23 |           display: flex;
24 |           -ms-flex-align: center;
25 |           align-items: center;
26 |           padding-top: 40px;
27 |           padding-bottom: 40px;
28 |           background-color: #f5f5f5;
29 |         }
30 | 
31 |         .form-signin {
32 |           width: 100%;
33 |           max-width: 330px;
34 |           padding: 15px;
35 |           margin: auto;
36 |         }
37 |         .form-signin .checkbox {
38 |           font-weight: 400;
39 |         }
40 |         .form-signin .form-control {
41 |           position: relative;
42 |           box-sizing: border-box;
43 |           height: auto;
44 |           padding: 10px;
45 |           font-size: 16px;
46 |         }
47 |         .form-signin .form-control:focus {
48 |           z-index: 2;
49 |         }
50 |         .form-signin input[type="email"] {
51 |           margin-bottom: -1px;
52 |           border-bottom-right-radius: 0;
53 |           border-bottom-left-radius: 0;
54 |         }
55 |         .form-signin input[type="password"] {
56 |           margin-bottom: 10px;
57 |           border-top-left-radius: 0;
58 |           border-top-right-radius: 0;
59 |         }
60 |     </style>
61 | 
62 |     <!-- Required scripts -->
63 |     <script src="https://unpkg.com/vue"></script>
64 |     <script src="https://unpkg.com/vue-cookies@1.5.5/vue-cookies.js"></script>
65 |     <script src="https://unpkg.com/babel-polyfill@latest/dist/polyfill.min.js"></script>
66 |     <script src="https://unpkg.com/bootstrap-vue@latest/dist/bootstrap-vue.js"></script>
67 | </head>
68 | 
69 | <body>
70 | <!-- Our application root element -->
71 | <div id="app" class="text-center">
72 |     {% if error %}<p class=error><strong>Error:</strong> {{ error }}{% endif %}
73 |     <form action="{{ url_for('login') }}" method="post" class="form-signin">
74 |         <h1 class="h3 mb-3 font-weight-normal">Sistema de Anotacion</h1>
75 |         <p>Por favor inicie sesion para continuar.</p>
76 |         <br>
77 |         <dl>
78 |           <dt><label for="username" class="sr-only">Username</label>
79 |           <dd><input type="text" id="username" name="username" class="form-control" required autofocus>
80 |           <dt><label for="password" class="sr-only">Password</label>
81 |           <dd><input type="password" id="password" name="password" class="form-control" required>
82 |           <dd><input type=submit value="Login" class="btn btn-lg btn-primary btn-block">
83 |         </dl>
84 |     </form>
85 | </div>
86 | 
87 | </body>
88 | </html>


--------------------------------------------------------------------------------
/src/templates/prodigy/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |     <head>
 4 |         <base href="/prodigy/">
 5 |         <meta charset="UTF-8">
 6 |         <meta name="viewport" content="width=device-width, initial-scale=1.0">
 7 |         <meta name="apple-mobile-web-app-capable" content="yes">
 8 |         <title>Prodigy | {{ job }}</title>
 9 |         <link rel="shortcut icon" href="favicon.ico">
10 |     </head>
11 |     <body>
12 |         <div id="root"></div>
13 |         <script>
14 |             (function () {
15 |                 var prefix_url = '/prodigy/' + '{{ job }}';
16 |                 var originalFetch = fetch;
17 |                 fetch = function() {
18 |                     arguments[0] = prefix_url + arguments[0];
19 |                     return originalFetch.apply(this, arguments).then(function(data) {
20 |                         return data;
21 |                     });
22 |                 };
23 |             })();
24 |         </script>
25 |         <script src="bundle.js"></script>
26 |     </body>
27 | </html>
28 | 


--------------------------------------------------------------------------------
/src/templates/task_list.html:
--------------------------------------------------------------------------------
  1 | {% extends 'layout.html' %}
  2 | 
  3 | {% block content %}
  4 | <div class="container-fluid">
  5 |     <div class="row">
  6 |         <main role="main" class="col-md-10 col-md-offset-1 pt-3 px-4">
  7 |             <div class="d-flex justify-content-between flex-wrap flex-md-nowrap align-items-center pb-2 mb-3 border-bottom">
  8 |                 <h1 class="h2">Available tasks</h1>
  9 |                 <div class="btn-toolbar mb-2 mb-md-0">
 10 |                     <div class="btn-group mr-2">
 11 |                     </div>
 12 |                 </div>
 13 |             </div>
 14 |             {% for project in projects %}
 15 |             <div class="my-3 p-3 bg-white rounded shadow-sm">
 16 |                 <div class="row">
 17 |                     <div class="col">
 18 |                         <p><b>{{ project.name }}</b></p>
 19 |                         <p>{{ project.desc }}</p>
 20 |                     </div>
 21 |                     <div class="col">
 22 |                         {% if project.stats %}
 23 |                         <div style="margin-bottom: 5px;">
 24 |                             <small class="text-muted">Estadisticas:</small>
 25 |                             <br>Meta: {{ stats.meta }}
 26 |                             <br>Accept: {{ project.stats.accept }}
 27 |                             <br>Reject: {{ project.stats.reject }}
 28 |                             <br>Ignore: {{ project.stats.ignore }}
 29 |                         </div>
 30 |                         {% endif %}
 31 |                         <a v-if="current_task != project.name" @click="startJob(project)" class="btn btn-sm btn-primary" href="#" role="button">Comenzar anotacion</a>
 32 |                         <a v-if="current_task == project.name && loading_task" class="btn btn-sm btn-warning disabled" href="#" role="button"><i class="fa fa-spin fa-spinner"></i> Cargando</a>
 33 |                         <a v-if="current_task == project.name && !loading_task" @click="startJob(project)" class="btn btn-sm btn-success" href="#" role="button">Continuar anotacion</a>
 34 |                         <a href="#" @click="showInstructions(project)" class="btn btn-sm btn-outline-secondary">Ver Instrucciones</a>
 35 |                         <a href="#" @click="showComments(project)" class="btn btn-sm btn-outline-secondary">Comentar</a>
 36 |                     </div>
 37 |                 </div>
 38 |             </div>
 39 |             {% endfor  %}
 40 |         </main>
 41 |     </div>
 42 | </div>
 43 | {% endblock %}
 44 | 
 45 | {% block scripts %}
 46 | <script>
 47 |     var base_url = '{{ base_url }}';
 48 |     var app = new Vue({
 49 |         delimiters: ['[[',']]'],
 50 |         el: '#app',
 51 |         data: {
 52 |             current_task: null,
 53 |             loading_task: false
 54 |         },
 55 |         mounted: function() {
 56 |             var that = this;
 57 |             fetch(base_url + '/api/project')
 58 |                 .then(function(response) {
 59 |                     return response.json();
 60 |                 })
 61 |                 .then(function(data) {
 62 |                     that.projects = data;
 63 |                 });
 64 |         },
 65 |         methods: {
 66 |             showInstructions: function(project) {
 67 |                 this.instructions = project.instructions;
 68 |                 this.$refs.instructionsModal.show();
 69 |             },
 70 |             showComments: function(project) {
 71 |                 var that = this;
 72 |                 fetch(base_url + '/api/project/' + project.name + '/comments/' + this.user.token)
 73 |                 .then(function(response) {
 74 |                     return response.json();
 75 |                 })
 76 |                 .then(function(data) {
 77 |                     that.comments.project = project;
 78 |                     that.comments.text = data.comments;
 79 |                     that.$refs.commentsModal.show();
 80 |                 });
 81 |             },
 82 |             saveComments: function() {
 83 |                 var that = this;
 84 |                 var formData = new FormData();
 85 |                 formData.append('comments', this.comments.text);
 86 | 
 87 |                 fetch(base_url + '/api/project/' + this.comments.project.name + '/comments/' + this.user.token, {
 88 |                     method: "POST",
 89 |                     body: formData
 90 |                 })
 91 |                 .then(function(response) {
 92 |                     that.$refs.commentsModal.hide();
 93 |                 });
 94 |             },
 95 |             getStatPerProjectAndUser: function(project) {
 96 |                 var that = this;
 97 |                 fetch(base_url + '/api/project/' + project.name + '/stats/' + this.user.token)
 98 |                 .then(function(response) {
 99 |                     return response.json();
100 |                 })
101 |                 .then(function(data) {
102 |                     that.$set(that.stats, project.name, data);
103 |                 });
104 |             },
105 |             startJob: function (project) {
106 |                 var that = this;
107 |                 this.current_task = project.name;
108 |                 this.loading_task = true;
109 | 
110 |                 fetch(base_url + '/api/project/' + project.name + '/start_job/' + this.user.token)
111 |                 .then(function(response) {
112 |                     return response.json();
113 |                 })
114 |                 .then(function(data) {
115 |                     that.loading_task = false;
116 |                     window.open(base_url + data.url, '_blank');
117 |                 });
118 |             }
119 |         }
120 |     });
121 | </script>
122 | {% endblock %}
123 | 


--------------------------------------------------------------------------------