├── pids.sample.cfg
├── setup.cfg
├── elastic_wikidata
├── __metadata__.py
├── __init__.py
├── config.py
├── sparql_to_es.py
├── sparql_helpers.py
├── http.py
├── dump_to_es.py
└── wd_entities.py
├── queries
└── humans.rq
├── requirements_dev.txt
├── requirements.txt
├── config.sample.ini
├── .flake8
├── Pipfile
├── .pre-commit-config.yaml
├── .github
└── workflows
│ └── python-publish.yml
├── LICENSE.txt
├── setup.py
├── tests
└── test_wd_entities.py
├── CHANGELOG.md
├── .gitignore
├── examples
└── paginate query.ipynb
├── README.md
├── cli.py
└── Pipfile.lock
/pids.sample.cfg:
--------------------------------------------------------------------------------
1 | P31
2 | P279
3 | P18
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
--------------------------------------------------------------------------------
/elastic_wikidata/__metadata__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.3.0"
2 |
--------------------------------------------------------------------------------
/queries/humans.rq:
--------------------------------------------------------------------------------
1 | SELECT ?item WHERE {
2 | ?item wdt:P31 wd:Q5.
3 | }
--------------------------------------------------------------------------------
/elastic_wikidata/__init__.py:
--------------------------------------------------------------------------------
1 | from elastic_wikidata.__metadata__ import __version__
2 |
--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | pre-commit
2 | black
3 | pytest
4 | pylint
5 | flake8
6 | jupyterlab
7 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | click==7.1.2
2 | elasticsearch==7.8.1
3 | SPARQLWrapper==1.8.5
4 | tqdm>=4.48.2
5 | requests==2.24.0
--------------------------------------------------------------------------------
/config.sample.ini:
--------------------------------------------------------------------------------
1 | [ELASTIC]
2 | ELASTIC_SEARCH_CLUSTER =
3 | ELASTIC_SEARCH_USER =
4 | ELASTIC_SEARCH_PASSWORD =
5 |
6 | [HTTP]
7 | CONTACT_DETAILS =
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E203, E266, E501, W503, F403, F401, W291, E402, C901
3 | max-line-length = 79
4 | max-complexity = 18
5 | select = B,C,E,F,W,T4,B9
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | name = "pypi"
3 | url = "https://pypi.org/simple"
4 | verify_ssl = true
5 |
6 | [dev-packages]
7 | pre-commit = "*"
8 | black = "*"
9 | pytest = "*"
10 | pylint = "*"
11 | flake8 = "*"
12 | jupyterlab = "*"
13 |
14 | [packages]
15 | elasticsearch = "*"
16 | click = "*"
17 | tqdm = "*"
18 | sparqlwrapper = "*"
19 | requests = "*"
20 |
21 | [requires]
22 | python_version = "3.7"
23 |
24 | [pipenv]
25 | allow_prereleases = true
26 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/ambv/black
3 | rev: 21.5b1
4 | hooks:
5 | - id: black
6 | language_version: python3
7 | - repo: https://gitlab.com/pycqa/flake8
8 | rev: 3.9.2
9 | hooks:
10 | - id: flake8
11 | - repo: https://github.com/pre-commit/pre-commit-hooks
12 | rev: v4.0.1
13 | hooks:
14 | - id: check-json
15 | - id: check-merge-conflict
--------------------------------------------------------------------------------
/elastic_wikidata/config.py:
--------------------------------------------------------------------------------
class RuntimeConfig:
    """Mutable key-value store for settings shared across the package at runtime."""

    def __init__(self):
        # Backing dict holding every config entry.
        self.items = {}

    def add_item(self, item: dict):
        """Merge the key-value pairs of *item* into the runtime config."""
        self.items.update(item)

    def get(self, key: str):
        """Return the value stored under *key*, or None if the key is absent."""
        return self.items.get(key)

    def get_all(self) -> dict:
        """Return the dict of all config entries."""
        return self.items
25 |
26 |
27 | runtime_config = RuntimeConfig()
28 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflows will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | release:
8 | types: [created]
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: '3.x'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine
25 | - name: Build and publish
26 | env:
27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 | run: |
30 | python setup.py sdist bdist_wheel
31 | twine upload dist/*
32 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright 2020 The Board of Trustees of the Science Museum
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

# The README doubles as the long description shown on PyPI.
with open("README.md", "r") as fh:
    long_description = fh.read()

setuptools.setup(
    name="elastic-wikidata",
    # NOTE(review): keep `version` and the tag in `download_url` in sync with
    # elastic_wikidata/__metadata__.py — they appear to have drifted
    # (metadata file says 0.3.0, here 1.0.1).
    version="1.0.1",
    author="Science Museum Group",
    description="elastic-wikidata",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/TheScienceMuseum/elastic-wikidata",
    download_url="https://github.com/TheScienceMuseum/elastic-wikidata/archive/v1.0.1.tar.gz",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
    # Pins mirror requirements.txt.
    install_requires=[
        "click==7.1.2",
        "elasticsearch==7.8.1",
        "SPARQLWrapper==1.8.5",
        "tqdm>=4.48.2",
        "requests==2.24.0",
    ],
    py_modules=["cli", "elastic_wikidata"],
    packages=["elastic_wikidata"],
    # Installs the `ew` console command, dispatching to cli.main.
    entry_points="""
    [console_scripts]
    ew=cli:main
    """,
)
35 |
--------------------------------------------------------------------------------
/tests/test_wd_entities.py:
--------------------------------------------------------------------------------
1 | from elastic_wikidata import wd_entities
2 | import pytest
3 |
4 |
@pytest.fixture
def ge():
    """Provide a shared wd_entities.get_entities instance for the tests below."""
    return wd_entities.get_entities()
10 |
11 |
def test_get_all_results(ge):
    """get_all_results returns one result dict per requested QID, in request order."""
    qids = ["Q203545", "Q706475", "Q18637243"]

    res = ge.get_all_results(qids, timeout=6)

    assert isinstance(res, list)
    assert len(res) == len(qids)

    returned_ids = [doc["id"] for doc in res]
    assert returned_ids == qids
20 |
21 |
def test_get_labels(ge):
    """get_labels maps each QID to its English label ('' when no label exists)."""
    qids = ["Q203545", "Q706475", "Q18637243", "Q82340"]

    expected = {
        "Q203545": "Michael Gambon",
        "Q706475": "Steve McQueen",
        "Q18637243": "Michaela Coel",
        # the last QID has no english label so a blank string is returned as its value
        "Q82340": "",
    }

    assert ge.get_labels(qids, timeout=6) == expected
34 |
35 |
def test_simplify_wbgetentities_result(ge):
    """simplify_wbgetentities_result flattens the selected claims to plain values."""
    res = ge.get_all_results(["Q203545", "Q706475", "Q18637243"])
    pids = ["P31", "P21", "P735", "P734", "P1971"]

    res_simplified = [
        wd_entities.simplify_wbgetentities_result(doc, lang="en", properties=pids)
        for doc in res
    ]

    # Every requested entity is a human, so P31 (instance of) should be [Q5].
    # BUG FIX: this was `assert [expr for doc in ...]`, which asserts on a
    # non-empty list and therefore always passed; all(...) checks each element.
    assert all(doc["claims"]["P31"] == ["Q5"] for doc in res_simplified)
    assert res_simplified[1]["claims"]["P1971"][0] == "+2"
47 |
--------------------------------------------------------------------------------
/elastic_wikidata/sparql_to_es.py:
--------------------------------------------------------------------------------
1 | import re
2 | from math import ceil
3 | from itertools import islice
4 | from tqdm.auto import tqdm
5 | from elastic_wikidata.sparql_helpers import run_query, paginate_sparql_query
6 | from elastic_wikidata.http import generate_user_agent
7 |
8 |
def url_to_qid(url: str) -> str:
    """
    Maps Wikidata URL of an entity to QID e.g. http://www.wikidata.org/entity/Q7187777 -> Q7187777.
    """

    # findall keeps the original behaviour: first match wins, IndexError if none
    matches = re.findall(r"(Q\d+)", url)
    return matches[0]
15 |
16 |
def get_entities_from_query(query, page_size=None, limit=None) -> list:
    """
    Get a list of entities from a query. Optionally:
        paginate the query using page_size
        limit the total number of entities returned using limit

    Args:
        query (str): SPARQL SELECT query whose first variable binds entity URLs
        page_size (int, optional): number of results fetched per paginated query
        limit (int, optional): maximum number of entities to return

    Returns:
        list: entities in form (Qd+)
    """

    if page_size:
        pages = paginate_sparql_query(query, page_size=page_size)
    else:
        pages = [query]

    # Number of pages needed to satisfy `limit`; None means unbounded/unknown.
    # BUG FIX: previously `page_limit` was only assigned when `limit` was set,
    # so the tqdm call below raised NameError when no limit was given, and
    # `ceil(limit / page_size)` raised TypeError when `limit` was set without
    # a `page_size`.
    page_limit = None
    if limit and page_size:
        page_limit = ceil(limit / page_size)
        pages = islice(pages, page_limit)

    all_entities = []

    for page_query in tqdm(pages, total=page_limit):
        res = run_query(page_query)
        var = res["head"]["vars"][0]
        entities = [url_to_qid(x[var]["value"]) for x in res["results"]["bindings"]]
        all_entities += entities

        # stop once `limit` entities have been collected, trimming any
        # overshoot from the final page
        if limit and len(all_entities) >= limit:
            all_entities = all_entities[:limit]
            break

        # stop when page of query returns fewer items than the page size
        if page_size and len(entities) < page_size:
            break

    return all_entities
48 |
--------------------------------------------------------------------------------
/elastic_wikidata/sparql_helpers.py:
--------------------------------------------------------------------------------
1 | from SPARQLWrapper import SPARQLWrapper, JSON
2 | import urllib
3 | import time
4 | from elastic_wikidata.http import generate_user_agent
5 |
6 |
def run_query(query: str, endpoint_url="https://query.wikidata.org/sparql") -> dict:
    """
    Run a SPARQL query against the Wikidata endpoint. Obeys retry-after headers for sensible bulk querying.

    Args:
        query (str): SPARQL query
        endpoint_url (optional)

    Returns:
        query_result (dict): the JSON result of the query as a dict
    """

    user_agent = generate_user_agent()

    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setMethod("POST")
    sparql.setReturnFormat(JSON)

    try:
        return sparql.query().convert()
    except urllib.error.HTTPError as e:
        if e.code == 429:
            # BUG FIX: HTTP header values are strings, so the previous
            # `isinstance(e.headers.get("retry-after"), int)` check could
            # never be true and the server-supplied delay was always ignored.
            # Parse the header to an int, falling back to 10s when it is
            # missing or not a plain number of seconds.
            try:
                delay = int(e.headers.get("retry-after", ""))
            except (TypeError, ValueError):
                delay = 10
            time.sleep(delay)
            return run_query(query, endpoint_url)
        raise
36 |
37 |
def paginate_sparql_query(query: str, page_size: int):
    """
    Paginates a SELECT query, returning a generator which yields paginated queries.
    """

    lowered = query.lower()

    # only SELECT queries can be meaningfully paginated
    if "select" not in lowered:
        raise ValueError("Must be a SELECT query")

    if "order by" not in lowered:
        print(
            "WARNING: no ORDER BY logic in the SPARQL query. This could result in duplicate or missing entities."
        )

    # yield the query with successive LIMIT/OFFSET suffixes, forever
    page_num = 0
    while True:
        offset = page_num * page_size
        yield (
            f"{query}\n"
            f"        LIMIT {page_size}\n"
            f"        OFFSET {offset}\n"
            "        "
        )
        page_num += 1
60 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes documented below.
4 |
5 | ## 1.0.1
6 | - **bug fix:** no longer silently fails when object value has unsupported data type, instead returning the entire object in dict format.
7 | ## 1.0.0
8 | - **enhancement (breaking change):** properties now passed as whitespace-separated list rather than comma-separated. They can also be passed through a config file by giving the `--properties` option a filename to a file that exists.
9 | - **stability improvements:** `elasticsearch.helpers.streaming_bulk` now used instead of `elasticsearch.helpers.parallel_bulk` due to issues with memory usage of the latter. Bulk load now retries on timeout.
10 |
11 | ## 0.3.7
12 | - **fix:** reading from JSON dump forces utf-8
13 | ## 0.3.6
14 |
15 | - **fix:** handles documents which are missing any of *labels/aliases/descriptions/claims* fields.
16 | - **enhancement:** `wd_entities.simplify_wbgetentities_result` gives the option to return the redirected QID for Wikidata pages which redirect. By default it returns the unredirected QID: the same one that was passed into the function.
17 |
18 | ## 0.3.5
19 |
20 | - **fix:** `wd_entities.simplify_wbgetentities_result` can handle type *quantity*, and returns the value of *amount*.
21 |
22 | ## 0.3.4
23 |
24 | - **enhancement:** `wd_entities.get_entities` now has a `get_labels` method to get labels for a list of QIDs in a particular language using the wbgetentities API.
25 |
26 | ## 0.3.2
27 |
28 | - **enhancement:** add `labels_aliases` field for faster text search of both labels and aliases using an [Elasticsearch match query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html)
29 |
30 | ## 0.3.1
31 |
32 | - **fix:** property values without types are ignored
33 | - **enhancement:** refresh is disabled for the duration of data load by default, using `--disable_refresh` flag. This is beneficial for large datasets or low-resource machines as refreshing the search index is CPU-intensive and can cause the data load to freeze.
34 |
35 | ## 0.3.0
36 |
37 | - add changeable timeout for `wbgetentities` GET request
38 | - handle more Wikidata claims than just QIDs
39 | - generate User Agent from request in line with Wikidata guidelines
40 | - make Wikidata-related methods importable (rather than just runnable from CLI)
41 |
--------------------------------------------------------------------------------
/elastic_wikidata/http.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import sys
3 | from urllib.parse import quote
4 | from elastic_wikidata import __version__ as ew_version
5 | from elastic_wikidata.config import runtime_config
6 |
7 |
def generate_user_agent():
    """
    Generates user agent string according to Wikidata User Agent Guidelines (https://meta.wikimedia.org/wiki/User-Agent_policy).
    Uses contact information from `runtime_config.get('user_agent_contact')`.

    Returns:
        str: user agent string
    """
    python_part = "Python/" + ".".join(str(i) for i in sys.version_info)
    backend_part = "requests/" + requests.__version__
    bot_part = "Elastic Wikidata bot/" + ew_version

    contact = runtime_config.get("user_agent_contact")

    if contact is None:
        # only nag interactive CLI users about the missing contact details
        if runtime_config.get("cli"):
            print(
                "WARNING: please consider adding contact information through config.ini or the -contact flag to improve the User Agent header for Wikidata requests."
            )
        return f"{bot_part} {backend_part} {python_part}"

    # sanitise each whitespace-separated token for use in an HTTP header
    sanitised_contact = " ".join(
        process_user_agent_username(token) for token in contact.split(" ")
    )
    return f"{bot_part} ({sanitised_contact}) {backend_part} {python_part}"
35 |
36 |
def process_user_agent_username(username=None):
    """
    **Credit to [pywikibot](https://www.mediawiki.org/wiki/Manual:Pywikibot)**

    Reduce username to a representation permitted in HTTP headers.

    To achieve that, this function:
    1) replaces spaces (' ') with '_'
    2) encodes the username as 'utf-8' and if the username is not ASCII
    3) URL encodes the username if it is not ASCII, or contains '%'
    """
    if not username:
        return ""

    # spaces (and therefore %20) are not allowed in the header
    sanitised = username.replace(" ", "_")

    try:
        sanitised.encode("ascii")  # purely a test; the result is discarded
    except UnicodeEncodeError:
        # non-ASCII: percent-encode the UTF-8 bytes
        return quote(sanitised.encode("utf-8"))

    # ASCII but containing a literal '%': percent-encode it so that a
    # hand-crafted percent-encoded value cannot slip through unescaped
    # (% is legal in the default $wgLegalTitleChars)
    if "%" in sanitised:
        return quote(sanitised)

    return sanitised
64 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # repo-specific
2 | config.ini
3 | experiments/
4 |
5 | # node
6 | node_modules/
7 |
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | wheels/
30 | share/python-wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Editors
47 | .idea
48 | .vscode
49 |
50 | # Unit test / coverage reports
51 | htmlcov/
52 | .tox/
53 | .nox/
54 | .coverage
55 | .coverage.*
56 | .cache
57 | nosetests.xml
58 | coverage.xml
59 | *.cover
60 | *.py,cover
61 | .hypothesis/
62 | .pytest_cache/
63 | cover/
64 |
65 | # Translations
66 | *.mo
67 | *.pot
68 |
69 | # Django stuff:
70 | *.log
71 | local_settings.py
72 | db.sqlite3
73 | db.sqlite3-journal
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | .pybuilder/
87 | target/
88 |
89 | # Jupyter Notebook
90 | .ipynb_checkpoints
91 |
92 | # IPython
93 | profile_default/
94 | ipython_config.py
95 |
96 | # pyenv
97 | # For a library or package, you might want to ignore these files since the code is
98 | # intended to run in multiple environments; otherwise, check them in:
99 | .python-version
100 |
101 | # pipenv
102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
105 | # install all needed dependencies.
106 | #Pipfile.lock
107 |
108 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
109 | __pypackages__/
110 |
111 | # Celery stuff
112 | celerybeat-schedule
113 | celerybeat.pid
114 |
115 | # SageMath parsed files
116 | *.sage.py
117 |
118 | # Environments
119 | .env
120 | .venv
121 | env/
122 | venv/
123 | ENV/
124 | env.bak/
125 | venv.bak/
126 |
127 | # Spyder project settings
128 | .spyderproject
129 | .spyproject
130 |
131 | # Rope project settings
132 | .ropeproject
133 |
134 | # mkdocs documentation
135 | /site
136 |
137 | # mypy
138 | .mypy_cache/
139 | .dmypy.json
140 | dmypy.json
141 |
142 | # Pyre type checker
143 | .pyre/
144 |
145 | # pytype static type analyzer
146 | .pytype/
147 |
148 | # Cython debug symbols
149 | cython_debug/
150 |
151 |
152 | ### Linux ###
153 | *~
154 |
155 | # temporary files which can be created if a process still has a handle open of a deleted file
156 | .fuse_hidden*
157 |
158 | # KDE directory preferences
159 | .directory
160 |
161 | # Linux trash folder which might appear on any partition or disk
162 | .Trash-*
163 |
164 | # .nfs files are created when an open file is removed but is still being accessed
165 | .nfs*
166 |
167 | ### OSX ###
168 | # General
169 | .DS_Store
170 | .AppleDouble
171 | .LSOverride
172 |
173 | # Icon must end with two \r
174 | Icon
175 |
176 | # Thumbnails
177 | ._*
178 |
179 | # Files that might appear in the root of a volume
180 | .DocumentRevisions-V100
181 | .fseventsd
182 | .Spotlight-V100
183 | .TemporaryItems
184 | .Trashes
185 | .VolumeIcon.icns
186 | .com.apple.timemachine.donotpresent
187 |
188 | # Directories potentially created on remote AFP share
189 | .AppleDB
190 | .AppleDesktop
191 | Network Trash Folder
192 | Temporary Items
193 | .apdisk
194 |
195 | ### Windows ###
196 | # Windows thumbnail cache files
197 | Thumbs.db
198 | ehthumbs.db
199 | ehthumbs_vista.db
200 |
201 | # Dump file
202 | *.stackdump
203 |
204 | # Folder config file
205 | [Dd]esktop.ini
206 |
207 | # Recycle Bin used on file shares
208 | $RECYCLE.BIN/
209 |
210 | # Windows Installer files
211 | *.cab
212 | *.msi
213 | *.msix
214 | *.msm
215 | *.msp
216 |
217 | # Windows shortcuts
218 | *.lnk
219 |
220 | # End of https://www.toptal.com/developers/gitignore/api/osx,windows,linux
--------------------------------------------------------------------------------
/examples/paginate query.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Automatically Paginating a SPARQL query"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "name": "stdout",
17 | "output_type": "stream",
18 | "text": [
19 | "Requirement already satisfied: sparqlwrapper in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (1.8.5)\n",
20 | "Requirement already satisfied: rdflib>=4.0 in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (from sparqlwrapper) (5.0.0)\n",
21 | "Requirement already satisfied: six in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (from rdflib>=4.0->sparqlwrapper) (1.12.0)\n",
22 | "Requirement already satisfied: isodate in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (from rdflib>=4.0->sparqlwrapper) (0.6.0)\n",
23 | "Requirement already satisfied: pyparsing in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (from rdflib>=4.0->sparqlwrapper) (2.4.2)\n",
24 | "\u001b[33mYou are using pip version 19.0.3, however version 20.2 is available.\n",
25 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
26 | ]
27 | }
28 | ],
29 | "source": [
30 | "!pip install sparqlwrapper"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 2,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "import sys\n",
40 | "sys.path.append(\"..\")\n",
41 | "\n",
42 | "from itertools import islice\n",
43 | "from elastic_wikidata import sparql_helpers, sparql_to_es\n",
44 | "\n",
45 | "from tqdm.auto import tqdm"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {},
51 | "source": [
52 | "Let's write a query to get all humans. There are over 8 million humans on Wikidata so we'll get a timeout if we try to run the entire query at once. "
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 3,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "query = \"\"\"\n",
62 | "SELECT ?human WHERE {{\n",
63 | " ?human wdt:P31 wd:Q5. \n",
64 | "}}\n",
65 | "\"\"\""
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "We can use `elastic_wikidata` to paginate the query instead."
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 4,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "name": "stdout",
82 | "output_type": "stream",
83 | "text": [
84 | "WARNING: no ORDER BY logic in the SPARQL query. This could result in incorrect pages.\n"
85 | ]
86 | },
87 | {
88 | "data": {
89 | "text/plain": [
90 | "'\\nSELECT ?human WHERE {{\\n ?human wdt:P31 wd:Q5. \\n}}\\n\\n LIMIT 500\\n OFFSET 0\\n '"
91 | ]
92 | },
93 | "execution_count": 4,
94 | "metadata": {},
95 | "output_type": "execute_result"
96 | }
97 | ],
98 | "source": [
99 | "pages = sparql_helpers.paginate_sparql_query(query, page_size=500)\n",
100 | "next(pages)"
101 | ]
102 | },
103 | {
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "## Running paginated queries\n",
108 | "\n",
109 | "Putting this all together, we can use `sparql_to_es.get_entities_from_query` to:\n",
110 | "1. paginate a query to fetch entities\n",
111 | "2. run each page against the Wikidata Query Service\n",
112 | "3. combine the results"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 6,
118 | "metadata": {},
119 | "outputs": [
120 | {
121 | "data": {
122 | "application/vnd.jupyter.widget-view+json": {
123 | "model_id": "10e58e8a57434553918a3b57a67df597",
124 | "version_major": 2,
125 | "version_minor": 0
126 | },
127 | "text/plain": [
128 | "HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
129 | ]
130 | },
131 | "metadata": {},
132 | "output_type": "display_data"
133 | },
134 | {
135 | "name": "stdout",
136 | "output_type": "stream",
137 | "text": [
138 | "WARNING: no ORDER BY logic in the SPARQL query. This could result in incorrect pages.\n",
139 | "\n",
140 | "1000 entities returned\n",
141 | "1000 unique entities returned\n"
142 | ]
143 | }
144 | ],
145 | "source": [
146 | "entities = sparql_to_es.get_entities_from_query(query, page_size=100, limit=1000)\n",
147 | "\n",
148 | "print(f\"{len(entities)} entities returned\")\n",
149 | "print(f\"{len(set(entities))} unique entities returned\")"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": []
158 | }
159 | ],
160 | "metadata": {
161 | "kernelspec": {
162 | "display_name": "Python 3",
163 | "language": "python",
164 | "name": "python3"
165 | },
166 | "language_info": {
167 | "codemirror_mode": {
168 | "name": "ipython",
169 | "version": 3
170 | },
171 | "file_extension": ".py",
172 | "mimetype": "text/x-python",
173 | "name": "python",
174 | "nbconvert_exporter": "python",
175 | "pygments_lexer": "ipython3",
176 | "version": "3.7.3"
177 | }
178 | },
179 | "nbformat": 4,
180 | "nbformat_minor": 4
181 | }
182 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Elastic Wikidata
2 |
3 | Simple CLI tools to load a subset of Wikidata into Elasticsearch. Part of the [Heritage Connector](https://www.sciencemuseumgroup.org.uk/project/heritage-connector/) project.
4 |
5 | - [Why?](#why)
6 | - [Installation](#installation)
7 | - [Setup](#setup)
8 | - [Usage](#usage)
9 | - [Loading from Wikidata dump (.ndjson)](#loading-from-wikidata-dump-ndjson)
10 | - [Loading from SPARQL query](#loading-from-sparql-query)
11 | - [Temporary side effects](#temporary-side-effects)
12 |
13 |
14 |
15 | 
16 | 
17 | 
18 |
19 | ## Why?
20 |
21 | Running text search programmatically on Wikidata means using the MediaWiki query API, either [directly](https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=John_Snow&srlimit=10&srprop=size&formatversion=2) or [through the Wikidata query service/SPARQL](https://query.wikidata.org/#SELECT%20%2a%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Amwapi%20%7B%0A%20%20%20%20%20%20bd%3AserviceParam%20wikibase%3Aendpoint%20%22en.wikipedia.org%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20wikibase%3Aapi%20%22Search%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20mwapi%3Asrsearch%20%22John%20Snow%22.%0A%20%20%20%20%20%20%3Ftitle%20wikibase%3AapiOutput%20mwapi%3Atitle.%0A%20%20%7D%0A%20%20%20hint%3APrior%20hint%3ArunLast%20%22true%22.%0A%20%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22en%22.%20%7D%0A%7D%20LIMIT%2020).
22 |
23 | There are a couple of reasons you may not want to do this when running searches programmatically:
24 |
25 | - *time constraints/large volumes:* APIs are rate-limited, and you can only do one text search per SPARQL query
26 | - *better search:* using Elasticsearch allows for more flexible and powerful text search capabilities.* We're using our own Elasticsearch instance to do nearest neighbour search on embeddings, too.
27 |
28 | ** [CirrusSearch](https://www.mediawiki.org/wiki/Extension:CirrusSearch) is a Wikidata extension that enables direct search on Wikidata using Elasticsearch, if you require powerful search and are happy with the rate limit.*
29 |
30 | ## Installation
31 |
32 | from pypi: `pip install elastic_wikidata`
33 |
34 | from repo:
35 |
36 | 1. Download
37 | 2. `cd` into root
38 | 3. `pip install -e .`
39 |
40 | ## Setup
41 |
42 | elastic-wikidata needs the Elasticsearch credentials `ELASTICSEARCH_CLUSTER`, `ELASTICSEARCH_USER` and `ELASTICSEARCH_PASSWORD` to connect to your ES instance. You can set these in one of three ways:
43 |
44 | 1. Using environment variables: `export ELASTICSEARCH_CLUSTER=https://...` etc
45 | 2. Using config.ini: pass the `-c` parameter followed by a path to an ini file containing your Elasticsearch credentials. [Example here](./config.sample.ini).
46 | 3. Pass each variable in at runtime using options `--cluster/-c`, `--user/-u`, `--password/-p`.
47 |
48 | ## Usage
49 |
50 | Once installed the package is accessible through the keyword `ew`. A call is structured as follows:
51 |
52 | ``` bash
53 | ew
54 | ```
55 |
56 | *Task* is either:
57 |
58 | - `dump`: [load data from Wikidata JSON dump](#loading-from-wikidata-dump-ndjson), or
59 | - `query`: [load data from SPARQL query](#loading-from-sparql-query).
60 |
61 | A full list of options can be found with `ew --help`, but the following are likely to be useful:
62 |
63 | - `--index/-i`: the index name to push to. If not specified at runtime, elastic-wikidata will prompt for it
64 | - `--limit/-l`: limit the number of records pushed into ES. You might want to use this for a small trial run before importing the whole thing.
65 | - `--properties/-prop`: a whitespace-separated list of properties to include in the ES index e.g. *'p31 p21'*, or the path to a text file containing newline-separated properties e.g. [this one](./pids.sample.cfg).
66 | - `--language/-lang`: [Wikimedia language code](https://www.wikidata.org/wiki/Help:Wikimedia_language_codes/lists/all). Only one supported at this time.
67 |
68 | ### Loading from Wikidata dump (.ndjson)
69 |
70 | ``` bash
ew dump -p <path_to_dump>
72 | ```
73 |
74 | This is useful if you want to create one or more large subsets of Wikidata in different Elasticsearch indexes (millions of entities).
75 |
76 | **Time estimate:** Loading all ~8million humans into an AWS Elasticsearch index took me about 20 minutes. Creating the humans subset using `wikibase-dump-filter` took about 3 hours using its [instructions for parallelising](https://github.com/maxlath/wikibase-dump-filter/blob/master/docs/parallelize.md).
77 |
78 | 1. Download the complete Wikidata dump (latest-all.json.gz from [here](https://dumps.wikimedia.org/wikidatawiki/entities/)). This is a *large* file: 87GB on 07/2020.
79 | 2. Use [maxlath](https://github.com/maxlath)'s [wikibase-dump-filter](https://github.com/maxlath/wikibase-dump-filter/) to create a subset of the Wikidata dump. **Note: don't use the `--simplify` flag when running the dump. elastic-wikidata will take care of simplification.**
80 | 3. Run `ew dump` with flag `-p` pointing to the JSON subset. You might want to test it with a limit (using the `-l` flag) first.
81 |
82 | ### Loading from SPARQL query
83 |
84 | ``` bash
ew query -p <path_to_query_file>
86 | ```
87 |
88 | For smaller collections of Wikidata entities it might be easier to populate an Elasticsearch index directly from a SPARQL query rather than downloading the whole Wikidata dump to take a subset. `ew query` [automatically paginates SPARQL queries](examples/paginate%20query.ipynb) so that a heavy query like *'return all the humans'* doesn't result in a timeout error.
89 |
**Time estimate:** Loading 10,000 entities from Wikidata into an AWS-hosted Elasticsearch index took me about 6 minutes.
91 |
92 | 1. Write a SPARQL query and save it to a text/.rq file. See [example](queries/humans.rq).
93 | 2. Run `ew query` with the `-p` option pointing to the file containing the SPARQL query. Optionally add a `--page_size` for the SPARQL query.
94 |
95 | ### Temporary side effects
96 |
97 | As of version *0.3.1* refreshing the search index is disabled for the duration of load by default, as [recommended by ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html#_unset_or_increase_the_refresh_interval). Refresh is re-enabled to the default interval of `1s` after load is complete. To disable this behaviour use the flag `--no_disable_refresh/-ndr`.
98 |
--------------------------------------------------------------------------------
/cli.py:
--------------------------------------------------------------------------------
1 | from elastic_wikidata import dump_to_es, sparql_to_es
2 | from elastic_wikidata.config import runtime_config
3 | import os
4 | import click
5 | from configparser import ConfigParser
6 |
7 |
@click.command()
@click.argument("source", nargs=1)
@click.option("--path", "-p", type=click.Path(exists=True))
@click.option(
    "--cluster", envvar="ELASTICSEARCH_CLUSTER", help="Elasticsearch cluster URL"
)
@click.option("--user", envvar="ELASTICSEARCH_USER", help="Elasticsearch username")
@click.option(
    "--password", envvar="ELASTICSEARCH_PASSWORD", help="Elasticsearch password"
)
@click.option(
    "--agent_contact",
    "-contact",
    envvar="WIKIMEDIA_AGENT_CONTACT",
    help="(optional) Contact details to add to the User Agent header for Wikidata requests",
    default=None,
)
@click.option(
    "--config",
    "-c",
    type=click.Path(exists=True),
    help="Path to .ini file containing Elasticsearch credentials",
)
@click.option(
    "--index",
    "-i",
    prompt="Elasticsearch index",
    help="Name of Elasticsearch index to load into",
)
@click.option(
    "--limit", "-l", type=int, help="(optional) Limit the number of entities loaded in"
)
@click.option("--page_size", type=int, help="Page size for SPARQL query.", default=100)
@click.option(
    "--language", "-lang", type=str, help="Language (Wikimedia language code)"
)
@click.option(
    "--properties",
    "-prop",
    type=str,
    help="One or more Wikidata property e.g. 'p31' or 'p31 p21'. A path to a file containing newline-separated properties can also be passed. Not case-sensitive",
)
@click.option(
    "--timeout",
    "-t",
    type=int,
    help="Timeout for Wikidata requests (seconds)",
    default=6,
)
@click.option(
    "--disable_refresh/--no_disable_refresh",
    "-dr/-ndr",
    help="Whether to disable Elasticsearch's (CPU-intensive) refresh during data load. Defaults to True. Recommended to leave this on for low-resource machines or large datasets.",
    default=True,
)
def main(
    source,
    path,
    cluster,
    user,
    password,
    agent_contact,
    config,
    index,
    limit,
    page_size,
    language,
    properties,
    timeout,
    disable_refresh,
):
    """Load Wikidata entities into an Elasticsearch index.

    SOURCE is the task to run: 'dump' (load from a Wikidata JSON dump) or
    'query' (load from a SPARQL query).
    """

    # get elasticsearch credentials
    if config:
        # read .ini file
        parser = ConfigParser()
        parser.optionxform = str  # make option names case sensitive
        parser.read(config)
        es_credentials = parser._sections["ELASTIC"]
        check_es_credentials(es_credentials)

        # [HTTP] section is optional: fall back to no contact details rather
        # than raising KeyError when it is absent from the config file
        runtime_config.add_item(
            {
                "user_agent_contact": parser._sections.get("HTTP", {}).get(
                    "CONTACT_DETAILS", None
                )
            }
        )
    else:
        # check environment variables/flags
        es_credentials = {}

        if cluster:
            es_credentials["ELASTICSEARCH_CLUSTER"] = cluster
        if user:
            es_credentials["ELASTICSEARCH_USER"] = user
        if password:
            es_credentials["ELASTICSEARCH_PASSWORD"] = password

        check_es_credentials(es_credentials)

        runtime_config.add_item({"user_agent_contact": agent_contact})

    runtime_config.add_item({"http_timeout": timeout})

    # global flag for all functions that the module is being run through the CLI
    runtime_config.add_item({"cli": True})

    # set kwargs
    kwargs = {}
    if language:
        kwargs["lang"] = language
    if properties:
        # properties is either a path to a newline-separated file of PIDs,
        # or a whitespace-separated string of PIDs
        if os.path.exists(properties):
            with open(properties, "r") as f:
                kwargs["properties"] = f.read().splitlines()
        else:
            kwargs["properties"] = properties.split()

    if disable_refresh:
        kwargs["disable_refresh_on_index"] = disable_refresh

    # run job
    if source == "dump":
        load_from_dump(path, es_credentials, index, limit, **kwargs)
    elif source == "query":
        load_from_sparql(path, es_credentials, index, limit, page_size, **kwargs)
    else:
        # the accepted tasks are 'dump' and 'query' (the old message wrongly
        # told the user to pass 'sparql', which is not handled above)
        raise ValueError(f"Argument {source} must be either dump or query")
137 |
138 |
def load_from_dump(path, es_credentials, index, limit, **kwargs):
    """Create and populate an Elasticsearch index from an ndjson Wikidata dump.

    `limit`, when truthy, caps how many documents are pushed to Elasticsearch.
    Remaining keyword arguments are forwarded to `dump_to_es.processDump`.
    """
    kwargs = kwargs or {}
    if limit:
        kwargs["doc_limit"] = limit

    # limit is used when dumping JSON to Elasticsearch
    processor = dump_to_es.processDump(
        dump=path, es_credentials=es_credentials, index_name=index, **kwargs
    )
    processor.start_elasticsearch()
    processor.dump_to_es()
151 |
152 |
def load_from_sparql(path, es_credentials, index, limit, page_size=100, **kwargs):
    """Populate an Elasticsearch index from the results of a SPARQL query.

    Args:
        path: path to a text/.rq file containing a SPARQL query
        es_credentials: dict of Elasticsearch connection credentials
        index: name of the Elasticsearch index to load into
        limit: (optional) cap on the number of entities retrieved
        page_size: page size used when paginating the SPARQL query
        **kwargs: forwarded to `dump_to_es.processDump`
    """
    if not kwargs:
        kwargs = {}

    with open(path, "r") as f:
        query = f.read()

    # limit is used when getting list of entities
    print("Getting entities from SPARQL query")
    # pass the caller's page_size through (previously hard-coded to 100, which
    # made the --page_size CLI option a no-op)
    entity_list = sparql_to_es.get_entities_from_query(
        query, page_size=page_size, limit=limit
    )

    print(
        f"Retrieving information from wbgetentities API and pushing to ES index {index}"
    )
    d = dump_to_es.processDump(
        dump=entity_list, es_credentials=es_credentials, index_name=index, **kwargs
    )
    d.start_elasticsearch()
    d.dump_to_es()
174 |
175 |
def check_es_credentials(credentials: dict):
    """Raise ValueError if any required Elasticsearch credential is missing.

    Required keys: ELASTICSEARCH_CLUSTER, ELASTICSEARCH_USER,
    ELASTICSEARCH_PASSWORD.
    """
    required = (
        "ELASTICSEARCH_CLUSTER",
        "ELASTICSEARCH_USER",
        "ELASTICSEARCH_PASSWORD",
    )
    missing_credentials = {key for key in required if key not in credentials}

    if missing_credentials:
        raise ValueError(f"Missing Elasticsearch credentials: {missing_credentials}")
187 |
188 |
if __name__ == "__main__":
    # removed a stale block of commented-out debug invocation; run the click
    # entry point directly
    main()
207 |
--------------------------------------------------------------------------------
/elastic_wikidata/dump_to_es.py:
--------------------------------------------------------------------------------
1 | import json
2 | from itertools import islice
3 | from tqdm.auto import tqdm
4 | from elasticsearch import Elasticsearch
5 | from elasticsearch.helpers import streaming_bulk
6 | from typing import Union
7 | from elastic_wikidata.wd_entities import (
8 | get_entities,
9 | wiki_property_check,
10 | simplify_wbgetentities_result,
11 | )
12 |
13 |
class processDump:
    """
    Loads Wikidata records into an Elasticsearch index, either from an ndjson
    dump on disk (one JSON document per line) or from a list of entity QIDs
    fetched through the wbgetentities API.
    """

    def __init__(
        self, dump: Union[str, list], es_credentials: dict, index_name: str, **kwargs
    ):
        """
        Args:
            dump (Union[str, list]): path to an ndjson dump, or a list of
                Wikidata QIDs.
            es_credentials (dict): may contain ELASTICSEARCH_CLUSTER,
                ELASTICSEARCH_USER and ELASTICSEARCH_PASSWORD; if no cluster is
                given, a locally running Elasticsearch instance is used.
            index_name (str): name of the Elasticsearch index to load into.
            **kwargs:
                disable_refresh_on_index (bool): disable index refresh during
                    load (re-enabled afterwards). Defaults to False.
                doc_limit (int): cap on the number of documents indexed.
                lang (str): Wikimedia language code. Defaults to 'en'.
                user_agent_contact (str): contact details for the User-Agent
                    header on Wikidata requests.
                properties (Union[str, list]): Wikidata PID(s) to keep in each
                    document. Defaults to ['P31'].

        Raises:
            ValueError: if dump is neither a str path nor a list.
        """
        self.config = {
            "chunk_size": 1000,
            "queue_size": 8,
        }

        self.es_credentials = es_credentials

        if isinstance(dump, str):
            self.dump_path = dump
            self.entities = None
        elif isinstance(dump, list):
            self.entities = dump
            self.dump_path = None
        else:
            raise ValueError(
                "dump must either be path to JSON dump or Python list of entities"
            )

        self.index_name = index_name

        # process kwargs/set defaults.
        # Use .get with a default: the CLI only sets this key when refresh
        # disabling is requested, so subscripting kwargs directly made
        # --no_disable_refresh crash with KeyError.
        self.disable_refresh_on_index = kwargs.get("disable_refresh_on_index", False)

        self.doc_limit = kwargs.get("doc_limit", None)

        self.wiki_options = {}
        self.wiki_options["lang"] = kwargs.get("lang", "en")

        self.user_agent_contact = kwargs.get("user_agent_contact", None)

        if "properties" in kwargs:
            if isinstance(kwargs["properties"], str) and wiki_property_check(
                kwargs["properties"]
            ):
                self.wiki_options["properties"] = [
                    kwargs["properties"].upper()
                ]  # [P31], not [p31]
            elif isinstance(kwargs["properties"], list):
                self.wiki_options["properties"] = [
                    item.upper()
                    for item in kwargs["properties"]
                    if wiki_property_check(item)
                ]
        else:
            self.wiki_options["properties"] = ["P31"]

    def start_elasticsearch(self):
        """
        Creates an Elasticsearch index. If ELASTICSEARCH_CLUSTER, ELASTICSEARCH_USER
        & ELASTICSEARCH_PASSWORD are specified in config it uses those, otherwise
        uses a locally running Elasticsearch instance.
        """

        if "ELASTICSEARCH_CLUSTER" in self.es_credentials:
            print(
                f"Connecting to Elasticsearch at {self.es_credentials['ELASTICSEARCH_CLUSTER']}"
            )
            self.es = Elasticsearch(
                [self.es_credentials["ELASTICSEARCH_CLUSTER"]],
                http_auth=(
                    self.es_credentials["ELASTICSEARCH_USER"],
                    self.es_credentials["ELASTICSEARCH_PASSWORD"],
                ),
                max_retries=100,
                retry_on_timeout=True,
            )
        else:
            # run on localhost
            print("Connecting to Elasticsearch on localhost")
            self.es = Elasticsearch(
                max_retries=100,
                retry_on_timeout=True,
            )

        # labels and aliases are copied into a combined labels_aliases field so
        # both can be searched with a single query
        mappings = {
            "mappings": {
                "properties": {
                    "labels": {"type": "text", "copy_to": "labels_aliases"},
                    "aliases": {"type": "text", "copy_to": "labels_aliases"},
                    "labels_aliases": {"type": "text", "store": "true"},
                }
            }
        }

        # ignore=400 so an already-existing index is not treated as an error
        self.es.indices.create(index=self.index_name, ignore=400, body=mappings)

        if self.disable_refresh_on_index:
            print(
                "Temporary disabling refresh for the index. Will reset refresh interval to the default (1s) after load is complete."
            )
            self.es.indices.put_settings({"index": {"refresh_interval": -1}})

    def dump_to_es(self):
        """
        Bulk-indexes all documents into Elasticsearch, restoring the index
        refresh interval afterwards (even on failure) if it was disabled.
        """
        print("Indexing documents...")
        successes = 0
        errors = []

        # choose the document generator depending on which source was given.
        # Compare against None (not truthiness) so an empty entity list still
        # selects a generator instead of leaving action_generator unbound.
        if self.dump_path:
            action_generator = self.generate_actions_from_dump()
        elif self.entities is not None:
            action_generator = self.generate_actions_from_entities()

        try:
            for ok, action in tqdm(
                streaming_bulk(
                    client=self.es,
                    index=self.index_name,
                    actions=action_generator,
                    chunk_size=self.config["chunk_size"],
                    # queue_size=self.config["queue_size"],
                    max_retries=3,
                ),
            ):
                if not ok:
                    print(action)
                    errors.append(action)
                successes += ok

        finally:
            if self.disable_refresh_on_index:
                # reset back to default
                print("Refresh interval set back to default of 1s.")
                self.es.indices.put_settings({"index": {"refresh_interval": "1s"}})

    def process_doc(self, doc: dict) -> dict:
        """
        Processes a single document from the JSON dump, returning a filtered version of that document.
        """

        lang = self.wiki_options["lang"]
        properties = self.wiki_options["properties"]

        return simplify_wbgetentities_result(doc, lang, properties)

    def generate_actions_from_dump(self):
        """
        Generator to yield a processed document from the Wikidata JSON dump.
        Each line of the Wikidata JSON dump is a separate document.
        """
        with open(self.dump_path, "r", encoding="utf-8") as f:
            objects = (json.loads(line) for line in f)

            # optionally limit number that are loaded
            if self.doc_limit is not None:
                objects = islice(objects, self.doc_limit)

            for item in objects:
                doc = self.process_doc(item)

                yield doc

    def generate_actions_from_entities(self):
        """
        Generator to yield processed document from list of entities. Calls are made to
        wbgetentities API with page size of 50 to retrieve documents.
        """

        json_generator = get_entities.result_generator(
            self.entities, lang=self.wiki_options["lang"]
        )

        for page in json_generator:
            for item in page:
                yield self.process_doc(item)
192 |
--------------------------------------------------------------------------------
/elastic_wikidata/wd_entities.py:
--------------------------------------------------------------------------------
import re
from math import ceil
from typing import Iterator, List, Union

import requests
from tqdm.auto import tqdm

from elastic_wikidata.config import runtime_config
from elastic_wikidata.http import generate_user_agent
8 |
9 |
class get_entities:
    """
    Helpers for fetching Wikidata entity documents through the wbgetentities API.
    """

    def __init__(self):
        """
        Sets the wbgetentities endpoint and the default set of entity
        properties requested from the API.
        """
        self.endpoint = (
            "http://www.wikidata.org/w/api.php?action=wbgetentities&format=json"
        )

        self.properties = ["labels", "aliases", "claims", "descriptions"]

    @staticmethod
    def _param_join(params: List[str]) -> str:
        """
        Joins list of parameters for the URL. ['a', 'b'] -> "a%7Cb"

        Args:
            params (list): list of parameters (strings)

        Returns:
            str
        """

        # "%7C" is the percent-encoded pipe separator used by the Wikidata API.
        # str.join also handles the single-item and empty cases (the previous
        # params[0] fallback raised IndexError on an empty list).
        return "%7C".join(params)

    @classmethod
    def get_all_results(
        cls, qcodes, lang="en", page_limit=50, timeout: int = None
    ) -> list:
        """
        Get response through the `wbgetentities` API, materialised as one list.

        Args:
            qcodes (Union[str, list]): Wikidata QID or list of QIDs.
            lang (str, optional): Wikimedia language code. Defaults to 'en'.
            page_limit (int, optional): page size for the API. Usually 50, can
                reach 500. Defaults to 50.
            timeout (int, optional): request timeout in seconds; falls back to
                the runtime config's http_timeout when None.

        Returns:
            list: each item is the response for an entity
        """

        results = cls().result_generator(qcodes, lang, page_limit, timeout)

        all_results = []

        print(f"Getting {len(qcodes)} wikidata documents in pages of {page_limit}")

        for res in tqdm(results, total=ceil(len(qcodes) / page_limit)):
            all_results += res

        return all_results

    @classmethod
    def result_generator(
        cls, qcodes, lang="en", page_limit=50, timeout: int = None
    ) -> Iterator[list]:
        """
        Get response through the `wbgetentities` API. Yields `page_limit`
        entities at a time.

        Args:
            qcodes (Union[str, list]): Wikidata QID or list of QIDs.
            lang (str, optional): Wikimedia language code. Defaults to 'en'.
            page_limit (int, optional): page size for the API. Defaults to 50.
            timeout (int, optional): request timeout in seconds; falls back to
                the runtime config's http_timeout when None.

        Yields:
            list: each item is the response for an entity
        """

        if isinstance(qcodes, str):
            qcodes = [qcodes]

        # split the QIDs into pages of at most page_limit each
        qcodes_paginated = [
            qcodes[i : i + page_limit] for i in range(0, len(qcodes), page_limit)
        ]

        headers = {"User-Agent": generate_user_agent()}

        if timeout is None:
            timeout = runtime_config.get("http_timeout")

        with requests.Session() as s:
            for page in qcodes_paginated:
                url = f"http://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids={cls._param_join(page)}&props={cls._param_join(cls().properties)}&languages={lang}&languagefallback=1&formatversion=2"
                response = s.get(url, headers=headers, timeout=timeout).json()
                yield [v for _, v in response["entities"].items()]

    def get_labels(self, qcodes, lang="en", page_limit=50, timeout: int = None) -> dict:
        """
        Get labels from Wikidata qcodes. If the item associated with a qcode has no label, its value
        in the dictionary is an empty string.

        Returns:
            dict: {qid1: label1, qid2: label2, ...}
        """

        qid_label_mapping = dict()
        qcodes = list(set(qcodes))

        docs = self.get_all_results(qcodes, lang, page_limit, timeout)

        for doc in docs:
            # doc.get guards against responses with no "labels" key at all
            qid_label_mapping[doc["id"]] = (
                doc.get("labels", {}).get(lang, {}).get("value", "")
            )

        return qid_label_mapping
110 |
111 |
def simplify_wbgetentities_result(
    doc: Union[dict, List[dict]],
    lang: str,
    properties: list,
    use_redirected_qid: bool = False,
) -> Union[dict, List[dict]]:
    """
    Processes a single document or set of documents from the JSON result of wbgetentities, returning a simplified version of that document.

    Args:
        doc (Union[dict, List[dict]]): JSON result from Wikidata wbgetentities API
        lang (str): Wikimedia language code
        properties (list): list of Wikidata properties
        use_redirected_qid (bool, optional): whether to return the redirected QID value under the 'id' field instead of the original QID
        if there is one. Defaults to False.

    Returns:
        Union[dict, List[dict]]: dict if single record passed in; list if multiple records
    """

    # if a list of docs is passed in, process each one. use_redirected_qid is
    # forwarded (previously it was dropped for lists), and an empty list now
    # returns [] instead of raising IndexError on doc[0].
    if isinstance(doc, list):
        return [
            simplify_wbgetentities_result(item, lang, properties, use_redirected_qid)
            for item in doc
        ]

    # wbgetentities datavalue types for which a single field is extracted
    wd_type_mapping = {
        "wikibase-entityid": "id",
        "time": "time",
        "monolingualtext": "text",
        "quantity": "amount",
    }

    # check for redirected URL
    if "redirects" in doc:
        if use_redirected_qid:
            newdoc = {"id": doc["redirects"]["to"]}
        else:
            newdoc = {"id": doc["redirects"]["from"]}

    else:
        newdoc = {"id": doc["id"]}

    # add label(s)
    if lang in doc.get("labels", {}):
        newdoc["labels"] = doc["labels"][lang]["value"]

    # add descriptions(s)
    if lang in doc.get("descriptions", {}):
        newdoc["descriptions"] = doc["descriptions"][lang]["value"]

    # add aliases
    if (len(doc.get("aliases", {})) > 0) and (lang in doc.get("aliases", {})):
        newdoc["aliases"] = [i["value"] for i in doc["aliases"][lang]]
    else:
        newdoc["aliases"] = []

    # add claims (property values)
    newdoc["claims"] = {}

    if "claims" in doc:
        for p in properties:
            if p in doc["claims"]:
                claims = []
                for i in doc["claims"][p]:
                    # initialised before the try block so the warning in the
                    # except clause can't raise NameError if the first lookup
                    # is what failed
                    value_type = None
                    try:
                        value_type = i["mainsnak"]["datavalue"]["type"]
                        if value_type in wd_type_mapping:
                            # Return specific value for certain types.
                            value_name = wd_type_mapping[value_type]
                            claims.append(
                                i["mainsnak"]["datavalue"]["value"][value_name]
                            )
                        else:
                            # Otherwise return the whole dictionary.
                            claims.append(i["mainsnak"]["datavalue"]["value"])
                    except KeyError:
                        print(
                            f"WARNING: property {p} with datatype {value_type} failed to process. Consider forking this code and implementing support for it."
                        )

                newdoc["claims"][p] = claims

    return newdoc
194 |
195 |
def wiki_property_check(p):
    """Return True if `p` contains exactly one Wikidata property ID
    (e.g. 'p31' or 'P279', case-insensitive); otherwise print a warning
    and return False.
    """
    matches = re.findall(r"(p\d+)", p.lower())

    if len(matches) != 1:
        print(f"WARNING: property {p} is not a valid Wikidata property")
        return False

    return True
202 |
--------------------------------------------------------------------------------
/Pipfile.lock:
--------------------------------------------------------------------------------
1 | {
2 | "_meta": {
3 | "hash": {
4 | "sha256": "acf1c4fff67ab549cc8414c05abe84231399324830aa057d3dbe493e19a132b6"
5 | },
6 | "pipfile-spec": 6,
7 | "requires": {
8 | "python_version": "3.7"
9 | },
10 | "sources": [
11 | {
12 | "name": "pypi",
13 | "url": "https://pypi.org/simple",
14 | "verify_ssl": true
15 | }
16 | ]
17 | },
18 | "default": {
19 | "certifi": {
20 | "hashes": [
21 | "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3",
22 | "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"
23 | ],
24 | "version": "==2020.6.20"
25 | },
26 | "chardet": {
27 | "hashes": [
28 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
29 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
30 | ],
31 | "version": "==3.0.4"
32 | },
33 | "click": {
34 | "hashes": [
35 | "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a",
36 | "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"
37 | ],
38 | "index": "pypi",
39 | "version": "==7.1.2"
40 | },
41 | "elasticsearch": {
42 | "hashes": [
43 | "sha256:2ffbd746fc7d2db08e5ede29c822483705f29c4bf43b0875c238637d5d843d44",
44 | "sha256:92b534931865a186906873f75ae0b91808ff5036b0f2b9269eb5f6dc09644b55"
45 | ],
46 | "index": "pypi",
47 | "version": "==7.8.1"
48 | },
49 | "idna": {
50 | "hashes": [
51 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
52 | "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
53 | ],
54 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
55 | "version": "==2.10"
56 | },
57 | "isodate": {
58 | "hashes": [
59 | "sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8",
60 | "sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81"
61 | ],
62 | "version": "==0.6.0"
63 | },
64 | "pyparsing": {
65 | "hashes": [
66 | "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
67 | "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
68 | ],
69 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
70 | "version": "==2.4.7"
71 | },
72 | "rdflib": {
73 | "hashes": [
74 | "sha256:78149dd49d385efec3b3adfbd61c87afaf1281c30d3fcaf1b323b34f603fb155",
75 | "sha256:88208ea971a87886d60ae2b1a4b2cdc263527af0454c422118d43fe64b357877"
76 | ],
77 | "version": "==5.0.0"
78 | },
79 | "requests": {
80 | "hashes": [
81 | "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b",
82 | "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"
83 | ],
84 | "index": "pypi",
85 | "version": "==2.24.0"
86 | },
87 | "six": {
88 | "hashes": [
89 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
90 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
91 | ],
92 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
93 | "version": "==1.15.0"
94 | },
95 | "sparqlwrapper": {
96 | "hashes": [
97 | "sha256:17ec44b08b8ae2888c801066249f74fe328eec25d90203ce7eadaf82e64484c7",
98 | "sha256:357ee8a27bc910ea13d77836dbddd0b914991495b8cc1bf70676578155e962a8",
99 | "sha256:8cf6c21126ed76edc85c5c232fd6f77b9f61f8ad1db90a7147cdde2104aff145",
100 | "sha256:c7f9c9d8ebb13428771bc3b6dee54197422507dcc3dea34e30d5dcfc53478dec",
101 | "sha256:d6a66b5b8cda141660e07aeb00472db077a98d22cb588c973209c7336850fb3c"
102 | ],
103 | "index": "pypi",
104 | "version": "==1.8.5"
105 | },
106 | "tqdm": {
107 | "hashes": [
108 | "sha256:1a336d2b829be50e46b84668691e0a2719f26c97c62846298dd5ae2937e4d5cf",
109 | "sha256:564d632ea2b9cb52979f7956e093e831c28d441c11751682f84c86fc46e4fd21"
110 | ],
111 | "index": "pypi",
112 | "version": "==4.48.2"
113 | },
114 | "urllib3": {
115 | "hashes": [
116 | "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a",
117 | "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461"
118 | ],
119 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
120 | "version": "==1.25.10"
121 | }
122 | },
123 | "develop": {
124 | "appdirs": {
125 | "hashes": [
126 | "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41",
127 | "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"
128 | ],
129 | "version": "==1.4.4"
130 | },
131 | "appnope": {
132 | "hashes": [
133 | "sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0",
134 | "sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71"
135 | ],
136 | "markers": "sys_platform == 'darwin' and platform_system == 'Darwin'",
137 | "version": "==0.1.0"
138 | },
139 | "argon2-cffi": {
140 | "hashes": [
141 | "sha256:05a8ac07c7026542377e38389638a8a1e9b78f1cd8439cd7493b39f08dd75fbf",
142 | "sha256:0bf066bc049332489bb2d75f69216416329d9dc65deee127152caeb16e5ce7d5",
143 | "sha256:18dee20e25e4be86680b178b35ccfc5d495ebd5792cd00781548d50880fee5c5",
144 | "sha256:392c3c2ef91d12da510cfb6f9bae52512a4552573a9e27600bdb800e05905d2b",
145 | "sha256:57358570592c46c420300ec94f2ff3b32cbccd10d38bdc12dc6979c4a8484fbc",
146 | "sha256:6678bb047373f52bcff02db8afab0d2a77d83bde61cfecea7c5c62e2335cb203",
147 | "sha256:6ea92c980586931a816d61e4faf6c192b4abce89aa767ff6581e6ddc985ed003",
148 | "sha256:77e909cc756ef81d6abb60524d259d959bab384832f0c651ed7dcb6e5ccdbb78",
149 | "sha256:7d455c802727710e9dfa69b74ccaab04568386ca17b0ad36350b622cd34606fe",
150 | "sha256:9bee3212ba4f560af397b6d7146848c32a800652301843df06b9e8f68f0f7361",
151 | "sha256:9dfd5197852530294ecb5795c97a823839258dfd5eb9420233c7cfedec2058f2",
152 | "sha256:b160416adc0f012fb1f12588a5e6954889510f82f698e23ed4f4fa57f12a0647",
153 | "sha256:ba7209b608945b889457f949cc04c8e762bed4fe3fec88ae9a6b7765ae82e496",
154 | "sha256:cc0e028b209a5483b6846053d5fd7165f460a1f14774d79e632e75e7ae64b82b",
155 | "sha256:d8029b2d3e4b4cea770e9e5a0104dd8fa185c1724a0f01528ae4826a6d25f97d",
156 | "sha256:da7f0445b71db6d3a72462e04f36544b0de871289b0bc8a7cc87c0f5ec7079fa"
157 | ],
158 | "version": "==20.1.0"
159 | },
160 | "astroid": {
161 | "hashes": [
162 | "sha256:2f4078c2a41bf377eea06d71c9d2ba4eb8f6b1af2135bec27bbbb7d8f12bb703",
163 | "sha256:bc58d83eb610252fd8de6363e39d4f1d0619c894b0ed24603b881c02e64c7386"
164 | ],
165 | "markers": "python_version >= '3.5'",
166 | "version": "==2.4.2"
167 | },
168 | "attrs": {
169 | "hashes": [
170 | "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c",
171 | "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"
172 | ],
173 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
174 | "version": "==19.3.0"
175 | },
176 | "backcall": {
177 | "hashes": [
178 | "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e",
179 | "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"
180 | ],
181 | "version": "==0.2.0"
182 | },
183 | "black": {
184 | "hashes": [
185 | "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b",
186 | "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"
187 | ],
188 | "index": "pypi",
189 | "version": "==19.10b0"
190 | },
191 | "bleach": {
192 | "hashes": [
193 | "sha256:2bce3d8fab545a6528c8fa5d9f9ae8ebc85a56da365c7f85180bfe96a35ef22f",
194 | "sha256:3c4c520fdb9db59ef139915a5db79f8b51bc2a7257ea0389f30c846883430a4b"
195 | ],
196 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
197 | "version": "==3.1.5"
198 | },
199 | "certifi": {
200 | "hashes": [
201 | "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3",
202 | "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41"
203 | ],
204 | "version": "==2020.6.20"
205 | },
206 | "cffi": {
207 | "hashes": [
208 | "sha256:267adcf6e68d77ba154334a3e4fc921b8e63cbb38ca00d33d40655d4228502bc",
209 | "sha256:26f33e8f6a70c255767e3c3f957ccafc7f1f706b966e110b855bfe944511f1f9",
210 | "sha256:3cd2c044517f38d1b577f05927fb9729d3396f1d44d0c659a445599e79519792",
211 | "sha256:4a03416915b82b81af5502459a8a9dd62a3c299b295dcdf470877cb948d655f2",
212 | "sha256:4ce1e995aeecf7cc32380bc11598bfdfa017d592259d5da00fc7ded11e61d022",
213 | "sha256:4f53e4128c81ca3212ff4cf097c797ab44646a40b42ec02a891155cd7a2ba4d8",
214 | "sha256:4fa72a52a906425416f41738728268072d5acfd48cbe7796af07a923236bcf96",
215 | "sha256:66dd45eb9530e3dde8f7c009f84568bc7cac489b93d04ac86e3111fb46e470c2",
216 | "sha256:6923d077d9ae9e8bacbdb1c07ae78405a9306c8fd1af13bfa06ca891095eb995",
217 | "sha256:833401b15de1bb92791d7b6fb353d4af60dc688eaa521bd97203dcd2d124a7c1",
218 | "sha256:8416ed88ddc057bab0526d4e4e9f3660f614ac2394b5e019a628cdfff3733849",
219 | "sha256:892daa86384994fdf4856cb43c93f40cbe80f7f95bb5da94971b39c7f54b3a9c",
220 | "sha256:98be759efdb5e5fa161e46d404f4e0ce388e72fbf7d9baf010aff16689e22abe",
221 | "sha256:a6d28e7f14ecf3b2ad67c4f106841218c8ab12a0683b1528534a6c87d2307af3",
222 | "sha256:b1d6ebc891607e71fd9da71688fcf332a6630b7f5b7f5549e6e631821c0e5d90",
223 | "sha256:b2a2b0d276a136146e012154baefaea2758ef1f56ae9f4e01c612b0831e0bd2f",
224 | "sha256:b87dfa9f10a470eee7f24234a37d1d5f51e5f5fa9eeffda7c282e2b8f5162eb1",
225 | "sha256:bac0d6f7728a9cc3c1e06d4fcbac12aaa70e9379b3025b27ec1226f0e2d404cf",
226 | "sha256:c991112622baee0ae4d55c008380c32ecfd0ad417bcd0417ba432e6ba7328caa",
227 | "sha256:cda422d54ee7905bfc53ee6915ab68fe7b230cacf581110df4272ee10462aadc",
228 | "sha256:d3148b6ba3923c5850ea197a91a42683f946dba7e8eb82dfa211ab7e708de939",
229 | "sha256:d6033b4ffa34ef70f0b8086fd4c3df4bf801fee485a8a7d4519399818351aa8e",
230 | "sha256:ddff0b2bd7edcc8c82d1adde6dbbf5e60d57ce985402541cd2985c27f7bec2a0",
231 | "sha256:e23cb7f1d8e0f93addf0cae3c5b6f00324cccb4a7949ee558d7b6ca973ab8ae9",
232 | "sha256:effd2ba52cee4ceff1a77f20d2a9f9bf8d50353c854a282b8760ac15b9833168",
233 | "sha256:f90c2267101010de42f7273c94a1f026e56cbc043f9330acd8a80e64300aba33",
234 | "sha256:f960375e9823ae6a07072ff7f8a85954e5a6434f97869f50d0e41649a1c8144f",
235 | "sha256:fcf32bf76dc25e30ed793145a57426064520890d7c02866eb93d3e4abe516948"
236 | ],
237 | "version": "==1.14.1"
238 | },
239 | "cfgv": {
240 | "hashes": [
241 | "sha256:32e43d604bbe7896fe7c248a9c2276447dbef840feb28fe20494f62af110211d",
242 | "sha256:cf22deb93d4bcf92f345a5c3cd39d3d41d6340adc60c78bbbd6588c384fda6a1"
243 | ],
244 | "markers": "python_full_version >= '3.6.1'",
245 | "version": "==3.2.0"
246 | },
247 | "chardet": {
248 | "hashes": [
249 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
250 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
251 | ],
252 | "version": "==3.0.4"
253 | },
254 | "click": {
255 | "hashes": [
256 | "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a",
257 | "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"
258 | ],
259 | "index": "pypi",
260 | "version": "==7.1.2"
261 | },
262 | "decorator": {
263 | "hashes": [
264 | "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760",
265 | "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7"
266 | ],
267 | "version": "==4.4.2"
268 | },
269 | "defusedxml": {
270 | "hashes": [
271 | "sha256:6687150770438374ab581bb7a1b327a847dd9c5749e396102de3fad4e8a3ef93",
272 | "sha256:f684034d135af4c6cbb949b8a4d2ed61634515257a67299e5f940fbaa34377f5"
273 | ],
274 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
275 | "version": "==0.6.0"
276 | },
277 | "distlib": {
278 | "hashes": [
279 | "sha256:8c09de2c67b3e7deef7184574fc060ab8a793e7adbb183d942c389c8b13c52fb",
280 | "sha256:edf6116872c863e1aa9d5bb7cb5e05a022c519a4594dc703843343a9ddd9bff1"
281 | ],
282 | "version": "==0.3.1"
283 | },
284 | "entrypoints": {
285 | "hashes": [
286 | "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19",
287 | "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"
288 | ],
289 | "markers": "python_version >= '2.7'",
290 | "version": "==0.3"
291 | },
292 | "filelock": {
293 | "hashes": [
294 | "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59",
295 | "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"
296 | ],
297 | "version": "==3.0.12"
298 | },
299 | "flake8": {
300 | "hashes": [
301 | "sha256:15e351d19611c887e482fb960eae4d44845013cc142d42896e9862f775d8cf5c",
302 | "sha256:f04b9fcbac03b0a3e58c0ab3a0ecc462e023a9faf046d57794184028123aa208"
303 | ],
304 | "index": "pypi",
305 | "version": "==3.8.3"
306 | },
307 | "identify": {
308 | "hashes": [
309 | "sha256:110ed090fec6bce1aabe3c72d9258a9de82207adeaa5a05cd75c635880312f9a",
310 | "sha256:ccd88716b890ecbe10920659450a635d2d25de499b9a638525a48b48261d989b"
311 | ],
312 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
313 | "version": "==1.4.25"
314 | },
315 | "idna": {
316 | "hashes": [
317 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
318 | "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
319 | ],
320 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
321 | "version": "==2.10"
322 | },
323 | "importlib-metadata": {
324 | "hashes": [
325 | "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83",
326 | "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070"
327 | ],
328 | "markers": "python_version < '3.8'",
329 | "version": "==1.7.0"
330 | },
331 | "iniconfig": {
332 | "hashes": [
333 | "sha256:80cf40c597eb564e86346103f609d74efce0f6b4d4f30ec8ce9e2c26411ba437",
334 | "sha256:e5f92f89355a67de0595932a6c6c02ab4afddc6fcdc0bfc5becd0d60884d3f69"
335 | ],
336 | "version": "==1.0.1"
337 | },
338 | "ipykernel": {
339 | "hashes": [
340 | "sha256:9b2652af1607986a1b231c62302d070bc0534f564c393a5d9d130db9abbbe89d",
341 | "sha256:d6fbba26dba3cebd411382bc484f7bc2caa98427ae0ddb4ab37fe8bfeb5c7dd3"
342 | ],
343 | "markers": "python_version >= '3.5'",
344 | "version": "==5.3.4"
345 | },
346 | "ipython": {
347 | "hashes": [
348 | "sha256:5a8f159ca8b22b9a0a1f2a28befe5ad2b703339afb58c2ffe0d7c8d7a3af5999",
349 | "sha256:b70974aaa2674b05eb86a910c02ed09956a33f2dd6c71afc60f0b128a77e7f28"
350 | ],
351 | "markers": "python_version >= '3.7'",
352 | "version": "==7.17.0"
353 | },
354 | "ipython-genutils": {
355 | "hashes": [
356 | "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8",
357 | "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8"
358 | ],
359 | "version": "==0.2.0"
360 | },
361 | "isort": {
362 | "hashes": [
363 | "sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1",
364 | "sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd"
365 | ],
366 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
367 | "version": "==4.3.21"
368 | },
369 | "jedi": {
370 | "hashes": [
371 | "sha256:86ed7d9b750603e4ba582ea8edc678657fb4007894a12bcf6f4bb97892f31d20",
372 | "sha256:98cc583fa0f2f8304968199b01b6b4b94f469a1f4a74c1560506ca2a211378b5"
373 | ],
374 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
375 | "version": "==0.17.2"
376 | },
377 | "jinja2": {
378 | "hashes": [
379 | "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0",
380 | "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"
381 | ],
382 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
383 | "version": "==2.11.2"
384 | },
385 | "json5": {
386 | "hashes": [
387 | "sha256:703cfee540790576b56a92e1c6aaa6c4b0d98971dc358ead83812aa4d06bdb96",
388 | "sha256:af1a1b9a2850c7f62c23fde18be4749b3599fd302f494eebf957e2ada6b9e42c"
389 | ],
390 | "version": "==0.9.5"
391 | },
392 | "jsonschema": {
393 | "hashes": [
394 | "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163",
395 | "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"
396 | ],
397 | "version": "==3.2.0"
398 | },
399 | "jupyter-client": {
400 | "hashes": [
401 | "sha256:7ad9aa91505786420d77edc5f9fb170d51050c007338ba8d196f603223fd3b3a",
402 | "sha256:b360f8d4638bc577a4656e93f86298db755f915098dc763f6fc05da0c5d7a595"
403 | ],
404 | "markers": "python_version >= '3.5'",
405 | "version": "==6.1.6"
406 | },
407 | "jupyter-core": {
408 | "hashes": [
409 | "sha256:394fd5dd787e7c8861741880bdf8a00ce39f95de5d18e579c74b882522219e7e",
410 | "sha256:a4ee613c060fe5697d913416fc9d553599c05e4492d58fac1192c9a6844abb21"
411 | ],
412 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
413 | "version": "==4.6.3"
414 | },
415 | "jupyterlab": {
416 | "hashes": [
417 | "sha256:8aa9bc4b5020e7b9ec6e006d516d48bddf7d2528680af65840464ee722d59db3",
418 | "sha256:d0d743ea75b8eee20a18b96ccef24f76ee009bafb2617f3f330698fe3a00026e"
419 | ],
420 | "index": "pypi",
421 | "version": "==2.2.2"
422 | },
423 | "jupyterlab-server": {
424 | "hashes": [
425 | "sha256:5431d9dde96659364b7cc877693d5d21e7b80cea7ae3959ecc2b87518e5f5d8c",
426 | "sha256:55d256077bf13e5bc9e8fbd5aac51bef82f6315111cec6b712b9a5ededbba924"
427 | ],
428 | "markers": "python_version >= '3.5'",
429 | "version": "==1.2.0"
430 | },
431 | "lazy-object-proxy": {
432 | "hashes": [
433 | "sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d",
434 | "sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449",
435 | "sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08",
436 | "sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a",
437 | "sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50",
438 | "sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd",
439 | "sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239",
440 | "sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb",
441 | "sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea",
442 | "sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e",
443 | "sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156",
444 | "sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142",
445 | "sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442",
446 | "sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62",
447 | "sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db",
448 | "sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531",
449 | "sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383",
450 | "sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a",
451 | "sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357",
452 | "sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4",
453 | "sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0"
454 | ],
455 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
456 | "version": "==1.4.3"
457 | },
458 | "markupsafe": {
459 | "hashes": [
460 | "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473",
461 | "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161",
462 | "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
463 | "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
464 | "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42",
465 | "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
466 | "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
467 | "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
468 | "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
469 | "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
470 | "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66",
471 | "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b",
472 | "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1",
473 | "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15",
474 | "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
475 | "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
476 | "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
477 | "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
478 | "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
479 | "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d",
480 | "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e",
481 | "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d",
482 | "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c",
483 | "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21",
484 | "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2",
485 | "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5",
486 | "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b",
487 | "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6",
488 | "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f",
489 | "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f",
490 | "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2",
491 | "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7",
492 | "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be"
493 | ],
494 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
495 | "version": "==1.1.1"
496 | },
497 | "mccabe": {
498 | "hashes": [
499 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
500 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
501 | ],
502 | "version": "==0.6.1"
503 | },
504 | "mistune": {
505 | "hashes": [
506 | "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e",
507 | "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4"
508 | ],
509 | "version": "==0.8.4"
510 | },
511 | "more-itertools": {
512 | "hashes": [
513 | "sha256:68c70cc7167bdf5c7c9d8f6954a7837089c6a36bf565383919bb595efb8a17e5",
514 | "sha256:b78134b2063dd214000685165d81c154522c3ee0a1c0d4d113c80361c234c5a2"
515 | ],
516 | "markers": "python_version >= '3.5'",
517 | "version": "==8.4.0"
518 | },
519 | "nbconvert": {
520 | "hashes": [
521 | "sha256:21fb48e700b43e82ba0e3142421a659d7739b65568cc832a13976a77be16b523",
522 | "sha256:f0d6ec03875f96df45aa13e21fd9b8450c42d7e1830418cccc008c0df725fcee"
523 | ],
524 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
525 | "version": "==5.6.1"
526 | },
527 | "nbformat": {
528 | "hashes": [
529 | "sha256:54d4d6354835a936bad7e8182dcd003ca3dc0cedfee5a306090e04854343b340",
530 | "sha256:ea55c9b817855e2dfcd3f66d74857342612a60b1f09653440f4a5845e6e3523f"
531 | ],
532 | "markers": "python_version >= '3.5'",
533 | "version": "==5.0.7"
534 | },
535 | "nodeenv": {
536 | "hashes": [
537 | "sha256:4b0b77afa3ba9b54f4b6396e60b0c83f59eaeb2d63dc3cc7a70f7f4af96c82bc"
538 | ],
539 | "version": "==1.4.0"
540 | },
541 | "notebook": {
542 | "hashes": [
543 | "sha256:42391d8f3b88676e774316527599e49c11f3a7e51c41035e9e44c1b58e1398d5",
544 | "sha256:4cc4e44a43a83a7c2f5e85bfdbbfe1c68bed91b857741df9e593d213a6fc2d27"
545 | ],
546 | "markers": "python_version >= '3.5'",
547 | "version": "==6.1.1"
548 | },
549 | "packaging": {
550 | "hashes": [
551 | "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8",
552 | "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181"
553 | ],
554 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
555 | "version": "==20.4"
556 | },
557 | "pandocfilters": {
558 | "hashes": [
559 | "sha256:b3dd70e169bb5449e6bc6ff96aea89c5eea8c5f6ab5e207fc2f521a2cf4a0da9"
560 | ],
561 | "version": "==1.4.2"
562 | },
563 | "parso": {
564 | "hashes": [
565 | "sha256:97218d9159b2520ff45eb78028ba8b50d2bc61dcc062a9682666f2dc4bd331ea",
566 | "sha256:caba44724b994a8a5e086460bb212abc5a8bc46951bf4a9a1210745953622eb9"
567 | ],
568 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
569 | "version": "==0.7.1"
570 | },
571 | "pathspec": {
572 | "hashes": [
573 | "sha256:7d91249d21749788d07a2d0f94147accd8f845507400749ea19c1ec9054a12b0",
574 | "sha256:da45173eb3a6f2a5a487efba21f050af2b41948be6ab52b6a1e3ff22bb8b7061"
575 | ],
576 | "version": "==0.8.0"
577 | },
578 | "pexpect": {
579 | "hashes": [
580 | "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937",
581 | "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"
582 | ],
583 | "markers": "sys_platform != 'win32'",
584 | "version": "==4.8.0"
585 | },
586 | "pickleshare": {
587 | "hashes": [
588 | "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca",
589 | "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"
590 | ],
591 | "version": "==0.7.5"
592 | },
593 | "pluggy": {
594 | "hashes": [
595 | "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0",
596 | "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"
597 | ],
598 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
599 | "version": "==0.13.1"
600 | },
601 | "pre-commit": {
602 | "hashes": [
603 | "sha256:1657663fdd63a321a4a739915d7d03baedd555b25054449090f97bb0cb30a915",
604 | "sha256:e8b1315c585052e729ab7e99dcca5698266bedce9067d21dc909c23e3ceed626"
605 | ],
606 | "index": "pypi",
607 | "version": "==2.6.0"
608 | },
609 | "prometheus-client": {
610 | "hashes": [
611 | "sha256:983c7ac4b47478720db338f1491ef67a100b474e3bc7dafcbaefb7d0b8f9b01c",
612 | "sha256:c6e6b706833a6bd1fd51711299edee907857be10ece535126a158f911ee80915"
613 | ],
614 | "version": "==0.8.0"
615 | },
616 | "prompt-toolkit": {
617 | "hashes": [
618 | "sha256:563d1a4140b63ff9dd587bda9557cffb2fe73650205ab6f4383092fb882e7dc8",
619 | "sha256:df7e9e63aea609b1da3a65641ceaf5bc7d05e0a04de5bd45d05dbeffbabf9e04"
620 | ],
621 | "markers": "python_full_version >= '3.6.1'",
622 | "version": "==3.0.5"
623 | },
624 | "ptyprocess": {
625 | "hashes": [
626 | "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0",
627 | "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f"
628 | ],
629 | "markers": "os_name != 'nt'",
630 | "version": "==0.6.0"
631 | },
632 | "py": {
633 | "hashes": [
634 | "sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2",
635 | "sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342"
636 | ],
637 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
638 | "version": "==1.9.0"
639 | },
640 | "pycodestyle": {
641 | "hashes": [
642 | "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367",
643 | "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e"
644 | ],
645 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
646 | "version": "==2.6.0"
647 | },
648 | "pycparser": {
649 | "hashes": [
650 | "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
651 | "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"
652 | ],
653 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
654 | "version": "==2.20"
655 | },
656 | "pyflakes": {
657 | "hashes": [
658 | "sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92",
659 | "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"
660 | ],
661 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
662 | "version": "==2.2.0"
663 | },
664 | "pygments": {
665 | "hashes": [
666 | "sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44",
667 | "sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324"
668 | ],
669 | "markers": "python_version >= '3.5'",
670 | "version": "==2.6.1"
671 | },
672 | "pylint": {
673 | "hashes": [
674 | "sha256:7dd78437f2d8d019717dbf287772d0b2dbdfd13fc016aa7faa08d67bccc46adc",
675 | "sha256:d0ece7d223fe422088b0e8f13fa0a1e8eb745ebffcb8ed53d3e95394b6101a1c"
676 | ],
677 | "index": "pypi",
678 | "version": "==2.5.3"
679 | },
680 | "pyparsing": {
681 | "hashes": [
682 | "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1",
683 | "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"
684 | ],
685 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
686 | "version": "==2.4.7"
687 | },
688 | "pyrsistent": {
689 | "hashes": [
690 | "sha256:28669905fe725965daa16184933676547c5bb40a5153055a8dee2a4bd7933ad3"
691 | ],
692 | "version": "==0.16.0"
693 | },
694 | "pytest": {
695 | "hashes": [
696 | "sha256:85228d75db9f45e06e57ef9bf4429267f81ac7c0d742cc9ed63d09886a9fe6f4",
697 | "sha256:8b6007800c53fdacd5a5c192203f4e531eb2a1540ad9c752e052ec0f7143dbad"
698 | ],
699 | "index": "pypi",
700 | "version": "==6.0.1"
701 | },
702 | "python-dateutil": {
703 | "hashes": [
704 | "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
705 | "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
706 | ],
707 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
708 | "version": "==2.8.1"
709 | },
710 | "pyyaml": {
711 | "hashes": [
712 | "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
713 | "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
714 | "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
715 | "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
716 | "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
717 | "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
718 | "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
719 | "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
720 | "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
721 | "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
722 | "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
723 | ],
724 | "version": "==5.3.1"
725 | },
726 | "pyzmq": {
727 | "hashes": [
728 | "sha256:00dca814469436455399660247d74045172955459c0bd49b54a540ce4d652185",
729 | "sha256:046b92e860914e39612e84fa760fc3f16054d268c11e0e25dcb011fb1bc6a075",
730 | "sha256:09d24a80ccb8cbda1af6ed8eb26b005b6743e58e9290566d2a6841f4e31fa8e0",
731 | "sha256:0a422fc290d03958899743db091f8154958410fc76ce7ee0ceb66150f72c2c97",
732 | "sha256:276ad604bffd70992a386a84bea34883e696a6b22e7378053e5d3227321d9702",
733 | "sha256:296540a065c8c21b26d63e3cea2d1d57902373b16e4256afe46422691903a438",
734 | "sha256:29d51279060d0a70f551663bc592418bcad7f4be4eea7b324f6dd81de05cb4c1",
735 | "sha256:36ab114021c0cab1a423fe6689355e8f813979f2c750968833b318c1fa10a0fd",
736 | "sha256:3fa6debf4bf9412e59353defad1f8035a1e68b66095a94ead8f7a61ae90b2675",
737 | "sha256:5120c64646e75f6db20cc16b9a94203926ead5d633de9feba4f137004241221d",
738 | "sha256:59f1e54627483dcf61c663941d94c4af9bf4163aec334171686cdaee67974fe5",
739 | "sha256:5d9fc809aa8d636e757e4ced2302569d6e60e9b9c26114a83f0d9d6519c40493",
740 | "sha256:654d3e06a4edc566b416c10293064732516cf8871a4522e0a2ba00cc2a2e600c",
741 | "sha256:720d2b6083498a9281eaee3f2927486e9fe02cd16d13a844f2e95217f243efea",
742 | "sha256:73483a2caaa0264ac717af33d6fb3f143d8379e60a422730ee8d010526ce1913",
743 | "sha256:8a6ada5a3f719bf46a04ba38595073df8d6b067316c011180102ba2a1925f5b5",
744 | "sha256:8b66b94fe6243d2d1d89bca336b2424399aac57932858b9a30309803ffc28112",
745 | "sha256:99cc0e339a731c6a34109e5c4072aaa06d8e32c0b93dc2c2d90345dd45fa196c",
746 | "sha256:a7e7f930039ee0c4c26e4dfee015f20bd6919cd8b97c9cd7afbde2923a5167b6",
747 | "sha256:ab0d01148d13854de716786ca73701012e07dff4dfbbd68c4e06d8888743526e",
748 | "sha256:c1a31cd42905b405530e92bdb70a8a56f048c8a371728b8acf9d746ecd4482c0",
749 | "sha256:c20dd60b9428f532bc59f2ef6d3b1029a28fc790d408af82f871a7db03e722ff",
750 | "sha256:c36ffe1e5aa35a1af6a96640d723d0d211c5f48841735c2aa8d034204e87eb87",
751 | "sha256:c40fbb2b9933369e994b837ee72193d6a4c35dfb9a7c573257ef7ff28961272c",
752 | "sha256:d46fb17f5693244de83e434648b3dbb4f4b0fec88415d6cbab1c1452b6f2ae17",
753 | "sha256:e36f12f503511d72d9bdfae11cadbadca22ff632ff67c1b5459f69756a029c19",
754 | "sha256:f1a25a61495b6f7bb986accc5b597a3541d9bd3ef0016f50be16dbb32025b302",
755 | "sha256:fa411b1d8f371d3a49d31b0789eb6da2537dadbb2aef74a43aa99a78195c3f76"
756 | ],
757 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
758 | "version": "==19.0.2"
759 | },
760 | "regex": {
761 | "hashes": [
762 | "sha256:0dc64ee3f33cd7899f79a8d788abfbec168410be356ed9bd30bbd3f0a23a7204",
763 | "sha256:1269fef3167bb52631ad4fa7dd27bf635d5a0790b8e6222065d42e91bede4162",
764 | "sha256:14a53646369157baa0499513f96091eb70382eb50b2c82393d17d7ec81b7b85f",
765 | "sha256:3a3af27a8d23143c49a3420efe5b3f8cf1a48c6fc8bc6856b03f638abc1833bb",
766 | "sha256:46bac5ca10fb748d6c55843a931855e2727a7a22584f302dd9bb1506e69f83f6",
767 | "sha256:4c037fd14c5f4e308b8370b447b469ca10e69427966527edcab07f52d88388f7",
768 | "sha256:51178c738d559a2d1071ce0b0f56e57eb315bcf8f7d4cf127674b533e3101f88",
769 | "sha256:5ea81ea3dbd6767873c611687141ec7b06ed8bab43f68fad5b7be184a920dc99",
770 | "sha256:6961548bba529cac7c07af2fd4d527c5b91bb8fe18995fed6044ac22b3d14644",
771 | "sha256:75aaa27aa521a182824d89e5ab0a1d16ca207318a6b65042b046053cfc8ed07a",
772 | "sha256:7a2dd66d2d4df34fa82c9dc85657c5e019b87932019947faece7983f2089a840",
773 | "sha256:8a51f2c6d1f884e98846a0a9021ff6861bdb98457879f412fdc2b42d14494067",
774 | "sha256:9c568495e35599625f7b999774e29e8d6b01a6fb684d77dee1f56d41b11b40cd",
775 | "sha256:9eddaafb3c48e0900690c1727fba226c4804b8e6127ea409689c3bb492d06de4",
776 | "sha256:bbb332d45b32df41200380fff14712cb6093b61bd142272a10b16778c418e98e",
777 | "sha256:bc3d98f621898b4a9bc7fecc00513eec8f40b5b83913d74ccb445f037d58cd89",
778 | "sha256:c11d6033115dc4887c456565303f540c44197f4fc1a2bfb192224a301534888e",
779 | "sha256:c50a724d136ec10d920661f1442e4a8b010a4fe5aebd65e0c2241ea41dbe93dc",
780 | "sha256:d0a5095d52b90ff38592bbdc2644f17c6d495762edf47d876049cfd2968fbccf",
781 | "sha256:d6cff2276e502b86a25fd10c2a96973fdb45c7a977dca2138d661417f3728341",
782 | "sha256:e46d13f38cfcbb79bfdb2964b0fe12561fe633caf964a77a5f8d4e45fe5d2ef7"
783 | ],
784 | "version": "==2020.7.14"
785 | },
786 | "requests": {
787 | "hashes": [
788 | "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b",
789 | "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898"
790 | ],
791 | "index": "pypi",
792 | "version": "==2.24.0"
793 | },
794 | "send2trash": {
795 | "hashes": [
796 | "sha256:60001cc07d707fe247c94f74ca6ac0d3255aabcb930529690897ca2a39db28b2",
797 | "sha256:f1691922577b6fa12821234aeb57599d887c4900b9ca537948d2dac34aea888b"
798 | ],
799 | "version": "==1.5.0"
800 | },
801 | "six": {
802 | "hashes": [
803 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
804 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
805 | ],
806 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
807 | "version": "==1.15.0"
808 | },
809 | "terminado": {
810 | "hashes": [
811 | "sha256:4804a774f802306a7d9af7322193c5390f1da0abb429e082a10ef1d46e6fb2c2",
812 | "sha256:a43dcb3e353bc680dd0783b1d9c3fc28d529f190bc54ba9a229f72fe6e7a54d7"
813 | ],
814 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
815 | "version": "==0.8.3"
816 | },
817 | "testpath": {
818 | "hashes": [
819 | "sha256:60e0a3261c149755f4399a1fff7d37523179a70fdc3abdf78de9fc2604aeec7e",
820 | "sha256:bfcf9411ef4bf3db7579063e0546938b1edda3d69f4e1fb8756991f5951f85d4"
821 | ],
822 | "version": "==0.4.4"
823 | },
824 | "toml": {
825 | "hashes": [
826 | "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f",
827 | "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88"
828 | ],
829 | "version": "==0.10.1"
830 | },
831 | "tornado": {
832 | "hashes": [
833 | "sha256:0fe2d45ba43b00a41cd73f8be321a44936dc1aba233dee979f17a042b83eb6dc",
834 | "sha256:22aed82c2ea340c3771e3babc5ef220272f6fd06b5108a53b4976d0d722bcd52",
835 | "sha256:2c027eb2a393d964b22b5c154d1a23a5f8727db6fda837118a776b29e2b8ebc6",
836 | "sha256:5217e601700f24e966ddab689f90b7ea4bd91ff3357c3600fa1045e26d68e55d",
837 | "sha256:5618f72e947533832cbc3dec54e1dffc1747a5cb17d1fd91577ed14fa0dc081b",
838 | "sha256:5f6a07e62e799be5d2330e68d808c8ac41d4a259b9cea61da4101b83cb5dc673",
839 | "sha256:c58d56003daf1b616336781b26d184023ea4af13ae143d9dda65e31e534940b9",
840 | "sha256:c952975c8ba74f546ae6de2e226ab3cc3cc11ae47baf607459a6728585bb542a",
841 | "sha256:c98232a3ac391f5faea6821b53db8db461157baa788f5d6222a193e9456e1740"
842 | ],
843 | "markers": "python_version >= '3.5'",
844 | "version": "==6.0.4"
845 | },
846 | "traitlets": {
847 | "hashes": [
848 | "sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44",
849 | "sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7"
850 | ],
851 | "version": "==4.3.3"
852 | },
853 | "typed-ast": {
854 | "hashes": [
855 | "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355",
856 | "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919",
857 | "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa",
858 | "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652",
859 | "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75",
860 | "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01",
861 | "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d",
862 | "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1",
863 | "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907",
864 | "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c",
865 | "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3",
866 | "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b",
867 | "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614",
868 | "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb",
869 | "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b",
870 | "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41",
871 | "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6",
872 | "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34",
873 | "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe",
874 | "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4",
875 | "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"
876 | ],
877 | "markers": "python_version < '3.8' and implementation_name == 'cpython'",
878 | "version": "==1.4.1"
879 | },
880 | "urllib3": {
881 | "hashes": [
882 | "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a",
883 | "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461"
884 | ],
885 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
886 | "version": "==1.25.10"
887 | },
888 | "virtualenv": {
889 | "hashes": [
890 | "sha256:8aa9c37b082664dbce2236fa420759c02d64109d8e6013593ad13914718a30fd",
891 | "sha256:f14a0a98ea4397f0d926cff950361766b6a73cd5975ae7eb259d12919f819a25"
892 | ],
893 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
894 | "version": "==20.0.29"
895 | },
896 | "wcwidth": {
897 | "hashes": [
898 | "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784",
899 | "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"
900 | ],
901 | "version": "==0.2.5"
902 | },
903 | "webencodings": {
904 | "hashes": [
905 | "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78",
906 | "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"
907 | ],
908 | "version": "==0.5.1"
909 | },
910 | "wrapt": {
911 | "hashes": [
912 | "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"
913 | ],
914 | "version": "==1.12.1"
915 | },
916 | "zipp": {
917 | "hashes": [
918 | "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b",
919 | "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96"
920 | ],
921 | "markers": "python_version >= '3.6'",
922 | "version": "==3.1.0"
923 | }
924 | }
925 | }
926 |
--------------------------------------------------------------------------------