├── pids.sample.cfg ├── setup.cfg ├── elastic_wikidata ├── __metadata__.py ├── __init__.py ├── config.py ├── sparql_to_es.py ├── sparql_helpers.py ├── http.py ├── dump_to_es.py └── wd_entities.py ├── queries └── humans.rq ├── requirements_dev.txt ├── requirements.txt ├── config.sample.ini ├── .flake8 ├── Pipfile ├── .pre-commit-config.yaml ├── .github └── workflows │ └── python-publish.yml ├── LICENSE.txt ├── setup.py ├── tests └── test_wd_entities.py ├── CHANGELOG.md ├── .gitignore ├── examples └── paginate query.ipynb ├── README.md ├── cli.py └── Pipfile.lock /pids.sample.cfg: -------------------------------------------------------------------------------- 1 | P31 2 | P279 3 | P18 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /elastic_wikidata/__metadata__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.3.0" 2 | -------------------------------------------------------------------------------- /queries/humans.rq: -------------------------------------------------------------------------------- 1 | SELECT ?item WHERE { 2 | ?item wdt:P31 wd:Q5. 
3 | } -------------------------------------------------------------------------------- /elastic_wikidata/__init__.py: -------------------------------------------------------------------------------- 1 | from elastic_wikidata.__metadata__ import __version__ 2 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pre-commit 2 | black 3 | pytest 4 | pylint 5 | flake8 6 | jupyterlab 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click==7.1.2 2 | elasticsearch==7.8.1 3 | SPARQLWrapper==1.8.5 4 | tqdm>=4.48.2 5 | requests==2.24.0 -------------------------------------------------------------------------------- /config.sample.ini: -------------------------------------------------------------------------------- 1 | [ELASTIC] 2 | ELASTIC_SEARCH_CLUSTER = 3 | ELASTIC_SEARCH_USER = 4 | ELASTIC_SEARCH_PASSWORD = 5 | 6 | [HTTP] 7 | CONTACT_DETAILS = -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E501, W503, F403, F401, W291, E402, C901 3 | max-line-length = 79 4 | max-complexity = 18 5 | select = B,C,E,F,W,T4,B9 -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | pre-commit = "*" 8 | black = "*" 9 | pytest = "*" 10 | pylint = "*" 11 | flake8 = "*" 12 | jupyterlab = "*" 13 | 14 | [packages] 15 | elasticsearch = "*" 16 | click = "*" 17 | tqdm = "*" 18 | sparqlwrapper = "*" 19 | requests = "*" 20 | 21 | [requires] 
22 | python_version = "3.7" 23 | 24 | [pipenv] 25 | allow_prereleases = true 26 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: 21.5b1 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | - repo: https://gitlab.com/pycqa/flake8 8 | rev: 3.9.2 9 | hooks: 10 | - id: flake8 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v4.0.1 13 | hooks: 14 | - id: check-json 15 | - id: check-merge-conflict -------------------------------------------------------------------------------- /elastic_wikidata/config.py: -------------------------------------------------------------------------------- 1 | class RuntimeConfig: 2 | def __init__(self): 3 | self.items = {} 4 | 5 | def add_item(self, item: dict): 6 | """ 7 | Add an item to the runtime config 8 | """ 9 | 10 | self.items.update(item) 11 | 12 | def get(self, key: str): 13 | """ 14 | Get specific item from config. Returns None if key doesn't exist. 
15 | """ 16 | 17 | return self.items.get(key, None) 18 | 19 | def get_all(self) -> dict: 20 | """ 21 | Return all items from runtime config 22 | """ 23 | 24 | return self.items 25 | 26 | 27 | runtime_config = RuntimeConfig() 28 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflows will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2020 The Board of Trustees of the Science Museum 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="elastic-wikidata", 8 | version="1.0.1", 9 | author="Science Museum Group", 10 | description="elastic-wikidata", 11 | long_description=long_description, 12 | long_description_content_type="text/markdown", 13 | url="https://github.com/TheScienceMuseum/elastic-wikidata", 14 | download_url="https://github.com/TheScienceMuseum/elastic-wikidata/archive/v1.0.1.tar.gz", 15 | classifiers=[ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ], 20 | python_requires=">=3.6", 21 | install_requires=[ 22 | "click==7.1.2", 23 | "elasticsearch==7.8.1", 24 | "SPARQLWrapper==1.8.5", 25 | "tqdm>=4.48.2", 26 | "requests==2.24.0", 27 | ], 28 | py_modules=["cli", "elastic_wikidata"], 29 | packages=["elastic_wikidata"], 30 | entry_points=""" 31 | [console_scripts] 32 | ew=cli:main 33 | """, 34 | ) 35 | -------------------------------------------------------------------------------- /tests/test_wd_entities.py: -------------------------------------------------------------------------------- 1 | from 
elastic_wikidata import wd_entities 2 | import pytest 3 | 4 | 5 | @pytest.fixture 6 | def ge(): 7 | ge = wd_entities.get_entities() 8 | 9 | return ge 10 | 11 | 12 | def test_get_all_results(ge): 13 | qids = ["Q203545", "Q706475", "Q18637243"] 14 | 15 | res = ge.get_all_results(qids, timeout=6) 16 | 17 | assert isinstance(res, list) 18 | assert len(res) == len(qids) 19 | assert [item["id"] for item in res] == qids 20 | 21 | 22 | def test_get_labels(ge): 23 | qids = ["Q203545", "Q706475", "Q18637243", "Q82340"] 24 | 25 | label_dict = ge.get_labels(qids, timeout=6) 26 | 27 | # the last QID has no english label so a blank string is returned as its value 28 | assert label_dict == { 29 | "Q18637243": "Michaela Coel", 30 | "Q203545": "Michael Gambon", 31 | "Q706475": "Steve McQueen", 32 | "Q82340": "", 33 | } 34 | 35 | 36 | def test_simplify_wbgetentities_result(ge): 37 | res = ge.get_all_results(["Q203545", "Q706475", "Q18637243"]) 38 | pids = ["P31", "P21", "P735", "P734", "P1971"] 39 | 40 | res_simplified = [ 41 | wd_entities.simplify_wbgetentities_result(doc, lang="en", properties=pids) 42 | for doc in res 43 | ] 44 | 45 | assert [doc["claims"]["P31"] == ["Q5"] for doc in res_simplified] 46 | assert res_simplified[1]["claims"]["P1971"][0] == "+2" 47 | -------------------------------------------------------------------------------- /elastic_wikidata/sparql_to_es.py: -------------------------------------------------------------------------------- 1 | import re 2 | from math import ceil 3 | from itertools import islice 4 | from tqdm.auto import tqdm 5 | from elastic_wikidata.sparql_helpers import run_query, paginate_sparql_query 6 | from elastic_wikidata.http import generate_user_agent 7 | 8 | 9 | def url_to_qid(url: str) -> str: 10 | """ 11 | Maps Wikidata URL of an entity to QID e.g. http://www.wikidata.org/entity/Q7187777 -> Q7187777. 
12 | """ 13 | 14 | return re.findall(r"(Q\d+)", url)[0] 15 | 16 | 17 | def get_entities_from_query(query, page_size=None, limit=None) -> list: 18 | """ 19 | Get a list of entities from a query. Optionally: 20 | paginate the query using page_size 21 | limit the total number of entities returned using limit 22 | 23 | Returns list of entities in form (Qd+). 24 | """ 25 | 26 | if page_size: 27 | pages = paginate_sparql_query(query, page_size=page_size) 28 | else: 29 | pages = [query] 30 | 31 | if limit: 32 | page_limit = ceil(limit / page_size) 33 | pages = islice(pages, page_limit) 34 | 35 | all_entities = [] 36 | 37 | for query in tqdm(pages, total=(page_limit or None)): 38 | res = run_query(query) 39 | var = res["head"]["vars"][0] 40 | entities = [url_to_qid(x[var]["value"]) for x in res["results"]["bindings"]] 41 | all_entities += entities 42 | 43 | # stop when page of query returns fewer items than the page size 44 | if len(entities) < page_size: 45 | break 46 | 47 | return all_entities 48 | -------------------------------------------------------------------------------- /elastic_wikidata/sparql_helpers.py: -------------------------------------------------------------------------------- 1 | from SPARQLWrapper import SPARQLWrapper, JSON 2 | import urllib 3 | import time 4 | from elastic_wikidata.http import generate_user_agent 5 | 6 | 7 | def run_query(query: str, endpoint_url="https://query.wikidata.org/sparql") -> dict: 8 | """ 9 | Run a SPARQL query against the Wikidata endpoint. Obeys retry-after headers for sensible bulk querying. 
10 | 11 | Args: 12 | query (str): SPARQL query 13 | endpoint_url (optional) 14 | 15 | Returns: 16 | query_result (dict): the JSON result of the query as a dict 17 | """ 18 | 19 | user_agent = generate_user_agent() 20 | 21 | sparql = SPARQLWrapper(endpoint_url, agent=user_agent) 22 | sparql.setQuery(query) 23 | sparql.setMethod("POST") 24 | sparql.setReturnFormat(JSON) 25 | 26 | try: 27 | return sparql.query().convert() 28 | except urllib.error.HTTPError as e: 29 | if e.code == 429: 30 | if isinstance(e.headers.get("retry-after", None), int): 31 | time.sleep(e.headers["retry-after"]) 32 | else: 33 | time.sleep(10) 34 | return run_query(query, endpoint_url) 35 | raise 36 | 37 | 38 | def paginate_sparql_query(query: str, page_size: int): 39 | """ 40 | Paginates a SELECT query, returning a generator which yields paginated queries. 41 | """ 42 | 43 | # check query 44 | if "select" not in query.lower(): 45 | raise ValueError("Must be a SELECT query") 46 | 47 | if "order by" not in query.lower(): 48 | print( 49 | "WARNING: no ORDER BY logic in the SPARQL query. This could result in duplicate or missing entities." 50 | ) 51 | 52 | # paginate 53 | i = 0 54 | while True: 55 | yield f"""{query} 56 | LIMIT {page_size} 57 | OFFSET {i*page_size} 58 | """ 59 | i += 1 60 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes documented below. 4 | 5 | ## 1.0.1 6 | - **bug fix:** no longer silently fails when object value has unsupported data type, instead returning the entire object in dict format. 7 | ## 1.0.0 8 | - **enhancement (breaking change):** properties now passed as whitespace-separated list rather than comma-separated. They can also be passed through a config file by giving the `--properties` option a filename to a file that exists. 
9 | - **stability improvements:** `elasticsearch.helpers.streaming_bulk` now used instead of `elasticsearch.helpers.parallel_bulk` due to issues with memory usage of the latter. Bulk load now retries on timeout. 10 | 11 | ## 0.3.7 12 | - **fix:** reading from JSON dump forces utf-8 13 | ## 0.3.6 14 | 15 | - **fix:** handles documents which are missing any of *labels/aliases/descriptions/claims* fields. 16 | - **enhancement:** `wd_entities.simplify_wbgetentities_result` gives the option to return the redirected QID for Wikidata pages which redirect. By default it returns the undirected QID: the same one that was passed into the function. 17 | 18 | ## 0.3.5 19 | 20 | - **fix:** `wd_entities.simplify_wbgetentities_result` can handle type *quantity*, and returns the value of *amount*. 21 | 22 | ## 0.3.4 23 | 24 | - **enhancement:** `wd_entities.get_entities` now has a `get_labels` method to get labels for a list of QIDs in a particular language using the wbgetentities API. 25 | 26 | ## 0.3.2 27 | 28 | - **enhancement:** add `labels_aliases` field for faster text search of both labels and aliases using an [Elasticsearch match query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html) 29 | 30 | ## 0.3.1 31 | 32 | - **fix:** property values without types are ignored 33 | - **enhancement:** refresh is disabled for the duration of data load by default, using `--disable_refresh` flag. This is beneficial for large datasets or low-resource machines as refreshing the search index is CPU-intensive and can cause the data load to freeze. 
34 | 35 | ## 0.3.0 36 | 37 | - add changeable timeout for `wbgetentities` GET request 38 | - handle more Wikidata claims than just QIDs 39 | - generate User Agent from request in line with Wikidata guidelines 40 | - make Wikidata-related methods importable (rather than just runnable from CLI) 41 | -------------------------------------------------------------------------------- /elastic_wikidata/http.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import sys 3 | from urllib.parse import quote 4 | from elastic_wikidata import __version__ as ew_version 5 | from elastic_wikidata.config import runtime_config 6 | 7 | 8 | def generate_user_agent(): 9 | """ 10 | Generates user agent string according to Wikidata User Agent Guidelines (https://meta.wikimedia.org/wiki/User-Agent_policy). 11 | Uses contact information from `runtime_config.get('user_agent_contact')`. 12 | 13 | Returns: 14 | str: user agent string 15 | """ 16 | v_params = { 17 | "python": "Python/" + ".".join(str(i) for i in sys.version_info), 18 | "http_backend": "requests/" + requests.__version__, 19 | "ew": "Elastic Wikidata bot/" + ew_version, 20 | } 21 | 22 | contact_information = runtime_config.get("user_agent_contact") 23 | 24 | if contact_information is not None: 25 | contact_information = " ".join( 26 | [process_user_agent_username(i) for i in contact_information.split(" ")] 27 | ) 28 | return f"{v_params['ew']} ({contact_information}) {v_params['http_backend']} {v_params['python']}" 29 | else: 30 | if runtime_config.get("cli"): 31 | print( 32 | "WARNING: please consider adding contact information through config.ini or the -contact flag to improve the User Agent header for Wikidata requests." 
33 | ) 34 | return f"{v_params['ew']} {v_params['http_backend']} {v_params['python']}" 35 | 36 | 37 | def process_user_agent_username(username=None): 38 | """ 39 | **Credit to [pywikibot](https://www.mediawiki.org/wiki/Manual:Pywikibot)** 40 | 41 | Reduce username to a representation permitted in HTTP headers. 42 | 43 | To achieve that, this function: 44 | 1) replaces spaces (' ') with '_' 45 | 2) encodes the username as 'utf-8' and if the username is not ASCII 46 | 3) URL encodes the username if it is not ASCII, or contains '%' 47 | """ 48 | if not username: 49 | return "" 50 | 51 | username = username.replace(" ", "_") # Avoid spaces or %20. 52 | try: 53 | username.encode("ascii") # just test, but not actually use it 54 | except UnicodeEncodeError: 55 | username = quote(username.encode("utf-8")) 56 | else: 57 | # % is legal in the default $wgLegalTitleChars 58 | # This is so that ops know the real pywikibot will not 59 | # allow a useragent in the username to allow through a hand-coded 60 | # percent-encoded value. 61 | if "%" in username: 62 | username = quote(username) 63 | return username 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # repo-specific 2 | config.ini 3 | experiments/ 4 | 5 | # node 6 | node_modules/ 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Editors 47 | .idea 48 | .vscode 49 | 50 | # Unit test / coverage reports 51 | htmlcov/ 52 | .tox/ 53 | .nox/ 54 | .coverage 55 | .coverage.* 56 | .cache 57 | nosetests.xml 58 | coverage.xml 59 | *.cover 60 | *.py,cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | cover/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | .pybuilder/ 87 | target/ 88 | 89 | # Jupyter Notebook 90 | .ipynb_checkpoints 91 | 92 | # IPython 93 | profile_default/ 94 | ipython_config.py 95 | 96 | # pyenv 97 | # For a library or package, you might want to ignore these files since the code is 98 | # intended to run in multiple environments; otherwise, check them in: 99 | .python-version 100 | 101 | # pipenv 102 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 103 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 104 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 105 | # install all needed dependencies. 106 | #Pipfile.lock 107 | 108 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 109 | __pypackages__/ 110 | 111 | # Celery stuff 112 | celerybeat-schedule 113 | celerybeat.pid 114 | 115 | # SageMath parsed files 116 | *.sage.py 117 | 118 | # Environments 119 | .env 120 | .venv 121 | env/ 122 | venv/ 123 | ENV/ 124 | env.bak/ 125 | venv.bak/ 126 | 127 | # Spyder project settings 128 | .spyderproject 129 | .spyproject 130 | 131 | # Rope project settings 132 | .ropeproject 133 | 134 | # mkdocs documentation 135 | /site 136 | 137 | # mypy 138 | .mypy_cache/ 139 | .dmypy.json 140 | dmypy.json 141 | 142 | # Pyre type checker 143 | .pyre/ 144 | 145 | # pytype static type analyzer 146 | .pytype/ 147 | 148 | # Cython debug symbols 149 | cython_debug/ 150 | 151 | 152 | ### Linux ### 153 | *~ 154 | 155 | # temporary files which can be created if a process still has a handle open of a deleted file 156 | .fuse_hidden* 157 | 158 | # KDE directory preferences 159 | .directory 160 | 161 | # Linux trash folder which might appear on any partition or disk 162 | .Trash-* 163 | 164 | # .nfs files are created when an open file is removed but is still being accessed 165 | .nfs* 166 | 167 | ### OSX ### 168 | # General 169 | .DS_Store 170 | .AppleDouble 171 | .LSOverride 172 | 173 | # Icon must end with two \r 174 | Icon 175 | 176 | # Thumbnails 177 | ._* 178 | 179 | # Files that might appear in the root of a volume 180 | .DocumentRevisions-V100 181 | .fseventsd 182 | .Spotlight-V100 183 | .TemporaryItems 184 | .Trashes 185 | .VolumeIcon.icns 186 | .com.apple.timemachine.donotpresent 187 | 188 | # Directories potentially created on remote AFP share 189 | .AppleDB 190 | .AppleDesktop 191 | Network Trash Folder 192 | Temporary Items 193 | .apdisk 194 | 195 | ### Windows ### 196 | # Windows thumbnail cache files 197 | Thumbs.db 198 | ehthumbs.db 199 | ehthumbs_vista.db 200 | 201 | # Dump file 202 | *.stackdump 203 | 204 | # Folder config file 205 | [Dd]esktop.ini 206 | 207 | # Recycle Bin used on file shares 208 | $RECYCLE.BIN/ 209 | 210 | 
# Windows Installer files 211 | *.cab 212 | *.msi 213 | *.msix 214 | *.msm 215 | *.msp 216 | 217 | # Windows shortcuts 218 | *.lnk 219 | 220 | # End of https://www.toptal.com/developers/gitignore/api/osx,windows,linux -------------------------------------------------------------------------------- /examples/paginate query.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Automatically Paginating a SPARQL query" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Requirement already satisfied: sparqlwrapper in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (1.8.5)\n", 20 | "Requirement already satisfied: rdflib>=4.0 in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (from sparqlwrapper) (5.0.0)\n", 21 | "Requirement already satisfied: six in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (from rdflib>=4.0->sparqlwrapper) (1.12.0)\n", 22 | "Requirement already satisfied: isodate in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (from rdflib>=4.0->sparqlwrapper) (0.6.0)\n", 23 | "Requirement already satisfied: pyparsing in /Users/kalyan/.pyenv/versions/3.7.3/lib/python3.7/site-packages (from rdflib>=4.0->sparqlwrapper) (2.4.2)\n", 24 | "\u001b[33mYou are using pip version 19.0.3, however version 20.2 is available.\n", 25 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "!pip install sparqlwrapper" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import sys\n", 40 | "sys.path.append(\"..\")\n", 41 | "\n", 42 | "from itertools import islice\n", 43 | "from 
elastic_wikidata import sparql_helpers, sparql_to_es\n", 44 | "\n", 45 | "from tqdm.auto import tqdm" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "Let's write a query to get all humans. There are over 8 million humans on Wikidata so we'll get a timeout if we try to run the entire query at once. " 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "query = \"\"\"\n", 62 | "SELECT ?human WHERE {{\n", 63 | " ?human wdt:P31 wd:Q5. \n", 64 | "}}\n", 65 | "\"\"\"" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "We can use `elastic_wikidata` to paginate the query instead." 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "WARNING: no ORDER BY logic in the SPARQL query. This could result in incorrect pages.\n" 85 | ] 86 | }, 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "'\\nSELECT ?human WHERE {{\\n ?human wdt:P31 wd:Q5. \\n}}\\n\\n LIMIT 500\\n OFFSET 0\\n '" 91 | ] 92 | }, 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "pages = sparql_helpers.paginate_sparql_query(query, page_size=500)\n", 100 | "next(pages)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "## Running paginated queries\n", 108 | "\n", 109 | "Putting this all together, we can use `sparql_to_es.get_entities_from_query` to:\n", 110 | "1. paginate a query to fetch entities\n", 111 | "2. run each page against the Wikidata Query Service\n", 112 | "3. 
combine the results" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 6, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "application/vnd.jupyter.widget-view+json": { 123 | "model_id": "10e58e8a57434553918a3b57a67df597", 124 | "version_major": 2, 125 | "version_minor": 0 126 | }, 127 | "text/plain": [ 128 | "HBox(children=(IntProgress(value=0, max=10), HTML(value='')))" 129 | ] 130 | }, 131 | "metadata": {}, 132 | "output_type": "display_data" 133 | }, 134 | { 135 | "name": "stdout", 136 | "output_type": "stream", 137 | "text": [ 138 | "WARNING: no ORDER BY logic in the SPARQL query. This could result in incorrect pages.\n", 139 | "\n", 140 | "1000 entities returned\n", 141 | "1000 unique entities returned\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "entities = sparql_to_es.get_entities_from_query(query, page_size=100, limit=1000)\n", 147 | "\n", 148 | "print(f\"{len(entities)} entities returned\")\n", 149 | "print(f\"{len(set(entities))} unique entities returned\")" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "Python 3", 163 | "language": "python", 164 | "name": "python3" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.7.3" 177 | } 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 4 181 | } 182 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elastic Wikidata 2 | 3 | Simple CLI tools to load a subset of Wikidata into Elasticsearch. 
Part of the [Heritage Connector](https://www.sciencemuseumgroup.org.uk/project/heritage-connector/) project. 4 | 5 | - [Why?](#why) 6 | - [Installation](#installation) 7 | - [Setup](#setup) 8 | - [Usage](#usage) 9 | - [Loading from Wikidata dump (.ndjson)](#loading-from-wikidata-dump-ndjson) 10 | - [Loading from SPARQL query](#loading-from-sparql-query) 11 | - [Temporary side effects](#temporary-side-effects) 12 | 13 |
14 | 15 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/elastic-wikidata) 16 | ![GitHub last commit](https://img.shields.io/github/last-commit/thesciencemuseum/elastic-wikidata) 17 | ![GitHub Pipenv locked Python version](https://img.shields.io/github/pipenv/locked/python-version/thesciencemuseum/elastic-wikidata) 18 | 19 | ## Why? 20 | 21 | Running text search programmatically on Wikidata means using the MediaWiki query API, either [directly](https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=John_Snow&srlimit=10&srprop=size&formatversion=2) or [through the Wikidata query service/SPARQL](https://query.wikidata.org/#SELECT%20%2a%20WHERE%20%7B%0A%20%20SERVICE%20wikibase%3Amwapi%20%7B%0A%20%20%20%20%20%20bd%3AserviceParam%20wikibase%3Aendpoint%20%22en.wikipedia.org%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20wikibase%3Aapi%20%22Search%22%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20mwapi%3Asrsearch%20%22John%20Snow%22.%0A%20%20%20%20%20%20%3Ftitle%20wikibase%3AapiOutput%20mwapi%3Atitle.%0A%20%20%7D%0A%20%20%20hint%3APrior%20hint%3ArunLast%20%22true%22.%0A%20%20%20SERVICE%20wikibase%3Alabel%20%7B%20bd%3AserviceParam%20wikibase%3Alanguage%20%22en%22.%20%7D%0A%7D%20LIMIT%2020). 22 | 23 | There are a couple of reasons you may not want to do this when running searches programmatically: 24 | 25 | - *time constraints/large volumes:* APIs are rate-limited, and you can only do one text search per SPARQL query 26 | - *better search:* using Elasticsearch allows for more flexible and powerful text search capabilities.* We're using our own Elasticsearch instance to do nearest neighbour search on embeddings, too. 
27 | 28 | ** [CirrusSearch](https://www.mediawiki.org/wiki/Extension:CirrusSearch) is a Wikidata extension that enables direct search on Wikidata using Elasticsearch, if you require powerful search and are happy with the rate limit.* 29 | 30 | ## Installation 31 | 32 | from pypi: `pip install elastic_wikidata` 33 | 34 | from repo: 35 | 36 | 1. Download 37 | 2. `cd` into root 38 | 3. `pip install -e .` 39 | 40 | ## Setup 41 | 42 | elastic-wikidata needs the Elasticsearch credentials `ELASTICSEARCH_CLUSTER`, `ELASTICSEARCH_USER` and `ELASTICSEARCH_PASSWORD` to connect to your ES instance. You can set these in one of three ways: 43 | 44 | 1. Using environment variables: `export ELASTICSEARCH_CLUSTER=https://...` etc 45 | 2. Using config.ini: pass the `-c` parameter followed by a path to an ini file containing your Elasticsearch credentials. [Example here](./config.sample.ini). 46 | 3. Pass each variable in at runtime using options `--cluster/-c`, `--user/-u`, `--password/-p`. 47 | 48 | ## Usage 49 | 50 | Once installed the package is accessible through the keyword `ew`. A call is structured as follows: 51 | 52 | ``` bash 53 | ew 54 | ``` 55 | 56 | *Task* is either: 57 | 58 | - `dump`: [load data from Wikidata JSON dump](#loading-from-wikidata-dump-ndjson), or 59 | - `query`: [load data from SPARQL query](#loading-from-sparql-query). 60 | 61 | A full list of options can be found with `ew --help`, but the following are likely to be useful: 62 | 63 | - `--index/-i`: the index name to push to. If not specified at runtime, elastic-wikidata will prompt for it 64 | - `--limit/-l`: limit the number of records pushed into ES. You might want to use this for a small trial run before importing the whole thing. 65 | - `--properties/-prop`: a whitespace-separated list of properties to include in the ES index e.g. *'p31 p21'*, or the path to a text file containing newline-separated properties e.g. [this one](./pids.sample.cfg). 
**Time estimate:** Loading 10,000 entities from Wikidata into an AWS-hosted Elasticsearch index took me about 6 minutes.
93 | 2. Run `ew query` with the `-p` option pointing to the file containing the SPARQL query. Optionally add a `--page_size` for the SPARQL query. 94 | 95 | ### Temporary side effects 96 | 97 | As of version *0.3.1* refreshing the search index is disabled for the duration of load by default, as [recommended by ElasticSearch](https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-indexing-speed.html#_unset_or_increase_the_refresh_interval). Refresh is re-enabled to the default interval of `1s` after load is complete. To disable this behaviour use the flag `--no_disable_refresh/-ndr`. 98 | -------------------------------------------------------------------------------- /cli.py: -------------------------------------------------------------------------------- 1 | from elastic_wikidata import dump_to_es, sparql_to_es 2 | from elastic_wikidata.config import runtime_config 3 | import os 4 | import click 5 | from configparser import ConfigParser 6 | 7 | 8 | @click.command() 9 | @click.argument("source", nargs=1) 10 | @click.option("--path", "-p", type=click.Path(exists=True)) 11 | @click.option( 12 | "--cluster", envvar="ELASTICSEARCH_CLUSTER", help="Elasticsearch cluster URL" 13 | ) 14 | @click.option("--user", envvar="ELASTICSEARCH_USER", help="Elasticsearch username") 15 | @click.option( 16 | "--password", envvar="ELASTICSEARCH_PASSWORD", help="Elasticsearch password" 17 | ) 18 | @click.option( 19 | "--agent_contact", 20 | "-contact", 21 | envvar="WIKIMEDIA_AGENT_CONTACT", 22 | help="(optional) Contact details to add to the User Agent header for Wikidata requests", 23 | default=None, 24 | ) 25 | @click.option( 26 | "--config", 27 | "-c", 28 | type=click.Path(exists=True), 29 | help="Path to .ini file containing Elasticsearch credentials", 30 | ) 31 | @click.option( 32 | "--index", 33 | "-i", 34 | prompt="Elasticsearch index", 35 | help="Name of Elasticsearch index to load into", 36 | ) 37 | @click.option( 38 | "--limit", "-l", type=int, 
help="(optional) Limit the number of entities loaded in" 39 | ) 40 | @click.option("--page_size", type=int, help="Page size for SPARQL query.", default=100) 41 | @click.option( 42 | "--language", "-lang", type=str, help="Language (Wikimedia language code)" 43 | ) 44 | @click.option( 45 | "--properties", 46 | "-prop", 47 | type=str, 48 | help="One or more Wikidata property e.g. 'p31' or 'p31 p21'. A path to a file containing newline-separated properties can also be passed. Not case-sensitive", 49 | ) 50 | @click.option( 51 | "--timeout", 52 | "-t", 53 | type=int, 54 | help="Timeout for Wikidata requests (seconds)", 55 | default=6, 56 | ) 57 | @click.option( 58 | "--disable_refresh/--no_disable_refresh", 59 | "-dr/-ndr", 60 | help="Whether to disable Elasticsearch's (CPU-intensive) refresh during data load. Defaults to True. Recommended to leave this on for low-resource machines or large datasets.", 61 | default=True, 62 | ) 63 | def main( 64 | source, 65 | path, 66 | cluster, 67 | user, 68 | password, 69 | agent_contact, 70 | config, 71 | index, 72 | limit, 73 | page_size, 74 | language, 75 | properties, 76 | timeout, 77 | disable_refresh, 78 | ): 79 | 80 | # get elasticsearch credentials 81 | if config: 82 | # read .ini file 83 | parser = ConfigParser() 84 | parser.optionxform = str # make option names case sensitive 85 | parser.read(config) 86 | es_credentials = parser._sections["ELASTIC"] 87 | check_es_credentials(es_credentials) 88 | 89 | runtime_config.add_item( 90 | { 91 | "user_agent_contact": parser._sections["HTTP"].get( 92 | "CONTACT_DETAILS", None 93 | ) 94 | } 95 | ) 96 | else: 97 | # check environment variables/flags 98 | es_credentials = {} 99 | 100 | if cluster: 101 | es_credentials["ELASTICSEARCH_CLUSTER"] = cluster 102 | if user: 103 | es_credentials["ELASTICSEARCH_USER"] = user 104 | if password: 105 | es_credentials["ELASTICSEARCH_PASSWORD"] = password 106 | 107 | check_es_credentials(es_credentials) 108 | 109 | 
runtime_config.add_item({"user_agent_contact": agent_contact}) 110 | 111 | runtime_config.add_item({"http_timeout": timeout}) 112 | 113 | # global flag for all functions that the module is being run through the CLI 114 | runtime_config.add_item({"cli": True}) 115 | 116 | # set kwargs 117 | kwargs = {} 118 | if language: 119 | kwargs["lang"] = language 120 | if properties: 121 | if os.path.exists(properties): 122 | with open(properties, "r") as f: 123 | kwargs["properties"] = f.read().splitlines() 124 | else: 125 | kwargs["properties"] = properties.split() 126 | 127 | if disable_refresh: 128 | kwargs["disable_refresh_on_index"] = disable_refresh 129 | 130 | # run job 131 | if source == "dump": 132 | load_from_dump(path, es_credentials, index, limit, **kwargs) 133 | elif source == "query": 134 | load_from_sparql(path, es_credentials, index, limit, page_size, **kwargs) 135 | else: 136 | raise ValueError(f"Argument {source} must be either dump or sparql") 137 | 138 | 139 | def load_from_dump(path, es_credentials, index, limit, **kwargs): 140 | if not kwargs: 141 | kwargs = {} 142 | if limit: 143 | kwargs["doc_limit"] = limit 144 | 145 | # limit is used when dumping JSON to Elasticsearch 146 | d = dump_to_es.processDump( 147 | dump=path, es_credentials=es_credentials, index_name=index, **kwargs 148 | ) 149 | d.start_elasticsearch() 150 | d.dump_to_es() 151 | 152 | 153 | def load_from_sparql(path, es_credentials, index, limit, page_size=100, **kwargs): 154 | if not kwargs: 155 | kwargs = {} 156 | 157 | with open(path, "r") as f: 158 | query = f.read() 159 | 160 | # limit is used when getting list of entities 161 | print("Getting entities from SPARQL query") 162 | entity_list = sparql_to_es.get_entities_from_query( 163 | query, page_size=100, limit=limit 164 | ) 165 | 166 | print( 167 | f"Retrieving information from wbgetentities API and pushing to ES index {index}" 168 | ) 169 | d = dump_to_es.processDump( 170 | dump=entity_list, es_credentials=es_credentials, 
index_name=index, **kwargs 171 | ) 172 | d.start_elasticsearch() 173 | d.dump_to_es() 174 | 175 | 176 | def check_es_credentials(credentials: dict): 177 | credentials_present = set(credentials.keys()) 178 | credentials_required = { 179 | "ELASTICSEARCH_CLUSTER", 180 | "ELASTICSEARCH_USER", 181 | "ELASTICSEARCH_PASSWORD", 182 | } 183 | missing_credentials = credentials_required - credentials_present 184 | 185 | if len(missing_credentials) > 0: 186 | raise ValueError(f"Missing Elasticsearch credentials: {missing_credentials}") 187 | 188 | 189 | if __name__ == "__main__": 190 | # main( 191 | # source='dump', 192 | # path="../wikidata/all_no_articles.ndjson", 193 | # properties="p31,p279", 194 | # config="./config.ini", 195 | # index='wikidump2', 196 | # cluster=None, 197 | # user=None, 198 | # password=None, 199 | # agent_contact=False, 200 | # limit=None, 201 | # page_size=100, 202 | # language='en', 203 | # timeout=6, 204 | # disable_refresh=True 205 | # ) 206 | main() 207 | -------------------------------------------------------------------------------- /elastic_wikidata/dump_to_es.py: -------------------------------------------------------------------------------- 1 | import json 2 | from itertools import islice 3 | from tqdm.auto import tqdm 4 | from elasticsearch import Elasticsearch 5 | from elasticsearch.helpers import streaming_bulk 6 | from typing import Union 7 | from elastic_wikidata.wd_entities import ( 8 | get_entities, 9 | wiki_property_check, 10 | simplify_wbgetentities_result, 11 | ) 12 | 13 | 14 | class processDump: 15 | def __init__( 16 | self, dump: Union[str, list], es_credentials: dict, index_name: str, **kwargs 17 | ): 18 | self.config = { 19 | "chunk_size": 1000, 20 | "queue_size": 8, 21 | } 22 | 23 | self.es_credentials = es_credentials 24 | 25 | if isinstance(dump, str): 26 | self.dump_path = dump 27 | self.entities = None 28 | elif isinstance(dump, list): 29 | self.entities = dump 30 | self.dump_path = None 31 | else: 32 | raise ValueError( 
33 | "dump must either be path to JSON dump or Python list of entities" 34 | ) 35 | 36 | self.index_name = index_name 37 | 38 | # process kwargs/set defaults 39 | self.disable_refresh_on_index = kwargs["disable_refresh_on_index"] 40 | 41 | if "doc_limit" in kwargs: 42 | self.doc_limit = kwargs["doc_limit"] 43 | else: 44 | self.doc_limit = None 45 | 46 | self.wiki_options = {} 47 | 48 | if "lang" in kwargs: 49 | self.wiki_options["lang"] = kwargs["lang"] 50 | else: 51 | self.wiki_options["lang"] = "en" 52 | 53 | if "user_agent_contact" in kwargs: 54 | self.user_agent_contact = kwargs["user_agent_contact"] 55 | else: 56 | self.user_agent_contact = None 57 | 58 | if "properties" in kwargs: 59 | if isinstance(kwargs["properties"], str) and wiki_property_check( 60 | kwargs["properties"] 61 | ): 62 | self.wiki_options["properties"] = [ 63 | kwargs["properties"].upper() 64 | ] # [P31], not [p31] 65 | elif isinstance(kwargs["properties"], list): 66 | self.wiki_options["properties"] = [ 67 | item.upper() 68 | for item in kwargs["properties"] 69 | if wiki_property_check(item) 70 | ] 71 | else: 72 | self.wiki_options["properties"] = ["P31"] 73 | 74 | def start_elasticsearch(self): 75 | """ 76 | Creates an Elasticsearch index. If SEARCH_CLUSTER, ELASTICSEARCH_USER & ELASTICSEARCH_PASSWORD 77 | are specified in config it uses those, otherwise uses a locally running Elasticsearch instance. 
78 | """ 79 | 80 | if "ELASTICSEARCH_CLUSTER" in self.es_credentials: 81 | print( 82 | f"Connecting to Elasticsearch at {self.es_credentials['ELASTICSEARCH_CLUSTER']}" 83 | ) 84 | self.es = Elasticsearch( 85 | [self.es_credentials["ELASTICSEARCH_CLUSTER"]], 86 | http_auth=( 87 | self.es_credentials["ELASTICSEARCH_USER"], 88 | self.es_credentials["ELASTICSEARCH_PASSWORD"], 89 | ), 90 | max_retries=100, 91 | retry_on_timeout=True, 92 | ) 93 | else: 94 | # run on localhost 95 | print("Connecting to Elasticsearch on localhost") 96 | self.es = Elasticsearch( 97 | max_retries=100, 98 | retry_on_timeout=True, 99 | ) 100 | 101 | mappings = { 102 | "mappings": { 103 | "properties": { 104 | "labels": {"type": "text", "copy_to": "labels_aliases"}, 105 | "aliases": {"type": "text", "copy_to": "labels_aliases"}, 106 | "labels_aliases": {"type": "text", "store": "true"}, 107 | } 108 | } 109 | } 110 | 111 | self.es.indices.create(index=self.index_name, ignore=400, body=mappings) 112 | 113 | if self.disable_refresh_on_index: 114 | print( 115 | "Temporary disabling refresh for the index. Will reset refresh interval to the default (1s) after load is complete." 
116 | ) 117 | self.es.indices.put_settings({"index": {"refresh_interval": -1}}) 118 | 119 | def dump_to_es(self): 120 | print("Indexing documents...") 121 | successes = 0 122 | errors = [] 123 | 124 | # if dump_path, use generator that passes 125 | if self.dump_path: 126 | action_generator = self.generate_actions_from_dump() 127 | elif self.entities: 128 | action_generator = self.generate_actions_from_entities() 129 | 130 | try: 131 | for ok, action in tqdm( 132 | streaming_bulk( 133 | client=self.es, 134 | index=self.index_name, 135 | actions=action_generator, 136 | chunk_size=self.config["chunk_size"], 137 | # queue_size=self.config["queue_size"], 138 | max_retries=3, 139 | ), 140 | ): 141 | if not ok: 142 | print(action) 143 | errors.append(action) 144 | successes += ok 145 | 146 | finally: 147 | if self.disable_refresh_on_index: 148 | # reset back to default 149 | print("Refresh interval set back to default of 1s.") 150 | self.es.indices.put_settings({"index": {"refresh_interval": "1s"}}) 151 | 152 | def process_doc(self, doc: dict) -> dict: 153 | """ 154 | Processes a single document from the JSON dump, returning a filtered version of that document. 155 | """ 156 | 157 | lang = self.wiki_options["lang"] 158 | properties = self.wiki_options["properties"] 159 | 160 | return simplify_wbgetentities_result(doc, lang, properties) 161 | 162 | def generate_actions_from_dump(self): 163 | """ 164 | Generator to yield a processed document from the Wikidata JSON dump. 165 | Each line of the Wikidata JSON dump is a separate document. 
166 | """ 167 | with open(self.dump_path, "r", encoding="utf-8") as f: 168 | objects = (json.loads(line) for line in f) 169 | 170 | # optionally limit number that are loaded 171 | if self.doc_limit is not None: 172 | objects = islice(objects, self.doc_limit) 173 | 174 | for item in objects: 175 | doc = self.process_doc(item) 176 | 177 | yield doc 178 | 179 | def generate_actions_from_entities(self): 180 | """ 181 | Generator to yield processed document from list of entities. Calls are made to 182 | wbgetentities API with page size of 50 to retrieve documents. 183 | """ 184 | 185 | json_generator = get_entities.result_generator( 186 | self.entities, lang=self.wiki_options["lang"] 187 | ) 188 | 189 | for page in json_generator: 190 | for item in page: 191 | yield self.process_doc(item) 192 | -------------------------------------------------------------------------------- /elastic_wikidata/wd_entities.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from tqdm.auto import tqdm 3 | from typing import List, Union 4 | from math import ceil 5 | import re 6 | from elastic_wikidata.http import generate_user_agent 7 | from elastic_wikidata.config import runtime_config 8 | 9 | 10 | class get_entities: 11 | def __init__(self): 12 | """ 13 | One instance of this class per list of qcodes. The JSON response for a list of qcodes is made to Wikidata on 14 | creation of a class instance. 15 | 16 | Args: 17 | qcodes (str/list): Wikidata qcode or list of qcodes/ 18 | lang (str, optional): Defaults to 'en'. 19 | page_limit (int): page limit for Wikidata API. Usually 50, can reach 500. 20 | """ 21 | self.endpoint = ( 22 | "http://www.wikidata.org/w/api.php?action=wbgetentities&format=json" 23 | ) 24 | 25 | self.properties = ["labels", "aliases", "claims", "descriptions"] 26 | 27 | @staticmethod 28 | def _param_join(params: List[str]) -> str: 29 | """ 30 | Joins list of parameters for the URL. 
['a', 'b'] -> "a%7Cb" 31 | 32 | Args: 33 | params (list): list of parameters (strings) 34 | 35 | Returns: 36 | str 37 | """ 38 | 39 | return "%7C".join(params) if len(params) > 1 else params[0] 40 | 41 | @classmethod 42 | def get_all_results( 43 | self, qcodes, lang="en", page_limit=50, timeout: int = None 44 | ) -> list: 45 | """ 46 | Get response through the `wbgetentities` API. 47 | 48 | Returns: 49 | list: each item is a the response for an entity 50 | """ 51 | 52 | results = self().result_generator(qcodes, lang, page_limit, timeout) 53 | 54 | all_results = [] 55 | 56 | print(f"Getting {len(qcodes)} wikidata documents in pages of {page_limit}") 57 | 58 | for res in tqdm(results, total=ceil(len(qcodes) / page_limit)): 59 | all_results += res 60 | 61 | return all_results 62 | 63 | @classmethod 64 | def result_generator( 65 | self, qcodes, lang="en", page_limit=50, timeout: int = None 66 | ) -> list: 67 | """ 68 | Get response through the `wbgetentities` API. Yields `page_limit` entities at a time. 69 | 70 | Returns: 71 | list: each item is a the response for an entity 72 | """ 73 | 74 | if isinstance(qcodes, str): 75 | qcodes = [qcodes] 76 | 77 | qcodes_paginated = [ 78 | qcodes[i : i + page_limit] for i in range(0, len(qcodes), page_limit) 79 | ] 80 | 81 | headers = {"User-Agent": generate_user_agent()} 82 | 83 | if timeout is None: 84 | timeout = runtime_config.get("http_timeout") 85 | 86 | with requests.Session() as s: 87 | for page in qcodes_paginated: 88 | url = f"http://www.wikidata.org/w/api.php?action=wbgetentities&format=json&ids={self._param_join(page)}&props={self._param_join(self().properties)}&languages={lang}&languagefallback=1&formatversion=2" 89 | response = s.get(url, headers=headers, timeout=timeout).json() 90 | yield [v for _, v in response["entities"].items()] 91 | 92 | def get_labels(self, qcodes, lang="en", page_limit=50, timeout: int = None) -> dict: 93 | """ 94 | Get labels from Wikidata qcodes. 
If the item associated with a qcode has no label, its value 95 | in the dictionary is an empty string. 96 | 97 | Returns: 98 | dict: {qid1: label1, qid2: label2, ...} 99 | """ 100 | 101 | qid_label_mapping = dict() 102 | qcodes = list(set(qcodes)) 103 | 104 | docs = self.get_all_results(qcodes, lang, page_limit, timeout) 105 | 106 | for doc in docs: 107 | qid_label_mapping[doc["id"]] = doc["labels"].get(lang, {}).get("value", "") 108 | 109 | return qid_label_mapping 110 | 111 | 112 | def simplify_wbgetentities_result( 113 | doc: Union[dict, List[dict]], 114 | lang: str, 115 | properties: list, 116 | use_redirected_qid: bool = False, 117 | ) -> Union[dict, List[dict]]: 118 | """ 119 | Processes a single document or set of documents from the JSON result of wbgetentities, returning a simplified version of that document. 120 | 121 | Args: 122 | doc (Union[dict, List[dict]]): JSON result from Wikidata wbgetentities API 123 | lang (str): Wikimedia language code 124 | properties (list): list of Wikidata properties 125 | use_redirected_qid (bool, optional): whether to return the redirected QID value under the 'id' field instead of the original QID 126 | if there is one. Defaults to False. 
127 | 128 | Returns: 129 | Union[dict, List[dict]]: dict if single record passed in; list if multiple records 130 | """ 131 | 132 | # if list of dicts, run this function for each dict 133 | if isinstance(doc, list) and isinstance(doc[0], dict): 134 | return [simplify_wbgetentities_result(item, lang, properties) for item in doc] 135 | 136 | wd_type_mapping = { 137 | "wikibase-entityid": "id", 138 | "time": "time", 139 | "monolingualtext": "text", 140 | "quantity": "amount", 141 | } 142 | 143 | # check for redirected URL 144 | if "redirects" in doc: 145 | if use_redirected_qid: 146 | newdoc = {"id": doc["redirects"]["to"]} 147 | else: 148 | newdoc = {"id": doc["redirects"]["from"]} 149 | 150 | else: 151 | newdoc = {"id": doc["id"]} 152 | 153 | # add label(s) 154 | if lang in doc.get("labels", {}): 155 | newdoc["labels"] = doc["labels"][lang]["value"] 156 | 157 | # add descriptions(s) 158 | if lang in doc.get("descriptions", {}): 159 | newdoc["descriptions"] = doc["descriptions"][lang]["value"] 160 | 161 | # add aliases 162 | if (len(doc.get("aliases", {})) > 0) and (lang in doc.get("aliases", {})): 163 | newdoc["aliases"] = [i["value"] for i in doc["aliases"][lang]] 164 | else: 165 | newdoc["aliases"] = [] 166 | 167 | # add claims (property values) 168 | newdoc["claims"] = {} 169 | 170 | if "claims" in doc: 171 | for p in properties: 172 | if p in doc["claims"]: 173 | claims = [] 174 | for i in doc["claims"][p]: 175 | try: 176 | value_type = i["mainsnak"]["datavalue"]["type"] 177 | if value_type in wd_type_mapping.keys(): 178 | # Return specific value for certain types. 179 | value_name = wd_type_mapping[value_type] 180 | claims.append( 181 | i["mainsnak"]["datavalue"]["value"][value_name] 182 | ) 183 | else: 184 | # Otherwise return the whole dictionary. 185 | claims.append(i["mainsnak"]["datavalue"]["value"]) 186 | except KeyError: 187 | print( 188 | f"WARNING: property {p} with datatype {value_type} failed to process. 
Consider forking this code and implementing support for it." 189 | ) 190 | 191 | newdoc["claims"][p] = claims 192 | 193 | return newdoc 194 | 195 | 196 | def wiki_property_check(p): 197 | if len(re.findall(r"(p\d+)", p.lower())) == 1: 198 | return True 199 | else: 200 | print(f"WARNING: property {p} is not a valid Wikidata property") 201 | return False 202 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "acf1c4fff67ab549cc8414c05abe84231399324830aa057d3dbe493e19a132b6" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.7" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "certifi": { 20 | "hashes": [ 21 | "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", 22 | "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" 23 | ], 24 | "version": "==2020.6.20" 25 | }, 26 | "chardet": { 27 | "hashes": [ 28 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 29 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 30 | ], 31 | "version": "==3.0.4" 32 | }, 33 | "click": { 34 | "hashes": [ 35 | "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", 36 | "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" 37 | ], 38 | "index": "pypi", 39 | "version": "==7.1.2" 40 | }, 41 | "elasticsearch": { 42 | "hashes": [ 43 | "sha256:2ffbd746fc7d2db08e5ede29c822483705f29c4bf43b0875c238637d5d843d44", 44 | "sha256:92b534931865a186906873f75ae0b91808ff5036b0f2b9269eb5f6dc09644b55" 45 | ], 46 | "index": "pypi", 47 | "version": "==7.8.1" 48 | }, 49 | "idna": { 50 | "hashes": [ 51 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", 52 | 
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" 53 | ], 54 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 55 | "version": "==2.10" 56 | }, 57 | "isodate": { 58 | "hashes": [ 59 | "sha256:2e364a3d5759479cdb2d37cce6b9376ea504db2ff90252a2e5b7cc89cc9ff2d8", 60 | "sha256:aa4d33c06640f5352aca96e4b81afd8ab3b47337cc12089822d6f322ac772c81" 61 | ], 62 | "version": "==0.6.0" 63 | }, 64 | "pyparsing": { 65 | "hashes": [ 66 | "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", 67 | "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" 68 | ], 69 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 70 | "version": "==2.4.7" 71 | }, 72 | "rdflib": { 73 | "hashes": [ 74 | "sha256:78149dd49d385efec3b3adfbd61c87afaf1281c30d3fcaf1b323b34f603fb155", 75 | "sha256:88208ea971a87886d60ae2b1a4b2cdc263527af0454c422118d43fe64b357877" 76 | ], 77 | "version": "==5.0.0" 78 | }, 79 | "requests": { 80 | "hashes": [ 81 | "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", 82 | "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898" 83 | ], 84 | "index": "pypi", 85 | "version": "==2.24.0" 86 | }, 87 | "six": { 88 | "hashes": [ 89 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 90 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 91 | ], 92 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 93 | "version": "==1.15.0" 94 | }, 95 | "sparqlwrapper": { 96 | "hashes": [ 97 | "sha256:17ec44b08b8ae2888c801066249f74fe328eec25d90203ce7eadaf82e64484c7", 98 | "sha256:357ee8a27bc910ea13d77836dbddd0b914991495b8cc1bf70676578155e962a8", 99 | "sha256:8cf6c21126ed76edc85c5c232fd6f77b9f61f8ad1db90a7147cdde2104aff145", 100 | "sha256:c7f9c9d8ebb13428771bc3b6dee54197422507dcc3dea34e30d5dcfc53478dec", 101 | 
"sha256:d6a66b5b8cda141660e07aeb00472db077a98d22cb588c973209c7336850fb3c" 102 | ], 103 | "index": "pypi", 104 | "version": "==1.8.5" 105 | }, 106 | "tqdm": { 107 | "hashes": [ 108 | "sha256:1a336d2b829be50e46b84668691e0a2719f26c97c62846298dd5ae2937e4d5cf", 109 | "sha256:564d632ea2b9cb52979f7956e093e831c28d441c11751682f84c86fc46e4fd21" 110 | ], 111 | "index": "pypi", 112 | "version": "==4.48.2" 113 | }, 114 | "urllib3": { 115 | "hashes": [ 116 | "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a", 117 | "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461" 118 | ], 119 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 120 | "version": "==1.25.10" 121 | } 122 | }, 123 | "develop": { 124 | "appdirs": { 125 | "hashes": [ 126 | "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", 127 | "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128" 128 | ], 129 | "version": "==1.4.4" 130 | }, 131 | "appnope": { 132 | "hashes": [ 133 | "sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0", 134 | "sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71" 135 | ], 136 | "markers": "sys_platform == 'darwin' and platform_system == 'Darwin'", 137 | "version": "==0.1.0" 138 | }, 139 | "argon2-cffi": { 140 | "hashes": [ 141 | "sha256:05a8ac07c7026542377e38389638a8a1e9b78f1cd8439cd7493b39f08dd75fbf", 142 | "sha256:0bf066bc049332489bb2d75f69216416329d9dc65deee127152caeb16e5ce7d5", 143 | "sha256:18dee20e25e4be86680b178b35ccfc5d495ebd5792cd00781548d50880fee5c5", 144 | "sha256:392c3c2ef91d12da510cfb6f9bae52512a4552573a9e27600bdb800e05905d2b", 145 | "sha256:57358570592c46c420300ec94f2ff3b32cbccd10d38bdc12dc6979c4a8484fbc", 146 | "sha256:6678bb047373f52bcff02db8afab0d2a77d83bde61cfecea7c5c62e2335cb203", 147 | "sha256:6ea92c980586931a816d61e4faf6c192b4abce89aa767ff6581e6ddc985ed003", 148 | 
"sha256:77e909cc756ef81d6abb60524d259d959bab384832f0c651ed7dcb6e5ccdbb78", 149 | "sha256:7d455c802727710e9dfa69b74ccaab04568386ca17b0ad36350b622cd34606fe", 150 | "sha256:9bee3212ba4f560af397b6d7146848c32a800652301843df06b9e8f68f0f7361", 151 | "sha256:9dfd5197852530294ecb5795c97a823839258dfd5eb9420233c7cfedec2058f2", 152 | "sha256:b160416adc0f012fb1f12588a5e6954889510f82f698e23ed4f4fa57f12a0647", 153 | "sha256:ba7209b608945b889457f949cc04c8e762bed4fe3fec88ae9a6b7765ae82e496", 154 | "sha256:cc0e028b209a5483b6846053d5fd7165f460a1f14774d79e632e75e7ae64b82b", 155 | "sha256:d8029b2d3e4b4cea770e9e5a0104dd8fa185c1724a0f01528ae4826a6d25f97d", 156 | "sha256:da7f0445b71db6d3a72462e04f36544b0de871289b0bc8a7cc87c0f5ec7079fa" 157 | ], 158 | "version": "==20.1.0" 159 | }, 160 | "astroid": { 161 | "hashes": [ 162 | "sha256:2f4078c2a41bf377eea06d71c9d2ba4eb8f6b1af2135bec27bbbb7d8f12bb703", 163 | "sha256:bc58d83eb610252fd8de6363e39d4f1d0619c894b0ed24603b881c02e64c7386" 164 | ], 165 | "markers": "python_version >= '3.5'", 166 | "version": "==2.4.2" 167 | }, 168 | "attrs": { 169 | "hashes": [ 170 | "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", 171 | "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" 172 | ], 173 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 174 | "version": "==19.3.0" 175 | }, 176 | "backcall": { 177 | "hashes": [ 178 | "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", 179 | "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255" 180 | ], 181 | "version": "==0.2.0" 182 | }, 183 | "black": { 184 | "hashes": [ 185 | "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b", 186 | "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539" 187 | ], 188 | "index": "pypi", 189 | "version": "==19.10b0" 190 | }, 191 | "bleach": { 192 | "hashes": [ 193 | 
"sha256:2bce3d8fab545a6528c8fa5d9f9ae8ebc85a56da365c7f85180bfe96a35ef22f", 194 | "sha256:3c4c520fdb9db59ef139915a5db79f8b51bc2a7257ea0389f30c846883430a4b" 195 | ], 196 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 197 | "version": "==3.1.5" 198 | }, 199 | "certifi": { 200 | "hashes": [ 201 | "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", 202 | "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" 203 | ], 204 | "version": "==2020.6.20" 205 | }, 206 | "cffi": { 207 | "hashes": [ 208 | "sha256:267adcf6e68d77ba154334a3e4fc921b8e63cbb38ca00d33d40655d4228502bc", 209 | "sha256:26f33e8f6a70c255767e3c3f957ccafc7f1f706b966e110b855bfe944511f1f9", 210 | "sha256:3cd2c044517f38d1b577f05927fb9729d3396f1d44d0c659a445599e79519792", 211 | "sha256:4a03416915b82b81af5502459a8a9dd62a3c299b295dcdf470877cb948d655f2", 212 | "sha256:4ce1e995aeecf7cc32380bc11598bfdfa017d592259d5da00fc7ded11e61d022", 213 | "sha256:4f53e4128c81ca3212ff4cf097c797ab44646a40b42ec02a891155cd7a2ba4d8", 214 | "sha256:4fa72a52a906425416f41738728268072d5acfd48cbe7796af07a923236bcf96", 215 | "sha256:66dd45eb9530e3dde8f7c009f84568bc7cac489b93d04ac86e3111fb46e470c2", 216 | "sha256:6923d077d9ae9e8bacbdb1c07ae78405a9306c8fd1af13bfa06ca891095eb995", 217 | "sha256:833401b15de1bb92791d7b6fb353d4af60dc688eaa521bd97203dcd2d124a7c1", 218 | "sha256:8416ed88ddc057bab0526d4e4e9f3660f614ac2394b5e019a628cdfff3733849", 219 | "sha256:892daa86384994fdf4856cb43c93f40cbe80f7f95bb5da94971b39c7f54b3a9c", 220 | "sha256:98be759efdb5e5fa161e46d404f4e0ce388e72fbf7d9baf010aff16689e22abe", 221 | "sha256:a6d28e7f14ecf3b2ad67c4f106841218c8ab12a0683b1528534a6c87d2307af3", 222 | "sha256:b1d6ebc891607e71fd9da71688fcf332a6630b7f5b7f5549e6e631821c0e5d90", 223 | "sha256:b2a2b0d276a136146e012154baefaea2758ef1f56ae9f4e01c612b0831e0bd2f", 224 | "sha256:b87dfa9f10a470eee7f24234a37d1d5f51e5f5fa9eeffda7c282e2b8f5162eb1", 225 | 
"sha256:bac0d6f7728a9cc3c1e06d4fcbac12aaa70e9379b3025b27ec1226f0e2d404cf", 226 | "sha256:c991112622baee0ae4d55c008380c32ecfd0ad417bcd0417ba432e6ba7328caa", 227 | "sha256:cda422d54ee7905bfc53ee6915ab68fe7b230cacf581110df4272ee10462aadc", 228 | "sha256:d3148b6ba3923c5850ea197a91a42683f946dba7e8eb82dfa211ab7e708de939", 229 | "sha256:d6033b4ffa34ef70f0b8086fd4c3df4bf801fee485a8a7d4519399818351aa8e", 230 | "sha256:ddff0b2bd7edcc8c82d1adde6dbbf5e60d57ce985402541cd2985c27f7bec2a0", 231 | "sha256:e23cb7f1d8e0f93addf0cae3c5b6f00324cccb4a7949ee558d7b6ca973ab8ae9", 232 | "sha256:effd2ba52cee4ceff1a77f20d2a9f9bf8d50353c854a282b8760ac15b9833168", 233 | "sha256:f90c2267101010de42f7273c94a1f026e56cbc043f9330acd8a80e64300aba33", 234 | "sha256:f960375e9823ae6a07072ff7f8a85954e5a6434f97869f50d0e41649a1c8144f", 235 | "sha256:fcf32bf76dc25e30ed793145a57426064520890d7c02866eb93d3e4abe516948" 236 | ], 237 | "version": "==1.14.1" 238 | }, 239 | "cfgv": { 240 | "hashes": [ 241 | "sha256:32e43d604bbe7896fe7c248a9c2276447dbef840feb28fe20494f62af110211d", 242 | "sha256:cf22deb93d4bcf92f345a5c3cd39d3d41d6340adc60c78bbbd6588c384fda6a1" 243 | ], 244 | "markers": "python_full_version >= '3.6.1'", 245 | "version": "==3.2.0" 246 | }, 247 | "chardet": { 248 | "hashes": [ 249 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 250 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 251 | ], 252 | "version": "==3.0.4" 253 | }, 254 | "click": { 255 | "hashes": [ 256 | "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", 257 | "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" 258 | ], 259 | "index": "pypi", 260 | "version": "==7.1.2" 261 | }, 262 | "decorator": { 263 | "hashes": [ 264 | "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760", 265 | "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7" 266 | ], 267 | "version": "==4.4.2" 268 | }, 269 | "defusedxml": { 270 
| "hashes": [ 271 | "sha256:6687150770438374ab581bb7a1b327a847dd9c5749e396102de3fad4e8a3ef93", 272 | "sha256:f684034d135af4c6cbb949b8a4d2ed61634515257a67299e5f940fbaa34377f5" 273 | ], 274 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 275 | "version": "==0.6.0" 276 | }, 277 | "distlib": { 278 | "hashes": [ 279 | "sha256:8c09de2c67b3e7deef7184574fc060ab8a793e7adbb183d942c389c8b13c52fb", 280 | "sha256:edf6116872c863e1aa9d5bb7cb5e05a022c519a4594dc703843343a9ddd9bff1" 281 | ], 282 | "version": "==0.3.1" 283 | }, 284 | "entrypoints": { 285 | "hashes": [ 286 | "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19", 287 | "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451" 288 | ], 289 | "markers": "python_version >= '2.7'", 290 | "version": "==0.3" 291 | }, 292 | "filelock": { 293 | "hashes": [ 294 | "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59", 295 | "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836" 296 | ], 297 | "version": "==3.0.12" 298 | }, 299 | "flake8": { 300 | "hashes": [ 301 | "sha256:15e351d19611c887e482fb960eae4d44845013cc142d42896e9862f775d8cf5c", 302 | "sha256:f04b9fcbac03b0a3e58c0ab3a0ecc462e023a9faf046d57794184028123aa208" 303 | ], 304 | "index": "pypi", 305 | "version": "==3.8.3" 306 | }, 307 | "identify": { 308 | "hashes": [ 309 | "sha256:110ed090fec6bce1aabe3c72d9258a9de82207adeaa5a05cd75c635880312f9a", 310 | "sha256:ccd88716b890ecbe10920659450a635d2d25de499b9a638525a48b48261d989b" 311 | ], 312 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 313 | "version": "==1.4.25" 314 | }, 315 | "idna": { 316 | "hashes": [ 317 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", 318 | "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" 319 | ], 320 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 321 | 
"version": "==2.10" 322 | }, 323 | "importlib-metadata": { 324 | "hashes": [ 325 | "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83", 326 | "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070" 327 | ], 328 | "markers": "python_version < '3.8'", 329 | "version": "==1.7.0" 330 | }, 331 | "iniconfig": { 332 | "hashes": [ 333 | "sha256:80cf40c597eb564e86346103f609d74efce0f6b4d4f30ec8ce9e2c26411ba437", 334 | "sha256:e5f92f89355a67de0595932a6c6c02ab4afddc6fcdc0bfc5becd0d60884d3f69" 335 | ], 336 | "version": "==1.0.1" 337 | }, 338 | "ipykernel": { 339 | "hashes": [ 340 | "sha256:9b2652af1607986a1b231c62302d070bc0534f564c393a5d9d130db9abbbe89d", 341 | "sha256:d6fbba26dba3cebd411382bc484f7bc2caa98427ae0ddb4ab37fe8bfeb5c7dd3" 342 | ], 343 | "markers": "python_version >= '3.5'", 344 | "version": "==5.3.4" 345 | }, 346 | "ipython": { 347 | "hashes": [ 348 | "sha256:5a8f159ca8b22b9a0a1f2a28befe5ad2b703339afb58c2ffe0d7c8d7a3af5999", 349 | "sha256:b70974aaa2674b05eb86a910c02ed09956a33f2dd6c71afc60f0b128a77e7f28" 350 | ], 351 | "markers": "python_version >= '3.7'", 352 | "version": "==7.17.0" 353 | }, 354 | "ipython-genutils": { 355 | "hashes": [ 356 | "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", 357 | "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" 358 | ], 359 | "version": "==0.2.0" 360 | }, 361 | "isort": { 362 | "hashes": [ 363 | "sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1", 364 | "sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd" 365 | ], 366 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 367 | "version": "==4.3.21" 368 | }, 369 | "jedi": { 370 | "hashes": [ 371 | "sha256:86ed7d9b750603e4ba582ea8edc678657fb4007894a12bcf6f4bb97892f31d20", 372 | "sha256:98cc583fa0f2f8304968199b01b6b4b94f469a1f4a74c1560506ca2a211378b5" 373 | ], 374 | "markers": "python_version >= '2.7' and python_version 
not in '3.0, 3.1, 3.2, 3.3, 3.4'", 375 | "version": "==0.17.2" 376 | }, 377 | "jinja2": { 378 | "hashes": [ 379 | "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", 380 | "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" 381 | ], 382 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 383 | "version": "==2.11.2" 384 | }, 385 | "json5": { 386 | "hashes": [ 387 | "sha256:703cfee540790576b56a92e1c6aaa6c4b0d98971dc358ead83812aa4d06bdb96", 388 | "sha256:af1a1b9a2850c7f62c23fde18be4749b3599fd302f494eebf957e2ada6b9e42c" 389 | ], 390 | "version": "==0.9.5" 391 | }, 392 | "jsonschema": { 393 | "hashes": [ 394 | "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163", 395 | "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a" 396 | ], 397 | "version": "==3.2.0" 398 | }, 399 | "jupyter-client": { 400 | "hashes": [ 401 | "sha256:7ad9aa91505786420d77edc5f9fb170d51050c007338ba8d196f603223fd3b3a", 402 | "sha256:b360f8d4638bc577a4656e93f86298db755f915098dc763f6fc05da0c5d7a595" 403 | ], 404 | "markers": "python_version >= '3.5'", 405 | "version": "==6.1.6" 406 | }, 407 | "jupyter-core": { 408 | "hashes": [ 409 | "sha256:394fd5dd787e7c8861741880bdf8a00ce39f95de5d18e579c74b882522219e7e", 410 | "sha256:a4ee613c060fe5697d913416fc9d553599c05e4492d58fac1192c9a6844abb21" 411 | ], 412 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 413 | "version": "==4.6.3" 414 | }, 415 | "jupyterlab": { 416 | "hashes": [ 417 | "sha256:8aa9bc4b5020e7b9ec6e006d516d48bddf7d2528680af65840464ee722d59db3", 418 | "sha256:d0d743ea75b8eee20a18b96ccef24f76ee009bafb2617f3f330698fe3a00026e" 419 | ], 420 | "index": "pypi", 421 | "version": "==2.2.2" 422 | }, 423 | "jupyterlab-server": { 424 | "hashes": [ 425 | "sha256:5431d9dde96659364b7cc877693d5d21e7b80cea7ae3959ecc2b87518e5f5d8c", 426 | 
"sha256:55d256077bf13e5bc9e8fbd5aac51bef82f6315111cec6b712b9a5ededbba924" 427 | ], 428 | "markers": "python_version >= '3.5'", 429 | "version": "==1.2.0" 430 | }, 431 | "lazy-object-proxy": { 432 | "hashes": [ 433 | "sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d", 434 | "sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449", 435 | "sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08", 436 | "sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a", 437 | "sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50", 438 | "sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd", 439 | "sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239", 440 | "sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb", 441 | "sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea", 442 | "sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e", 443 | "sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156", 444 | "sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142", 445 | "sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442", 446 | "sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62", 447 | "sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db", 448 | "sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531", 449 | "sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383", 450 | "sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a", 451 | "sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357", 452 | "sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4", 453 | "sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0" 454 | ], 455 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 
3.2, 3.3'", 456 | "version": "==1.4.3" 457 | }, 458 | "markupsafe": { 459 | "hashes": [ 460 | "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", 461 | "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", 462 | "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", 463 | "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", 464 | "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", 465 | "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", 466 | "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", 467 | "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", 468 | "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", 469 | "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", 470 | "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", 471 | "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b", 472 | "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", 473 | "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", 474 | "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", 475 | "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", 476 | "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", 477 | "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", 478 | "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", 479 | "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", 480 | "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", 481 | "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", 482 | "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", 483 | 
"sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", 484 | "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", 485 | "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", 486 | "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", 487 | "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", 488 | "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", 489 | "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", 490 | "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2", 491 | "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7", 492 | "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be" 493 | ], 494 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 495 | "version": "==1.1.1" 496 | }, 497 | "mccabe": { 498 | "hashes": [ 499 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 500 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 501 | ], 502 | "version": "==0.6.1" 503 | }, 504 | "mistune": { 505 | "hashes": [ 506 | "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e", 507 | "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4" 508 | ], 509 | "version": "==0.8.4" 510 | }, 511 | "more-itertools": { 512 | "hashes": [ 513 | "sha256:68c70cc7167bdf5c7c9d8f6954a7837089c6a36bf565383919bb595efb8a17e5", 514 | "sha256:b78134b2063dd214000685165d81c154522c3ee0a1c0d4d113c80361c234c5a2" 515 | ], 516 | "markers": "python_version >= '3.5'", 517 | "version": "==8.4.0" 518 | }, 519 | "nbconvert": { 520 | "hashes": [ 521 | "sha256:21fb48e700b43e82ba0e3142421a659d7739b65568cc832a13976a77be16b523", 522 | "sha256:f0d6ec03875f96df45aa13e21fd9b8450c42d7e1830418cccc008c0df725fcee" 523 | ], 524 | "markers": "python_version >= '2.7' and python_version not in 
'3.0, 3.1, 3.2, 3.3, 3.4'", 525 | "version": "==5.6.1" 526 | }, 527 | "nbformat": { 528 | "hashes": [ 529 | "sha256:54d4d6354835a936bad7e8182dcd003ca3dc0cedfee5a306090e04854343b340", 530 | "sha256:ea55c9b817855e2dfcd3f66d74857342612a60b1f09653440f4a5845e6e3523f" 531 | ], 532 | "markers": "python_version >= '3.5'", 533 | "version": "==5.0.7" 534 | }, 535 | "nodeenv": { 536 | "hashes": [ 537 | "sha256:4b0b77afa3ba9b54f4b6396e60b0c83f59eaeb2d63dc3cc7a70f7f4af96c82bc" 538 | ], 539 | "version": "==1.4.0" 540 | }, 541 | "notebook": { 542 | "hashes": [ 543 | "sha256:42391d8f3b88676e774316527599e49c11f3a7e51c41035e9e44c1b58e1398d5", 544 | "sha256:4cc4e44a43a83a7c2f5e85bfdbbfe1c68bed91b857741df9e593d213a6fc2d27" 545 | ], 546 | "markers": "python_version >= '3.5'", 547 | "version": "==6.1.1" 548 | }, 549 | "packaging": { 550 | "hashes": [ 551 | "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8", 552 | "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181" 553 | ], 554 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 555 | "version": "==20.4" 556 | }, 557 | "pandocfilters": { 558 | "hashes": [ 559 | "sha256:b3dd70e169bb5449e6bc6ff96aea89c5eea8c5f6ab5e207fc2f521a2cf4a0da9" 560 | ], 561 | "version": "==1.4.2" 562 | }, 563 | "parso": { 564 | "hashes": [ 565 | "sha256:97218d9159b2520ff45eb78028ba8b50d2bc61dcc062a9682666f2dc4bd331ea", 566 | "sha256:caba44724b994a8a5e086460bb212abc5a8bc46951bf4a9a1210745953622eb9" 567 | ], 568 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 569 | "version": "==0.7.1" 570 | }, 571 | "pathspec": { 572 | "hashes": [ 573 | "sha256:7d91249d21749788d07a2d0f94147accd8f845507400749ea19c1ec9054a12b0", 574 | "sha256:da45173eb3a6f2a5a487efba21f050af2b41948be6ab52b6a1e3ff22bb8b7061" 575 | ], 576 | "version": "==0.8.0" 577 | }, 578 | "pexpect": { 579 | "hashes": [ 580 | "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937", 
581 | "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c" 582 | ], 583 | "markers": "sys_platform != 'win32'", 584 | "version": "==4.8.0" 585 | }, 586 | "pickleshare": { 587 | "hashes": [ 588 | "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", 589 | "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56" 590 | ], 591 | "version": "==0.7.5" 592 | }, 593 | "pluggy": { 594 | "hashes": [ 595 | "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", 596 | "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" 597 | ], 598 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 599 | "version": "==0.13.1" 600 | }, 601 | "pre-commit": { 602 | "hashes": [ 603 | "sha256:1657663fdd63a321a4a739915d7d03baedd555b25054449090f97bb0cb30a915", 604 | "sha256:e8b1315c585052e729ab7e99dcca5698266bedce9067d21dc909c23e3ceed626" 605 | ], 606 | "index": "pypi", 607 | "version": "==2.6.0" 608 | }, 609 | "prometheus-client": { 610 | "hashes": [ 611 | "sha256:983c7ac4b47478720db338f1491ef67a100b474e3bc7dafcbaefb7d0b8f9b01c", 612 | "sha256:c6e6b706833a6bd1fd51711299edee907857be10ece535126a158f911ee80915" 613 | ], 614 | "version": "==0.8.0" 615 | }, 616 | "prompt-toolkit": { 617 | "hashes": [ 618 | "sha256:563d1a4140b63ff9dd587bda9557cffb2fe73650205ab6f4383092fb882e7dc8", 619 | "sha256:df7e9e63aea609b1da3a65641ceaf5bc7d05e0a04de5bd45d05dbeffbabf9e04" 620 | ], 621 | "markers": "python_full_version >= '3.6.1'", 622 | "version": "==3.0.5" 623 | }, 624 | "ptyprocess": { 625 | "hashes": [ 626 | "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0", 627 | "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f" 628 | ], 629 | "markers": "os_name != 'nt'", 630 | "version": "==0.6.0" 631 | }, 632 | "py": { 633 | "hashes": [ 634 | "sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2", 635 | 
"sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342" 636 | ], 637 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 638 | "version": "==1.9.0" 639 | }, 640 | "pycodestyle": { 641 | "hashes": [ 642 | "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367", 643 | "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e" 644 | ], 645 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 646 | "version": "==2.6.0" 647 | }, 648 | "pycparser": { 649 | "hashes": [ 650 | "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", 651 | "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" 652 | ], 653 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 654 | "version": "==2.20" 655 | }, 656 | "pyflakes": { 657 | "hashes": [ 658 | "sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92", 659 | "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8" 660 | ], 661 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 662 | "version": "==2.2.0" 663 | }, 664 | "pygments": { 665 | "hashes": [ 666 | "sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44", 667 | "sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324" 668 | ], 669 | "markers": "python_version >= '3.5'", 670 | "version": "==2.6.1" 671 | }, 672 | "pylint": { 673 | "hashes": [ 674 | "sha256:7dd78437f2d8d019717dbf287772d0b2dbdfd13fc016aa7faa08d67bccc46adc", 675 | "sha256:d0ece7d223fe422088b0e8f13fa0a1e8eb745ebffcb8ed53d3e95394b6101a1c" 676 | ], 677 | "index": "pypi", 678 | "version": "==2.5.3" 679 | }, 680 | "pyparsing": { 681 | "hashes": [ 682 | "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", 683 | "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" 684 | ], 685 | "markers": 
"python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 686 | "version": "==2.4.7" 687 | }, 688 | "pyrsistent": { 689 | "hashes": [ 690 | "sha256:28669905fe725965daa16184933676547c5bb40a5153055a8dee2a4bd7933ad3" 691 | ], 692 | "version": "==0.16.0" 693 | }, 694 | "pytest": { 695 | "hashes": [ 696 | "sha256:85228d75db9f45e06e57ef9bf4429267f81ac7c0d742cc9ed63d09886a9fe6f4", 697 | "sha256:8b6007800c53fdacd5a5c192203f4e531eb2a1540ad9c752e052ec0f7143dbad" 698 | ], 699 | "index": "pypi", 700 | "version": "==6.0.1" 701 | }, 702 | "python-dateutil": { 703 | "hashes": [ 704 | "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", 705 | "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a" 706 | ], 707 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 708 | "version": "==2.8.1" 709 | }, 710 | "pyyaml": { 711 | "hashes": [ 712 | "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97", 713 | "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76", 714 | "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2", 715 | "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648", 716 | "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf", 717 | "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f", 718 | "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2", 719 | "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee", 720 | "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d", 721 | "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c", 722 | "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a" 723 | ], 724 | "version": "==5.3.1" 725 | }, 726 | "pyzmq": { 727 | "hashes": [ 728 | "sha256:00dca814469436455399660247d74045172955459c0bd49b54a540ce4d652185", 729 | 
"sha256:046b92e860914e39612e84fa760fc3f16054d268c11e0e25dcb011fb1bc6a075", 730 | "sha256:09d24a80ccb8cbda1af6ed8eb26b005b6743e58e9290566d2a6841f4e31fa8e0", 731 | "sha256:0a422fc290d03958899743db091f8154958410fc76ce7ee0ceb66150f72c2c97", 732 | "sha256:276ad604bffd70992a386a84bea34883e696a6b22e7378053e5d3227321d9702", 733 | "sha256:296540a065c8c21b26d63e3cea2d1d57902373b16e4256afe46422691903a438", 734 | "sha256:29d51279060d0a70f551663bc592418bcad7f4be4eea7b324f6dd81de05cb4c1", 735 | "sha256:36ab114021c0cab1a423fe6689355e8f813979f2c750968833b318c1fa10a0fd", 736 | "sha256:3fa6debf4bf9412e59353defad1f8035a1e68b66095a94ead8f7a61ae90b2675", 737 | "sha256:5120c64646e75f6db20cc16b9a94203926ead5d633de9feba4f137004241221d", 738 | "sha256:59f1e54627483dcf61c663941d94c4af9bf4163aec334171686cdaee67974fe5", 739 | "sha256:5d9fc809aa8d636e757e4ced2302569d6e60e9b9c26114a83f0d9d6519c40493", 740 | "sha256:654d3e06a4edc566b416c10293064732516cf8871a4522e0a2ba00cc2a2e600c", 741 | "sha256:720d2b6083498a9281eaee3f2927486e9fe02cd16d13a844f2e95217f243efea", 742 | "sha256:73483a2caaa0264ac717af33d6fb3f143d8379e60a422730ee8d010526ce1913", 743 | "sha256:8a6ada5a3f719bf46a04ba38595073df8d6b067316c011180102ba2a1925f5b5", 744 | "sha256:8b66b94fe6243d2d1d89bca336b2424399aac57932858b9a30309803ffc28112", 745 | "sha256:99cc0e339a731c6a34109e5c4072aaa06d8e32c0b93dc2c2d90345dd45fa196c", 746 | "sha256:a7e7f930039ee0c4c26e4dfee015f20bd6919cd8b97c9cd7afbde2923a5167b6", 747 | "sha256:ab0d01148d13854de716786ca73701012e07dff4dfbbd68c4e06d8888743526e", 748 | "sha256:c1a31cd42905b405530e92bdb70a8a56f048c8a371728b8acf9d746ecd4482c0", 749 | "sha256:c20dd60b9428f532bc59f2ef6d3b1029a28fc790d408af82f871a7db03e722ff", 750 | "sha256:c36ffe1e5aa35a1af6a96640d723d0d211c5f48841735c2aa8d034204e87eb87", 751 | "sha256:c40fbb2b9933369e994b837ee72193d6a4c35dfb9a7c573257ef7ff28961272c", 752 | "sha256:d46fb17f5693244de83e434648b3dbb4f4b0fec88415d6cbab1c1452b6f2ae17", 753 | 
"sha256:e36f12f503511d72d9bdfae11cadbadca22ff632ff67c1b5459f69756a029c19", 754 | "sha256:f1a25a61495b6f7bb986accc5b597a3541d9bd3ef0016f50be16dbb32025b302", 755 | "sha256:fa411b1d8f371d3a49d31b0789eb6da2537dadbb2aef74a43aa99a78195c3f76" 756 | ], 757 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 758 | "version": "==19.0.2" 759 | }, 760 | "regex": { 761 | "hashes": [ 762 | "sha256:0dc64ee3f33cd7899f79a8d788abfbec168410be356ed9bd30bbd3f0a23a7204", 763 | "sha256:1269fef3167bb52631ad4fa7dd27bf635d5a0790b8e6222065d42e91bede4162", 764 | "sha256:14a53646369157baa0499513f96091eb70382eb50b2c82393d17d7ec81b7b85f", 765 | "sha256:3a3af27a8d23143c49a3420efe5b3f8cf1a48c6fc8bc6856b03f638abc1833bb", 766 | "sha256:46bac5ca10fb748d6c55843a931855e2727a7a22584f302dd9bb1506e69f83f6", 767 | "sha256:4c037fd14c5f4e308b8370b447b469ca10e69427966527edcab07f52d88388f7", 768 | "sha256:51178c738d559a2d1071ce0b0f56e57eb315bcf8f7d4cf127674b533e3101f88", 769 | "sha256:5ea81ea3dbd6767873c611687141ec7b06ed8bab43f68fad5b7be184a920dc99", 770 | "sha256:6961548bba529cac7c07af2fd4d527c5b91bb8fe18995fed6044ac22b3d14644", 771 | "sha256:75aaa27aa521a182824d89e5ab0a1d16ca207318a6b65042b046053cfc8ed07a", 772 | "sha256:7a2dd66d2d4df34fa82c9dc85657c5e019b87932019947faece7983f2089a840", 773 | "sha256:8a51f2c6d1f884e98846a0a9021ff6861bdb98457879f412fdc2b42d14494067", 774 | "sha256:9c568495e35599625f7b999774e29e8d6b01a6fb684d77dee1f56d41b11b40cd", 775 | "sha256:9eddaafb3c48e0900690c1727fba226c4804b8e6127ea409689c3bb492d06de4", 776 | "sha256:bbb332d45b32df41200380fff14712cb6093b61bd142272a10b16778c418e98e", 777 | "sha256:bc3d98f621898b4a9bc7fecc00513eec8f40b5b83913d74ccb445f037d58cd89", 778 | "sha256:c11d6033115dc4887c456565303f540c44197f4fc1a2bfb192224a301534888e", 779 | "sha256:c50a724d136ec10d920661f1442e4a8b010a4fe5aebd65e0c2241ea41dbe93dc", 780 | "sha256:d0a5095d52b90ff38592bbdc2644f17c6d495762edf47d876049cfd2968fbccf", 781 | 
"sha256:d6cff2276e502b86a25fd10c2a96973fdb45c7a977dca2138d661417f3728341", 782 | "sha256:e46d13f38cfcbb79bfdb2964b0fe12561fe633caf964a77a5f8d4e45fe5d2ef7" 783 | ], 784 | "version": "==2020.7.14" 785 | }, 786 | "requests": { 787 | "hashes": [ 788 | "sha256:b3559a131db72c33ee969480840fff4bb6dd111de7dd27c8ee1f820f4f00231b", 789 | "sha256:fe75cc94a9443b9246fc7049224f75604b113c36acb93f87b80ed42c44cbb898" 790 | ], 791 | "index": "pypi", 792 | "version": "==2.24.0" 793 | }, 794 | "send2trash": { 795 | "hashes": [ 796 | "sha256:60001cc07d707fe247c94f74ca6ac0d3255aabcb930529690897ca2a39db28b2", 797 | "sha256:f1691922577b6fa12821234aeb57599d887c4900b9ca537948d2dac34aea888b" 798 | ], 799 | "version": "==1.5.0" 800 | }, 801 | "six": { 802 | "hashes": [ 803 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 804 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 805 | ], 806 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 807 | "version": "==1.15.0" 808 | }, 809 | "terminado": { 810 | "hashes": [ 811 | "sha256:4804a774f802306a7d9af7322193c5390f1da0abb429e082a10ef1d46e6fb2c2", 812 | "sha256:a43dcb3e353bc680dd0783b1d9c3fc28d529f190bc54ba9a229f72fe6e7a54d7" 813 | ], 814 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 815 | "version": "==0.8.3" 816 | }, 817 | "testpath": { 818 | "hashes": [ 819 | "sha256:60e0a3261c149755f4399a1fff7d37523179a70fdc3abdf78de9fc2604aeec7e", 820 | "sha256:bfcf9411ef4bf3db7579063e0546938b1edda3d69f4e1fb8756991f5951f85d4" 821 | ], 822 | "version": "==0.4.4" 823 | }, 824 | "toml": { 825 | "hashes": [ 826 | "sha256:926b612be1e5ce0634a2ca03470f95169cf16f939018233a670519cb4ac58b0f", 827 | "sha256:bda89d5935c2eac546d648028b9901107a595863cb36bae0c73ac804a9b4ce88" 828 | ], 829 | "version": "==0.10.1" 830 | }, 831 | "tornado": { 832 | "hashes": [ 833 | "sha256:0fe2d45ba43b00a41cd73f8be321a44936dc1aba233dee979f17a042b83eb6dc", 834 | 
"sha256:22aed82c2ea340c3771e3babc5ef220272f6fd06b5108a53b4976d0d722bcd52", 835 | "sha256:2c027eb2a393d964b22b5c154d1a23a5f8727db6fda837118a776b29e2b8ebc6", 836 | "sha256:5217e601700f24e966ddab689f90b7ea4bd91ff3357c3600fa1045e26d68e55d", 837 | "sha256:5618f72e947533832cbc3dec54e1dffc1747a5cb17d1fd91577ed14fa0dc081b", 838 | "sha256:5f6a07e62e799be5d2330e68d808c8ac41d4a259b9cea61da4101b83cb5dc673", 839 | "sha256:c58d56003daf1b616336781b26d184023ea4af13ae143d9dda65e31e534940b9", 840 | "sha256:c952975c8ba74f546ae6de2e226ab3cc3cc11ae47baf607459a6728585bb542a", 841 | "sha256:c98232a3ac391f5faea6821b53db8db461157baa788f5d6222a193e9456e1740" 842 | ], 843 | "markers": "python_version >= '3.5'", 844 | "version": "==6.0.4" 845 | }, 846 | "traitlets": { 847 | "hashes": [ 848 | "sha256:70b4c6a1d9019d7b4f6846832288f86998aa3b9207c6821f3578a6a6a467fe44", 849 | "sha256:d023ee369ddd2763310e4c3eae1ff649689440d4ae59d7485eb4cfbbe3e359f7" 850 | ], 851 | "version": "==4.3.3" 852 | }, 853 | "typed-ast": { 854 | "hashes": [ 855 | "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355", 856 | "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919", 857 | "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa", 858 | "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652", 859 | "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75", 860 | "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01", 861 | "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d", 862 | "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1", 863 | "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907", 864 | "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c", 865 | "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3", 866 | "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b", 867 | 
"sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614", 868 | "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb", 869 | "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b", 870 | "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41", 871 | "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6", 872 | "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34", 873 | "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe", 874 | "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4", 875 | "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7" 876 | ], 877 | "markers": "python_version < '3.8' and implementation_name == 'cpython'", 878 | "version": "==1.4.1" 879 | }, 880 | "urllib3": { 881 | "hashes": [ 882 | "sha256:91056c15fa70756691db97756772bb1eb9678fa585d9184f24534b100dc60f4a", 883 | "sha256:e7983572181f5e1522d9c98453462384ee92a0be7fac5f1413a1e35c56cc0461" 884 | ], 885 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 886 | "version": "==1.25.10" 887 | }, 888 | "virtualenv": { 889 | "hashes": [ 890 | "sha256:8aa9c37b082664dbce2236fa420759c02d64109d8e6013593ad13914718a30fd", 891 | "sha256:f14a0a98ea4397f0d926cff950361766b6a73cd5975ae7eb259d12919f819a25" 892 | ], 893 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 894 | "version": "==20.0.29" 895 | }, 896 | "wcwidth": { 897 | "hashes": [ 898 | "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784", 899 | "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83" 900 | ], 901 | "version": "==0.2.5" 902 | }, 903 | "webencodings": { 904 | "hashes": [ 905 | "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", 906 | "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" 
907 | ], 908 | "version": "==0.5.1" 909 | }, 910 | "wrapt": { 911 | "hashes": [ 912 | "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7" 913 | ], 914 | "version": "==1.12.1" 915 | }, 916 | "zipp": { 917 | "hashes": [ 918 | "sha256:aa36550ff0c0b7ef7fa639055d797116ee891440eac1a56f378e2d3179e0320b", 919 | "sha256:c599e4d75c98f6798c509911d08a22e6c021d074469042177c8c86fb92eefd96" 920 | ], 921 | "markers": "python_version >= '3.6'", 922 | "version": "==3.1.0" 923 | } 924 | } 925 | } 926 | --------------------------------------------------------------------------------