├── .gitattributes ├── .github ├── COC_CONTACT.md ├── CODE_OF_CONDUCT.md ├── resources │ ├── images │ │ └── wikidata_logo.png │ └── logo │ │ ├── wikirepo_logo.png │ │ └── wikirepo_logo_transparent.png └── workflows │ └── ci.yml ├── .gitignore ├── .readthedocs.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── codecov.yml ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ ├── data │ ├── data_utils.rst │ ├── index.rst │ ├── lctn_utils.rst │ ├── query.rst │ ├── time_utils.rst │ ├── upload.rst │ └── wd_utils.rst │ ├── data_property_directories │ ├── climate │ │ └── index.rst │ ├── demographic │ │ └── index.rst │ ├── economic │ │ └── index.rst │ ├── electoral_results │ │ └── index.rst │ ├── geographic │ │ └── index.rst │ ├── index.rst │ ├── institutional │ │ └── index.rst │ ├── misc │ │ └── index.rst │ └── political │ │ └── index.rst │ ├── index.rst │ ├── maps │ └── index.rst │ ├── notes.rst │ └── utils.rst ├── environment.yml ├── examples ├── add_data.ipynb └── add_property.ipynb ├── requirements.txt ├── setup.cfg ├── setup.py ├── src └── wikirepo │ ├── __init__.py │ ├── data │ ├── __init__.py │ ├── climate │ │ ├── __init__.py │ │ ├── aqi.py │ │ ├── precipitation.py │ │ └── temperature.py │ ├── data_utils.py │ ├── demographic │ │ ├── __init__.py │ │ ├── ethnic_div.py │ │ ├── life_expectancy.py │ │ ├── literacy.py │ │ ├── out_of_school_children.py │ │ └── population.py │ ├── economic │ │ ├── __init__.py │ │ ├── gdp_ppp.py │ │ ├── gini.py │ │ ├── inflation_rate.py │ │ ├── median_income.py │ │ ├── nom_gdp.py │ │ ├── nom_gdp_per_cap.py │ │ ├── ppp_gdp_per_cap.py │ │ ├── total_reserves.py │ │ └── unemployment.py │ ├── electoral_polls │ │ └── __init__.py │ ├── electoral_results │ │ ├── __init__.py │ │ ├── ballot_measures.py │ │ ├── pri_executive.py │ │ ├── pri_lower_house.py │ │ ├── pri_sup_executive.py │ │ ├── pri_sup_lower_house.py │ │ ├── pri_sup_upper_house.py │ │ ├── pri_upper_house.py │ │ ├── 
rd1_executive.py │ │ ├── rd1_lower_house.py │ │ ├── rd1_sup_executive.py │ │ ├── rd1_sup_lower_house.py │ │ ├── rd1_sup_upper_house.py │ │ ├── rd1_upper_house.py │ │ ├── rd2_executive.py │ │ ├── rd2_lower_house.py │ │ ├── rd2_sup_executive.py │ │ ├── rd2_sup_lower_house.py │ │ ├── rd2_sup_upper_house.py │ │ ├── rd2_upper_house.py │ │ └── sup_ballot_measures.py │ ├── geographic │ │ ├── __init__.py │ │ ├── area.py │ │ ├── continent.py │ │ ├── country.py │ │ └── sub_electoral_region.py │ ├── institutional │ │ ├── __init__.py │ │ ├── bti_gov_idx.py │ │ ├── bti_status_idx.py │ │ ├── capital.py │ │ ├── fh_category.py │ │ ├── human_dev_idx.py │ │ └── org_membership.py │ ├── lctn_utils.py │ ├── misc │ │ ├── __init__.py │ │ ├── country_abbr.py │ │ └── sub_country_abbr.py │ ├── political │ │ ├── __init__.py │ │ ├── executive.py │ │ ├── num_seats_lower_house.py │ │ ├── num_seats_upper_house.py │ │ ├── rep_lower_house.py │ │ ├── rep_upper_house.py │ │ └── sup_executive.py │ ├── query.py │ ├── time_utils.py │ ├── upload.py │ └── wd_utils.py │ ├── maps │ ├── __init__.py │ ├── map_utils.py │ ├── query.py │ └── upload.py │ └── utils.py └── tests ├── __init__.py ├── conftest.py ├── test_data_utils.py ├── test_lctn_utils.py ├── test_time_utils.py ├── test_utils.py └── test_wd_utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Source files 2 | *.pxd text diff=python 3 | *.py text diff=python 4 | *.py3 text diff=python 5 | *.pyw text diff=python 6 | *.pyx text diff=python 7 | *.pyz text diff=python 8 | *.pyi text diff=python 9 | 10 | # Binary files 11 | *.db binary 12 | *.p binary 13 | *.pkl binary 14 | *.pickle binary 15 | *.pyc binary 16 | *.pyd binary 17 | *.pyo binary 18 | 19 | # Jupyter notebook 20 | *.ipynb text 21 | -------------------------------------------------------------------------------- /.github/COC_CONTACT.md: -------------------------------------------------------------------------------- 1 | Issues 
pertaining to this project's [code of conduct](https://github.com/andrewtavis/wikirepo/blob/main/.github/CODE_OF_CONDUCT.md) can be reported to: 2 | 3 | Andrew Tavis McAllister (andrew.t.mcallister@gmail.com) 4 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 65 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 66 | reported to the community leaders responsible for enforcement - these being the 67 | main project stakeholders or individuals specifically tasked with overseeing the 68 | values described within this code. Contact details must be readily provided for 69 | the latter party, and for the former if no community leader has been given the 70 | specific responsibility to oversee community conduct. 71 | 72 | Within source code, the contact for those responsible for community conduct can 73 | be found in a corresponding COC_CONTACT file. All complaints will be reviewed and 74 | investigated promptly and fairly. 75 | 76 | All community leaders are obligated to respect the privacy and security of the 77 | reporter of any incident. 78 | 79 | ## Enforcement Guidelines 80 | 81 | Community leaders will follow these Community Impact Guidelines in determining 82 | the consequences for any action they deem in violation of this Code of Conduct: 83 | 84 | ### 1. Correction 85 | 86 | **Community Impact**: Use of inappropriate language or other behavior deemed 87 | unprofessional or unwelcome in the community. 88 | 89 | **Consequence**: A private, written warning from community leaders, providing 90 | clarity around the nature of the violation and an explanation of why the 91 | behavior was inappropriate. A public apology may be requested. 92 | 93 | ### 2. Warning 94 | 95 | **Community Impact**: A violation through a single incident or series 96 | of actions. 97 | 98 | **Consequence**: A warning with consequences for continued behavior. 
No 99 | interaction with the people involved, including unsolicited interaction with 100 | those enforcing the Code of Conduct, for a specified period of time. This 101 | includes avoiding interactions in community spaces as well as external channels 102 | like social media. Violating these terms may lead to a temporary or 103 | permanent ban. 104 | 105 | ### 3. Temporary Ban 106 | 107 | **Community Impact**: A serious violation of community standards, including 108 | sustained inappropriate behavior. 109 | 110 | **Consequence**: A temporary ban from any sort of interaction or public 111 | communication with the community for a specified period of time. No public or 112 | private interaction with the people involved, including unsolicited interaction 113 | with those enforcing the Code of Conduct, is allowed during this period. 114 | Violating these terms may lead to a permanent ban. 115 | 116 | ### 4. Permanent Ban 117 | 118 | **Community Impact**: Demonstrating a pattern of violation of community 119 | standards, including sustained inappropriate behavior, harassment of an 120 | individual, or aggression toward or disparagement of classes of individuals. 121 | 122 | **Consequence**: A permanent ban from any sort of public interaction within 123 | the community. 124 | 125 | ## Attribution 126 | 127 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 128 | version 2.0, available at 129 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. 130 | 131 | Community Impact Guidelines were inspired by 132 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 133 | 134 | For answers to common questions about this code of conduct, see the FAQ at 135 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available 136 | at [https://www.contributor-covenant.org/translations][translations]. 
137 | 138 | [homepage]: https://www.contributor-covenant.org 139 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html 140 | [Mozilla CoC]: https://github.com/mozilla/diversity 141 | [FAQ]: https://www.contributor-covenant.org/faq 142 | [translations]: https://www.contributor-covenant.org/translations 143 | -------------------------------------------------------------------------------- /.github/resources/images/wikidata_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/.github/resources/images/wikidata_logo.png -------------------------------------------------------------------------------- /.github/resources/logo/wikirepo_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/.github/resources/logo/wikirepo_logo.png -------------------------------------------------------------------------------- /.github/resources/logo/wikirepo_logo_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/.github/resources/logo/wikirepo_logo_transparent.png -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | branches: [main] 6 | # schedule: 7 | # - cron: '0 0 * * 0' # weekly 8 | 9 | jobs: 10 | run: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [macos-latest] 15 | python-version: [3.8] 16 | env: 17 | OS: ${{ matrix.os }} 18 | PYTHON: ${{ matrix.python-version }} 19 | 20 | steps: 21 | - name: Clone repository 22 | uses: actions/checkout@v2 23 | - name: 
Setup Python 24 | uses: actions/setup-python@main 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Add conda to system path 28 | uses: conda-incubator/setup-miniconda@v2 29 | with: 30 | auto-update-conda: true 31 | python-version: ${{ matrix.python-version }} 32 | - name: Install dependencies 33 | run: | 34 | eval "$(conda shell.bash hook)" 35 | conda env create --file environment.yml 36 | # - name: Build 37 | # eval "$(conda shell.bash hook)" 38 | # pip install -e . 39 | - name: Test and generate coverage 40 | run: | 41 | eval "$(conda shell.bash hook)" 42 | conda activate wikirepo-dev 43 | export PYTHONPATH="./src" 44 | pytest --cov=src tests --cov-report=xml 45 | - name: Upload coverage to Codecov 46 | uses: codecov/codecov-action@v2 47 | with: 48 | fail_ci_if_error: true 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Editor files 2 | ############## 3 | .vscode/ 4 | .ipynb_checkpoints 5 | 6 | # OS files 7 | ########## 8 | .DS_Store 9 | 10 | # Python files 11 | ############## 12 | # setup.py working directory 13 | build 14 | # setup.py dist directory 15 | dist 16 | # Egg metadata 17 | *.egg-info 18 | # Caches 19 | __pycache__ 20 | # Virtual Environments 21 | .env 22 | .venv 23 | env 24 | venv 25 | wikirepo-dev 26 | 27 | # Display files 28 | ############### 29 | *_preview.png 30 | *.sketch 31 | 32 | # Testing files 33 | ############### 34 | .coverage 35 | _*.ipynb 36 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | 
os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/source/conf.py 17 | 18 | # We recommend specifying your dependencies to enable reproducible builds: 19 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 20 | python: 21 | install: 22 | - requirements: docs/requirements.txt 23 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | wikirepo tries to follow [semantic versioning](https://semver.org/), a MAJOR.MINOR.PATCH version where increments are made of the: 4 | 5 | - MAJOR version when we make incompatible API changes 6 | - MINOR version when we add functionality in a backwards compatible manner 7 | - PATCH version when we make backwards compatible bug fixes 8 | 9 | # wikirepo 1.0.0 (December 28th, 2021) 10 | 11 | - Release switches wikirepo over to [semantic versioning](https://semver.org/) and indicates that it is stable 12 | 13 | # wikirepo 0.1.1.5 (March 28th, 2021) 14 | 15 | Changes include: 16 | 17 | - An src structure has been adopted for easier testing and to fix wheel distribution issues 18 | - Code quality is now checked with Codacy 19 | - Extensive code formatting to improve quality and style 20 | - Fixes to vulnerabilities through exception use 21 | 22 | # wikirepo 0.1.0 (Feb 23rd, 2021) 23 | 24 | First stable release of wikirepo 25 | 26 | Changes include: 27 | 28 | - Full documentation of the package 29 | - Virtual environment files 30 | - Bug fixes 31 | - Extensive testing of all modules with GH Actions and Codecov 32 | - Code of conduct and contribution guidelines 33 | 34 | # wikirepo 0.0.2 (Dec 8th, 2020) 35 | 36 | The minimum viable product of wikirepo: 37 | 38 | - Users are able to query data from [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) given locations, 
depth, time_lvl, and timespan arguments 39 | - String arguments are accepted for Earth, continents, countries and disputed territories 40 | - Data for greater depths can be retrieved by creating a dictionary given initial starting locations and going to greater depths using the [contains administrative territorial entity property](https://www.wikidata.org/wiki/Property:P150) 41 | - Data is formatted and loaded into a pandas dataframe for further manipulation 42 | - All available social science properties on Wikidata have had modules created for them 43 | - Estimated load times and progress are given 44 | - The project's scope and general roadmap have been defined and detailed in the README 45 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to wikirepo 2 | 3 | Thank you for your consideration in contributing to this project! 4 | 5 | Please take a moment to review this document in order to make the contribution process easy and effective for everyone involved. 6 | 7 | Following these guidelines helps to communicate that you respect the time of the developers managing and developing this open source project. In return, and in accordance with this project's [code of conduct](https://github.com/andrewtavis/wikirepo/blob/main/.github/CODE_OF_CONDUCT.md), other contributors will reciprocate that respect in addressing your issue or assessing patches and features. 8 | 9 | 10 | ## Using the issue tracker 11 | 12 | The [issue tracker for wikirepo](https://github.com/andrewtavis/wikirepo/issues) is the preferred channel for [bug reports](#bug-reports), [features requests](#feature-requests) and [submitting pull requests](#pull-requests). 13 | 14 | 15 | 16 | ## Bug reports 17 | 18 | A bug is a _demonstrable problem_ that is caused by the code in the repository. Good bug reports are extremely helpful - thank you! 
19 | 20 | Guidelines for bug reports: 21 | 22 | 1. **Use the GitHub issue search** to check if the issue has already been reported. 23 | 24 | 2. **Check if the issue has been fixed** by trying to reproduce it using the latest `main` or development branch in the repository. 25 | 26 | 3. **Isolate the problem** to make sure that the code in the repository is _definitely_ responsible for the issue. 27 | 28 | **Great Bug Reports** tend to have: 29 | - A quick summary 30 | - Steps to reproduce 31 | - What you expected would happen 32 | - What actually happens 33 | - Notes (why this might be happening, things tried that didn't work, etc) 34 | 35 | Again, thank you for your time in reporting issues! 36 | 37 | 38 | 39 | ## Feature requests 40 | 41 | Feature requests are more than welcome! Please take a moment to find out whether your idea fits with the scope and aims of the project. When making a suggestion, provide as much detail and context as possible, and further make clear the degree to which you would like to contribute in its development. 42 | 43 | 44 | 45 | ## Pull requests 46 | 47 | Good pull requests - patches, improvements and new features - are a fantastic help. They should remain focused in scope and avoid containing unrelated commits. Note that all contributions to this project will be made under [the specified license](https://github.com/andrewtavis/wikirepo/blob/main/LICENSE.txt) and should follow the coding indentation and style standards (contact us if unsure). 48 | 49 | **Please ask first** before embarking on any significant pull request (implementing features, refactoring code, etc), otherwise you risk spending a lot of time working on something that the developers might not want to merge into the project. With that being said, major additions are very appreciated! 50 | 51 | When making a contribution, adhering to the [GitHub flow](https://guides.github.com/introduction/flow/index.html) process is the best way to get your work merged: 52 | 53 | 1. 
[Fork](http://help.github.com/fork-a-repo/) the repo, clone your fork, and configure the remotes: 54 | 55 | ```bash 56 | # Clone your fork of the repo into the current directory 57 | git clone https://github.com// 58 | # Navigate to the newly cloned directory 59 | cd 60 | # Assign the original repo to a remote called "upstream" 61 | git remote add upstream https://github.com// 62 | ``` 63 | 64 | 2. If you cloned a while ago, get the latest changes from upstream: 65 | 66 | ```bash 67 | git checkout 68 | git pull upstream 69 | ``` 70 | 71 | 3. Create a new topic branch (off the main project development branch) to contain your feature, change, or fix: 72 | 73 | ```bash 74 | git checkout -b 75 | ``` 76 | 77 | 4. Commit your changes in logical chunks, and please try to adhere to [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/). Use Git's [interactive rebase](https://docs.github.com/en/github/getting-started-with-github/about-git-rebase) feature to tidy up your commits before making them public. 78 | 79 | 5. Locally merge (or rebase) the upstream development branch into your topic branch: 80 | 81 | ```bash 82 | git pull --rebase upstream 83 | ``` 84 | 85 | 6. Push your topic branch up to your fork: 86 | 87 | ```bash 88 | git push origin 89 | ``` 90 | 91 | 7. [Open a Pull Request](https://help.github.com/articles/using-pull-requests/) with a clear title and description. 92 | 93 | Thank you in advance for your contributions! 94 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, the wikirepo developers. All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | * Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGELOG.* CONTRIBUTORS.* LICENSE.* 2 | graft src 3 | graft docs 4 | prune docs/build 5 | graft tests 6 | global-exclude *.py[cod] 7 | global-exclude .DS_Store 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 5 |
    6 | 7 | [![rtd](https://img.shields.io/readthedocs/wikirepo.svg?logo=read-the-docs)](http://wikirepo.readthedocs.io/en/latest/) 8 | [![ci](https://img.shields.io/github/actions/workflow/status/andrewtavis/wikirepo/.github/workflows/ci.yml?branch=main&logo=github)](https://github.com/andrewtavis/wikirepo/actions?query=workflow%3ACI) 9 | [![codecov](https://codecov.io/gh/andrewtavis/wikirepo/branch/main/graphs/badge.svg)](https://codecov.io/gh/andrewtavis/wikirepo) 10 | [![pyversions](https://img.shields.io/pypi/pyversions/wikirepo.svg?logo=python&logoColor=FFD43B&color=306998)](https://pypi.org/project/wikirepo/) 11 | [![pypi](https://img.shields.io/pypi/v/wikirepo.svg?color=4B8BBE)](https://pypi.org/project/wikirepo/) 12 | [![pypistatus](https://img.shields.io/pypi/status/wikirepo.svg)](https://pypi.org/project/wikirepo/) 13 | [![license](https://img.shields.io/github/license/andrewtavis/wikirepo.svg)](https://github.com/andrewtavis/wikirepo/blob/main/LICENSE.txt) 14 | [![coc](https://img.shields.io/badge/coc-Contributor%20Covenant-ff69b4.svg)](https://github.com/andrewtavis/wikirepo/blob/main/.github/CODE_OF_CONDUCT.md) 15 | [![codestyle](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 16 | 17 | ## Python based Wikidata framework for easy dataframe extraction 18 | 19 | **wikirepo** is a Python package that provides a framework to easily source and leverage standardized [Wikidata](https://www.wikidata.org/) information. The goal is to create an intuitive interface so that Wikidata can function as a common read-write repository for public statistics. 20 | 21 | See the [documentation](http://wikirepo.readthedocs.io/en/latest/) for a full outline of the package including usage and available data. 
22 | 23 | 24 | 25 | # **Contents** 26 | 27 | - [Installation](#installation) 28 | - [Data](#data) 29 | - [Query Data](#query-data) 30 | - [Upload Data (WIP)](#upload-data-wip) 31 | - [Maps (WIP)](#maps-wip) 32 | - [Examples](#examples) 33 | - [To-Do](#to-do) 34 | 35 | 36 | 37 | # Installation [`⇧`](#contents) 38 | 39 | wikirepo can be downloaded from PyPI via pip or sourced directly from this repository: 40 | 41 | ```bash 42 | pip install wikirepo 43 | ``` 44 | 45 | ```bash 46 | git clone https://github.com/andrewtavis/wikirepo.git 47 | cd wikirepo 48 | python setup.py install 49 | ``` 50 | 51 | ```python 52 | import wikirepo 53 | ``` 54 | 55 | 56 | 57 | # Data [`⇧`](#contents) 58 | 59 | wikirepo's data structure is built around [Wikidata.org](https://www.wikidata.org/). Human-readable access to Wikidata statistics is achieved through converting requests into Wikidata's item IDs (QIDs) and Property IDs (PIDs), with the Python package [wikidata](https://github.com/dahlia/wikidata) serving as a basis for data loading and indexing. See the [documentation](https://wikirepo.readthedocs.io/en/latest/) for a structured overview of the currently available properties. 60 | 61 | 62 | 63 | ## Query Data [`⇧`](#contents) 64 | 65 | wikirepo's main access function, [wikirepo.data.query](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/query.py), returns a `pandas.DataFrame` of locations and property data across time. 66 | 67 | Each query needs the following inputs: 68 | 69 | - **locations**: the locations that data should be queried for 70 | - Strings are accepted for `Earth`, continents, and countries 71 | - Get all country names with `wikirepo.data.incl_lctn_lbls(lctn_lvls='country')` 72 | - The user can also pass Wikidata QIDs directly 73 | - **depth**: the geographic level of the given locations to query 74 | - A depth of 0 is the locations themselves 75 | - Greater depths correspond to lower geographic levels (states of countries, etc.) 
76 | - A dictionary of locations is generated for lower depths (see second example below) 77 | - **timespan**: start and end `datetime.date` objects defining when data should come from 78 | - If not provided, then the most recent data will be retrieved with annotation for when it's from 79 | - **interval**: `yearly`, `monthly`, `weekly`, or `daily` as strings 80 | - **Further arguments**: the names of modules in [wikirepo/data](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data) directories 81 | - These are passed to arguments corresponding to their directories 82 | - Data will be queried for these properties for the given `locations`, `depth`, `timespan` and `interval`, with results being merged as dataframe columns 83 | 84 | Queries are also able to access information in Wikidata sub-pages for locations. For example: if inflation rate is not found on the location's main page, then wikirepo checks the location's economic topic page as [inflation_rate.py](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/economic/inflation_rate.py) is found in [wikirepo/data/economic](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data/economic) (see [Germany](https://www.wikidata.org/wiki/Q183) and [economy of Germany](https://www.wikidata.org/wiki/Q8046)). 85 | 86 | wikirepo further provides a unique dictionary class, `EntitiesDict`, that stores all loaded Wikidata entities during a query. This speeds up data retrieval, as entities are loaded once and then accessed in the `EntitiesDict` object for any other needed properties. 
87 | 88 | Examples of [wikirepo.data.query](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/query.py) follow: 89 | 90 | #### Querying Information for Given Countries 91 | 92 | ```python 93 | import wikirepo 94 | from wikirepo.data import wd_utils 95 | from datetime import date 96 | 97 | ents_dict = wd_utils.EntitiesDict() 98 | # Strings must match their Wikidata English page names 99 | countries = ["Germany", "United States of America", "People's Republic of China"] 100 | # countries = ["Q183", "Q30", "Q148"] # we could also pass QIDs 101 | # data.incl_lctn_lbls(lctn_lvls='country') # or all countries` 102 | depth = 0 103 | timespan = (date(2009, 1, 1), date(2010, 1, 1)) 104 | interval = "yearly" 105 | 106 | df = wikirepo.data.query( 107 | ents_dict=ents_dict, 108 | locations=countries, 109 | depth=depth, 110 | timespan=timespan, 111 | interval=interval, 112 | climate_props=None, 113 | demographic_props=["population", "life_expectancy"], 114 | economic_props="median_income", 115 | electoral_poll_props=None, 116 | electoral_result_props=None, 117 | geographic_props=None, 118 | institutional_props="human_dev_idx", 119 | political_props="executive", 120 | misc_props=None, 121 | verbose=True, 122 | ) 123 | 124 | col_order = [ 125 | "location", 126 | "qid", 127 | "year", 128 | "executive", 129 | "population", 130 | "life_exp", 131 | "human_dev_idx", 132 | "median_income", 133 | ] 134 | df = df[col_order] 135 | 136 | df.head(6) 137 | ``` 138 | 139 | | location | qid | year | executive | population | life_exp | human_dev_idx | median_income | 140 | | :------------------------- | ---: | ---: | -------------: | ----------: | -------: | ------------: | ------------: | 141 | | Germany | Q183 | 2010 | Angela Merkel | 8.1752e+07 | 79.9878 | 0.921 | 33333 | 142 | | Germany | Q183 | 2009 | Angela Merkel | nan | 79.8366 | 0.917 | nan | 143 | | United States of America | Q30 | 2010 | Barack Obama | 3.08746e+08 | 78.5415 | 0.914 | 43585 | 144 | | United States of 
America | Q30 | 2009 | George W. Bush | nan | 78.3902 | 0.91 | nan | 145 | | People's Republic of China | Q148 | 2010 | Wen Jiabao | 1.35976e+09 | 75.236 | 0.706 | nan | 146 | | People's Republic of China | Q148 | 2009 | Wen Jiabao | nan | 75.032 | 0.694 | nan | 147 | 148 | #### Querying Information for all US Counties 149 | 150 | ```python 151 | # Note: >3000 regions, expect a 45 minute runtime 152 | import wikirepo 153 | from wikirepo.data import lctn_utils, wd_utils 154 | from datetime import date 155 | 156 | ents_dict = wd_utils.EntitiesDict() 157 | country = "United States of America" 158 | # country = "Q30" # we could also pass its QID 159 | depth = 2 # 2 for counties, 1 for states and territories 160 | sub_lctns = True # for all 161 | # Only valid sub-locations given the timespan will be queried 162 | timespan = (date(2016, 1, 1), date(2018, 1, 1)) 163 | interval = "yearly" 164 | 165 | us_counties_dict = lctn_utils.gen_lctns_dict( 166 | ents_dict=ents_dict, 167 | locations=country, 168 | depth=depth, 169 | sub_lctns=sub_lctns, 170 | timespan=timespan, 171 | interval=interval, 172 | verbose=True, 173 | ) 174 | 175 | df = wikirepo.data.query( 176 | ents_dict=ents_dict, 177 | locations=us_counties_dict, 178 | depth=depth, 179 | timespan=timespan, 180 | interval=interval, 181 | climate_props=None, 182 | demographic_props="population", 183 | economic_props=None, 184 | electoral_poll_props=None, 185 | electoral_result_props=None, 186 | geographic_props="area", 187 | institutional_props="capital", 188 | political_props=None, 189 | misc_props=None, 190 | verbose=True, 191 | ) 192 | 193 | df[df["population"].notnull()].head(6) 194 | ``` 195 | 196 | | location | sub_lctn | sub_sub_lctn | qid | year | population | area_km2 | capital | 197 | | :----------------------- | ---------: | ------------------: | ------: | ---: | ----------: | -------: | -----------: | 198 | | United States of America | California | Alameda County | Q107146 | 2018 | 1.6602e+06 | 2127 | Oakland | 
199 | | United States of America | California | Contra Costa County | Q108058 | 2018 | 1.14936e+06 | 2078 | Martinez | 200 | | United States of America | California | Marin County | Q108117 | 2018 | 263886 | 2145 | San Rafael | 201 | | United States of America | California | Napa County | Q108137 | 2018 | 141294 | 2042 | Napa | 202 | | United States of America | California | San Mateo County | Q108101 | 2018 | 774155 | 1919 | Redwood City | 203 | | United States of America | California | Santa Clara County | Q110739 | 2018 | 1.9566e+06 | 3377 | San Jose | 204 | 205 | 206 | 207 | ## Upload Data (WIP) [`⇧`](#contents) 208 | 209 | [wikirepo.data.upload](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/upload.py) will be the core of the eventual wikirepo upload feature. The goal is to record edits that a user makes to a previously queried or baseline dataframe such that these changes can then be pushed back to Wikidata. With the addition of Wikidata login credentials as a wikirepo feature (WIP), the unique information in the edited dataframe could then be uploaded to Wikidata for all to use. 210 | 211 | The same process used to query information from Wikidata could be reversed for the upload process. Dataframe columns could be linked to their corresponding Wikidata properties, whether the time qualifiers are a [point in time](https://www.wikidata.org/wiki/Property:P585) or spans using [start time](https://www.wikidata.org/wiki/Property:P580) and [end time](https://www.wikidata.org/wiki/Property:P582) could be derived through the defined variables in the module header, and other necessary qualifiers for proper data indexing could also be included. Source information could also be added in corresponding columns to the given property edits. 212 | 213 | `Pseudocode` for how this process could function follows: 214 | 215 | In the first example, changes are made to a `df.copy()` of a queried dataframe. 
[pandas](https://github.com/pandas-dev/pandas) is then used to compare the new and original dataframes after the user has added information that they have access to. 216 | 217 | ```python 218 | import pandas as pd 219 | import wikirepo 220 | from wikirepo.data import lctn_utils, wd_utils 221 | from datetime import date 222 | 223 | credentials = wd_utils.login() 224 | 225 | ents_dict = wd_utils.EntitiesDict() 226 | country = "Country Name" 227 | depth = 2 228 | sub_lctns = True 229 | timespan = (date(2000,1,1), date(2018,1,1)) 230 | interval = 'yearly' 231 | 232 | lctns_dict = lctn_utils.gen_lctns_dict() 233 | 234 | df = wikirepo.data.query() 235 | df_copy = df.copy() 236 | 237 | # The user checks for NaNs and adds data 238 | 239 | df_edits = pd.concat([df, df_copy]).drop_duplicates(keep=False) 240 | 241 | wikirepo.data.upload(df_edits, credentials) 242 | ``` 243 | 244 | In the next example `data.data_utils.gen_base_df` is used to create a dataframe with dimensions that match a time series that the user has access to. 
244 | 245 | ```python 246 | import wikirepo 247 | from wikirepo.data import data_utils, wd_utils 248 | from datetime import date 249 | 250 | credentials = wd_utils.login() 251 | 252 | locations = "Country Name" 253 | depth = 0 254 | # The user defines the time parameters based on their data 255 | timespan = (date(1995,1,2), date(2010,1,2)) # (first Monday, last Sunday) 256 | interval = 'weekly' 257 | 258 | base_df = data_utils.gen_base_df() 259 | base_df['data'] = data_for_matching_time_series 260 | 261 | source_data = wd_utils.gen_source_dict('Source Information') 262 | base_df['data_source'] = [source_data] * len(base_df) 263 | 264 | wikirepo.data.upload(base_df, credentials) 265 | ``` 266 | 267 | Put simply: a full featured [wikirepo.data.upload](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/upload.py) function would realize the potential of a single read-write repository for all public information. 268 | 269 | 270 | 271 | # Maps (WIP) [`⇧`](#contents) 272 | 273 | [wikirepo/maps](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/maps) is a further goal of the project, as it combines wikirepo's focus on easy to access open source data and quick high level analytics. 274 | 275 | ### • Query Maps 276 | 277 | As in [wikirepo.data.query](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/query.py), passing the `locations`, `depth`, `timespan` and `interval` arguments could access GeoJSON files stored on Wikidata, thus providing mapping files in parallel to the user's data. These files could then be leveraged using existing Python plotting libraries to provide detailed presentations of geographic analysis. 278 | 279 | ### • Upload Maps 280 | 281 | Similar to the potential of adding statistics through [wikirepo.data.upload](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/upload.py), GeoJSON map files could also be uploaded to Wikidata using appropriate arguments. 
The potential exists for a myriad of variable maps given `locations`, `depth`, `timespan` and `interval` information that would allow all wikirepo users to get the exact mapping file that they need for their given task. 282 | 283 | 284 | 285 | # Examples [`⇧`](#contents) 286 | 287 | wikirepo can be used as a foundation for countless projects, with its usefulness and practicality only improving as more properties are added and more data is uploaded to [Wikidata](https://www.wikidata.org/). 288 | 289 | Current usage examples include: 290 | 291 | - Sample notebooks for the Python package [poli-sci-kit](https://github.com/andrewtavis/poli-sci-kit) show how to use wikirepo as a basis for political election and parliamentary appointment analysis, with those notebooks being found in the [examples for poli-sci-kit](https://github.com/andrewtavis/poli-sci-kit/tree/main/examples) or on [Google Colab](https://colab.research.google.com/github/andrewtavis/poli-sci-kit) 292 | - Pull requests with other examples will gladly be accepted 293 | 294 | 295 | 296 | # To-Do [`⇧`](#contents) 297 | 298 | Please see the [contribution guidelines](https://github.com/andrewtavis/wikirepo/blob/main/.github/CONTRIBUTING.md) if you are interested in contributing to this project. 
Work that is in progress or could be implemented includes: 299 | 300 | ## Expanding wikirepo 301 | 302 | - Creating an outline of the package's structure for the readme [(see issue)](https://github.com/andrewtavis/wikirepo/issues/18) 303 | 304 | - Integrating current Python tools with wikirepo structures for uploads to Wikidata 305 | 306 | - Adding a query of property descriptions to `data.data_utils.incl_dir_idxs` [(see issue)](https://github.com/andrewtavis/wikirepo/issues/15) 307 | 308 | - Adding multiprocessing support to the [wikirepo.data.query](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/query.py) process and `data.lctn_utils.gen_lctns_dict` 309 | 310 | - Potentially converting [wikirepo.data.query](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/query.py) and `data.lctn_utils.gen_lctns_dict` over to generated Wikidata SPARQL queries 311 | 312 | - Optimizing [wikirepo.data.query](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/data/query.py): 313 | 314 | - Potentially converting `EntitiesDict` and `LocationsDict` to slotted object classes for memory savings 315 | - Deriving and optimizing other slow parts of the query process 316 | 317 | - Adding access to GeoJSON files for mapping via [wikirepo.maps.query](https://github.com/andrewtavis/wikirepo/blob/main/src/wikirepo/maps/query.py) 318 | 319 | - Designing and adding GeoJSON files indexed by time properties to Wikidata 320 | 321 | - Creating, improving and sharing [examples](https://github.com/andrewtavis/wikirepo/tree/main/examples) 322 | 323 | - Improving [tests](https://github.com/andrewtavis/wikirepo/tree/main/tests) for greater [code coverage](https://codecov.io/gh/andrewtavis/wikirepo) 324 | 325 | - Improving [code quality](https://app.codacy.com/gh/andrewtavis/wikirepo/dashboard) by refactoring large functions and checking conventions 326 | 327 | ## Expanding Wikidata 328 | 329 | The growth of wikirepo's database relies on that of 
[Wikidata](https://www.wikidata.org/). Through `data.wd_utils.dir_to_topic_page` wikirepo can access properties on location sub-pages, thus allowing for statistics on any topic to be linked to. Beyond including entries for already existing properties (see [this issue](https://github.com/andrewtavis/wikirepo/issues/16)), the following are examples of property types that could be added: 330 | 331 | - Climate statistics could be added to [data/climate](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data/climate) 332 | 333 | - This would allow for easy modeling of global warming and its effects 334 | - Planning would be needed for whether lower intervals would be necessary, or just include daily averages 335 | 336 | - Those for electoral [polling](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data/electoral_polls) and [results](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data/electoral_results) for locations 337 | 338 | - This would allow direct access to all needed election information in a single function call 339 | 340 | - A property that links political parties and their regions in [data/political](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data/political) 341 | 342 | - For easy professional presentation of electoral results (ex: loading in party hex colors, abbreviations, and alignments) 343 | 344 | - [data/demographic](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data/demographic) properties such as: 345 | 346 | - age, education, religious, and linguistic diversities across time 347 | 348 | - [data/economic](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data/economic) properties such as: 349 | 350 | - female workforce participation, workforce industry diversity, wealth diversity, and total working age population across time 351 | 352 | - Distinct properties for Freedom House and Press Freedom indexes, as well as other descriptive metrics 353 | - These could 
be added to [data/institutional](https://github.com/andrewtavis/wikirepo/tree/main/src/wikirepo/data/institutional) 354 | 355 | # Similar Projects 356 | 357 |
    Python 358 |

    359 | 360 | - https://github.com/dahlia/wikidata 361 | - https://github.com/RDFLib/sparqlwrapper 362 | - https://github.com/SuLab/WikidataIntegrator 363 | - https://github.com/siznax/wptools 364 | 365 |

    366 |
    367 | 368 |
    JavaScript 369 |

    370 | 371 | - https://github.com/maxlath/wikibase-cli 372 | - https://github.com/maxlath/wikibase-edit 373 | - https://github.com/maxlath/wikibase-dump-filter 374 | - https://github.com/maxlath/wikibase-sdk 375 | 376 |

    377 |
    378 | 379 |
    Java 380 |

    381 | 382 | - https://github.com/Wikidata/Wikidata-Toolkit 383 | 384 |

    385 |
    386 | 387 | # Powered By 388 | 389 |
    390 |
    391 | Wikidata 392 |
    393 |
    394 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | branch: main 3 | require_ci_to_pass: yes 4 | 5 | coverage: 6 | precision: 2 7 | round: down 8 | range: "70...100" 9 | 10 | status: 11 | project: 12 | target: auto 13 | threshold: 5% 14 | 15 | patch: 16 | default: 17 | enabled: false 18 | changes: no 19 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | m2r2 2 | numpydoc 3 | sphinx<7.0.0 4 | sphinx_rtd_theme 5 | wikirepo 6 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("..")) 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = "wikirepo" 21 | copyright = "2020, wikirepo developers (BSD License)" 22 | author = "wikirepo developers" 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = "1.0.1" 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "m2r2", 35 | "sphinx.ext.autodoc", 36 | "numpydoc", 37 | "sphinx.ext.viewcode", 38 | ] 39 | 40 | numpydoc_show_inherited_class_members = False 41 | numpydoc_show_class_members = False 42 | 43 | # NOT to sort autodoc functions in alphabetical order 44 | autodoc_member_order = "bysource" 45 | 46 | # To avoid installing dependencies when building doc 47 | # https://stackoverflow.com/a/15912502/8729698 48 | autodoc_mock_imports = [ 49 | "numpy", 50 | "pandas", 51 | "pytest", 52 | "pytest-cov", 53 | "tqdm", 54 | "Wikidata", 55 | ] 56 | 57 | # Add any paths that contain templates here, relative to this directory. 58 | templates_path = ["_templates"] 59 | 60 | # List of patterns, relative to source directory, that match files and 61 | # directories to ignore when looking for source files. 62 | # This pattern also affects html_static_path and html_extra_path. 63 | exclude_patterns = [] 64 | 65 | # The suffix(es) of source filenames. 66 | # You can specify multiple suffix as a list of string: 67 | # 68 | # source_suffix = ['.rst', '.md'] 69 | source_suffix = ".rst" 70 | 71 | # The master toctree document. 72 | master_doc = "index" 73 | 74 | # The name of the Pygments (syntax highlighting) style to use. 
75 | pygments_style = "sphinx" 76 | 77 | 78 | # -- Options for HTML output ---------------------------------------------- 79 | 80 | # The theme to use for HTML and HTML Help pages. See the documentation for 81 | # a list of builtin themes. 82 | # 83 | import sphinx_rtd_theme 84 | 85 | html_theme = "sphinx_rtd_theme" 86 | 87 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 88 | 89 | # Theme options are theme-specific and customize the look and feel of a theme 90 | # further. For a list of options available for each theme, see the 91 | # documentation. 92 | # 93 | # html_theme_options = {} 94 | 95 | # Add any paths that contain custom static files (such as style sheets) here, 96 | # relative to this directory. They are copied after the builtin static files, 97 | # so a file named "default.css" will overwrite the builtin "default.css". 98 | html_static_path = ["_static"] 99 | 100 | # Custom sidebar templates, must be a dictionary that maps document names 101 | # to template names. 102 | # 103 | # This is required for the alabaster theme 104 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars 105 | html_sidebars = { 106 | "**": ["relations.html", "searchbox.html"] 107 | } # needs 'show_related': True theme option to display 108 | 109 | 110 | # -- Options for HTMLHelp output ------------------------------------------ 111 | 112 | # Output file base name for HTML help builder. 113 | htmlhelp_basename = "wikirepo_doc" 114 | 115 | 116 | # -- Options for LaTeX output --------------------------------------------- 117 | 118 | latex_elements = { 119 | # The paper size ('letterpaper' or 'a4paper'). 120 | # 121 | # 'papersize': 'letterpaper', 122 | # The font size ('10pt', '11pt' or '12pt'). 123 | # 124 | # 'pointsize': '10pt', 125 | # Additional stuff for the LaTeX preamble. 
126 | # 127 | # 'preamble': '', 128 | # Latex figure (float) alignment 129 | # 130 | # 'figure_align': 'htbp', 131 | } 132 | 133 | # Grouping the document tree into LaTeX files. List of tuples 134 | # (source start file, target name, title, 135 | # author, documentclass [howto, manual, or own class]). 136 | latex_documents = [ 137 | (master_doc, "wikirepo.tex", "wikirepo Documentation", "andrewtavis", "manual",) 138 | ] 139 | 140 | 141 | # -- Options for manual page output --------------------------------------- 142 | 143 | # One entry per manual page. List of tuples 144 | # (source start file, name, description, authors, manual section). 145 | man_pages = [(master_doc, "wikirepo", "wikirepo Documentation", [author], 1)] 146 | 147 | 148 | # -- Options for Texinfo output ------------------------------------------- 149 | 150 | # Grouping the document tree into Texinfo files. List of tuples 151 | # (source start file, target name, title, author, 152 | # dir menu entry, description, category) 153 | texinfo_documents = [ 154 | ( 155 | master_doc, 156 | "wikirepo", 157 | "wikirepo Documentation", 158 | author, 159 | "wikirepo", 160 | "Python based Wikidata framework for easy dataframe extraction", 161 | "Miscellaneous", 162 | ) 163 | ] 164 | -------------------------------------------------------------------------------- /docs/source/data/data_utils.rst: -------------------------------------------------------------------------------- 1 | data_utils 2 | ========== 3 | 4 | The :py:mod:`data.data_utils` module provides utility functions for querying data. 
5 | 6 | **Functions** 7 | 8 | * :py:func:`wikirepo.data.data_utils._get_fxn_idx` 9 | * :py:func:`wikirepo.data.data_utils._get_dir_fxns_dict` 10 | * :py:func:`wikirepo.data.data_utils._check_data_assertions` 11 | * :py:func:`wikirepo.data.data_utils._get_max_workers` 12 | * :py:func:`wikirepo.data.data_utils.incl_dir_idxs` 13 | * :py:func:`wikirepo.data.data_utils.gen_base_df` 14 | * :py:func:`wikirepo.data.data_utils.assign_to_column` 15 | * :py:func:`wikirepo.data.data_utils.gen_base_and_assign_to_column` 16 | * :py:func:`wikirepo.data.data_utils.assign_to_cols` 17 | * :py:func:`wikirepo.data.data_utils.gen_base_and_assign_to_cols` 18 | * :py:func:`wikirepo.data.data_utils.query_wd_prop` 19 | * :py:func:`wikirepo.data.data_utils.query_repo_dir` 20 | * :py:func:`wikirepo.data.data_utils.interp_by_subset` 21 | * :py:func:`wikirepo.data.data_utils.sum_df_prop_vals` 22 | * :py:func:`wikirepo.data.data_utils.split_col_val_dates` 23 | * :py:func:`wikirepo.data.data_utils.count_df_prop_vals` 24 | 25 | .. autofunction:: wikirepo.data.data_utils._get_fxn_idx 26 | .. autofunction:: wikirepo.data.data_utils._get_dir_fxns_dict 27 | .. autofunction:: wikirepo.data.data_utils._check_data_assertions 28 | .. autofunction:: wikirepo.data.data_utils._get_max_workers 29 | .. autofunction:: wikirepo.data.data_utils.incl_dir_idxs 30 | .. autofunction:: wikirepo.data.data_utils.gen_base_df 31 | .. autofunction:: wikirepo.data.data_utils.assign_to_column 32 | .. autofunction:: wikirepo.data.data_utils.gen_base_and_assign_to_column 33 | .. autofunction:: wikirepo.data.data_utils.assign_to_cols 34 | .. autofunction:: wikirepo.data.data_utils.gen_base_and_assign_to_cols 35 | .. autofunction:: wikirepo.data.data_utils.query_wd_prop 36 | .. autofunction:: wikirepo.data.data_utils.query_repo_dir 37 | .. autofunction:: wikirepo.data.data_utils.interp_by_subset 38 | .. autofunction:: wikirepo.data.data_utils.sum_df_prop_vals 39 | .. 
autofunction:: wikirepo.data.data_utils.split_col_val_dates 40 | .. autofunction:: wikirepo.data.data_utils.count_df_prop_vals 41 | -------------------------------------------------------------------------------- /docs/source/data/index.rst: -------------------------------------------------------------------------------- 1 | data - modules 2 | ============== 3 | 4 | The :py:mod:`data` directory comprises all functions for the wikirepo data querying process. Included modules are those for the query process itself and needed utility functions. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | query 10 | data_utils 11 | lctn_utils 12 | time_utils 13 | wd_utils 14 | upload 15 | -------------------------------------------------------------------------------- /docs/source/data/lctn_utils.rst: -------------------------------------------------------------------------------- 1 | lctn_utils 2 | ========== 3 | 4 | The :py:mod:`data.lctn_utils` module provides functions for querying locations. 5 | 6 | **Functions** 7 | 8 | * :py:func:`wikirepo.data.lctn_utils.lctn_to_qid_dict` 9 | * :py:func:`wikirepo.data.lctn_utils.qid_to_lctn_dict` 10 | * :py:func:`wikirepo.data.lctn_utils.incl_lctn_lbls` 11 | * :py:func:`wikirepo.data.lctn_utils.incl_lctn_ids` 12 | * :py:func:`wikirepo.data.lctn_utils.lctn_lbl_to_qid` 13 | * :py:func:`wikirepo.data.lctn_utils.qid_tp_lctn_lbl` 14 | * :py:func:`wikirepo.data.lctn_utils.depth_to_col_name` 15 | * :py:func:`wikirepo.data.lctn_utils.depth_to_cols` 16 | * :py:func:`wikirepo.data.lctn_utils.depth_to_qid_col_name` 17 | * :py:func:`wikirepo.data.lctn_utils.depth_to_qid_cols` 18 | * :py:func:`wikirepo.data.lctn_utils.find_qid_get_depth` 19 | * :py:func:`wikirepo.data.lctn_utils.get_qids_at_depth` 20 | * :py:func:`wikirepo.data.lctn_utils.iter_set_dict` 21 | * :py:func:`wikirepo.data.lctn_utils.gen_lctns_dict` 22 | * :py:func:`wikirepo.data.lctn_utils.derive_depth` 23 | * :py:func:`wikirepo.data.lctn_utils.merge_lctn_dicts` 24 | * 
:py:func:`wikirepo.data.lctn_utils.find_key_items` 25 | 26 | **Classes** 27 | 28 | * :py:class:`wikirepo.data.lctn_utils.LocationsDict` 29 | 30 | .. autofunction:: wikirepo.data.lctn_utils.lctn_to_qid_dict 31 | .. autofunction:: wikirepo.data.lctn_utils.qid_to_lctn_dict 32 | .. autofunction:: wikirepo.data.lctn_utils.incl_lctn_lbls 33 | .. autofunction:: wikirepo.data.lctn_utils.incl_lctn_ids 34 | .. autofunction:: wikirepo.data.lctn_utils.lctn_lbl_to_qid 35 | .. autofunction:: wikirepo.data.lctn_utils.qid_tp_lctn_lbl 36 | .. autofunction:: wikirepo.data.lctn_utils.depth_to_col_name 37 | .. autofunction:: wikirepo.data.lctn_utils.depth_to_cols 38 | .. autofunction:: wikirepo.data.lctn_utils.depth_to_qid_col_name 39 | .. autofunction:: wikirepo.data.lctn_utils.depth_to_qid_cols 40 | .. autofunction:: wikirepo.data.lctn_utils.find_qid_get_depth 41 | .. autofunction:: wikirepo.data.lctn_utils.get_qids_at_depth 42 | .. autofunction:: wikirepo.data.lctn_utils.iter_set_dict 43 | .. autofunction:: wikirepo.data.lctn_utils.gen_lctns_dict 44 | .. autofunction:: wikirepo.data.lctn_utils.derive_depth 45 | .. autofunction:: wikirepo.data.lctn_utils.merge_lctn_dicts 46 | .. autofunction:: wikirepo.data.lctn_utils.find_key_items 47 | 48 | .. autoclass:: wikirepo.data.lctn_utils.LocationsDict 49 | :members: 50 | :private-members: 51 | -------------------------------------------------------------------------------- /docs/source/data/query.rst: -------------------------------------------------------------------------------- 1 | query 2 | ===== 3 | 4 | The :py:mod:`data.query` module provides a function that calls and combines data from Wikidata 5 | 6 | Note: the purpose of this module is for a `wikirepo.data.query()` function call 7 | 8 | **Functions** 9 | 10 | * :py:func:`wikirepo.data.query.query` 11 | 12 | .. 
autofunction:: wikirepo.data.query.query 13 | -------------------------------------------------------------------------------- /docs/source/data/time_utils.rst: -------------------------------------------------------------------------------- 1 | time_utils 2 | ========== 3 | 4 | The :py:mod:`data.time_utils` module provides utility functions for querying time information. 5 | 6 | **Functions** 7 | 8 | * :py:func:`wikirepo.data.time_utils.interval_to_col_name` 9 | * :py:func:`wikirepo.data.time_utils.truncate_date` 10 | * :py:func:`wikirepo.data.time_utils.truncate_date_col` 11 | * :py:func:`wikirepo.data.time_utils.incl_intervals` 12 | * :py:func:`wikirepo.data.time_utils.make_timespan` 13 | * :py:func:`wikirepo.data.time_utils.latest_date` 14 | * :py:func:`wikirepo.data.time_utils.earliest_date` 15 | * :py:func:`wikirepo.data.time_utils.truncated_latest_date` 16 | * :py:func:`wikirepo.data.time_utils.truncated_earliest_date` 17 | 18 | .. autofunction:: wikirepo.data.time_utils.interval_to_col_name 19 | .. autofunction:: wikirepo.data.time_utils.truncate_date 20 | .. autofunction:: wikirepo.data.time_utils.truncate_date_col 21 | .. autofunction:: wikirepo.data.time_utils.incl_intervals 22 | .. autofunction:: wikirepo.data.time_utils.make_timespan 23 | .. autofunction:: wikirepo.data.time_utils.latest_date 24 | .. autofunction:: wikirepo.data.time_utils.earliest_date 25 | .. autofunction:: wikirepo.data.time_utils.truncated_latest_date 26 | .. autofunction:: wikirepo.data.time_utils.truncated_earliest_date 27 | -------------------------------------------------------------------------------- /docs/source/data/upload.rst: -------------------------------------------------------------------------------- 1 | upload (WIP) 2 | ============ 3 | 4 | `wikirepo.data.upload `_ will be the core of the eventual wikirepo upload feature. 
The goal is to record edits that a user makes to a previously queried or baseline dataframe such that these changes can then be pushed back to Wikidata. With the addition of Wikidata login credentials as a wikirepo feature (WIP), the unique information in the edited dataframe could then be uploaded to Wikidata for all to use. 5 | 6 | The same process used to query information from Wikidata could be reversed for the upload process. Dataframe columns could be linked to their corresponding Wikidata properties, whether the time qualifiers are a `point in time `_ or spans using `start time `_ and `end time `_ could be derived through the defined variables in the module header, and other necessary qualifiers for proper data indexing could also be included. Source information could also be added in corresponding columns to the given property edits. 7 | 8 | :py:mod:`Pseudocode` for how this process could function follows: 9 | 10 | In the first example, changes are made to a `df.copy()` of a queried dataframe. `pandas `_ is then used to compare the new and original dataframes after the user has added information that they have access to. 11 | 12 | 13 | .. code-block:: python 14 | 15 | import wikirepo 16 | from wikirepo.data import lctn_utils, wd_utils 17 | from datetime import date 18 | 19 | credentials = wd_utils.login() 20 | 21 | ents_dict = wd_utils.EntitiesDict() 22 | country = "Country Name" 23 | depth = 2 24 | sub_lctns = True 25 | timespan = (date(2000,1,1), date(2018,1,1)) 26 | interval = 'yearly' 27 | 28 | lctns_dict = lctn_utils.gen_lctns_dict() 29 | 30 | df = wikirepo.data.query() 31 | df_copy = df.copy() 32 | 33 | # The user checks for NaNs and adds data 34 | 35 | df_edits = pd.concat([df, df_copy]).drop_duplicates(keep=False) 36 | 37 | wikirepo.data.upload(df_edits, credentials) 38 | 39 | In the next example `data.data_utils.gen_base_df` is used to create a dataframe with dimensions that match a time series that the user has access to. 
The data is then added to the column that corresponds to the property to which it should be added. Source information could further be added via a structured dictionary generated for the user. 40 | 41 | .. code-block:: python 42 | 43 | import wikirepo 44 | from wikirepo.data import data_utils, wd_utils 45 | from datetime import date 46 | 47 | credentials = wd_utils.login() 48 | 49 | locations = "Country Name" 50 | depth = 0 51 | # The user defines the time parameters based on their data 52 | timespan = (date(1995,1,2), date(2010,1,2)) # (first Monday, last Sunday) 53 | interval = 'weekly' 54 | 55 | base_df = data_utils.gen_base_df() 56 | base_df['data'] = data_for_matching_time_series 57 | 58 | source_data = wd_utils.gen_source_dict('Source Information') 59 | base_df['data_source'] = [source_data] * len(base_df) 60 | 61 | wikirepo.data.upload(base_df, credentials) 62 | 63 | Put simply: a full featured `wikirepo.data.upload `_ function would realize the potential of a single read-write repository for all public information. 64 | -------------------------------------------------------------------------------- /docs/source/data/wd_utils.rst: -------------------------------------------------------------------------------- 1 | wd_utils 2 | ======== 3 | 4 | The :py:mod:`data.wd_utils` module provides utility functions for accessing and storing Wikidata information. 
5 | 6 | **Functions** 7 | 8 | * :py:func:`wikirepo.data.wd_utils.check_in_ents_dict` 9 | * :py:func:`wikirepo.data.wd_utils.load_ent` 10 | * :py:func:`wikirepo.data.wd_utils.is_wd_id` 11 | * :py:func:`wikirepo.data.wd_utils.prop_has_many_entries` 12 | 13 | * :py:func:`wikirepo.data.wd_utils.get_lbl` 14 | * :py:func:`wikirepo.data.wd_utils.check_stget_propr_similarity` 15 | * :py:func:`wikirepo.data.wd_utils.get_prop_id` 16 | * :py:func:`wikirepo.data.wd_utils.get_prop_lbl` 17 | * :py:func:`wikirepo.data.wd_utils.get_prop_val` 18 | * :py:func:`wikirepo.data.wd_utils.prop_has_qualifiers` 19 | * :py:func:`wikirepo.data.wd_utils.get_qualifiers` 20 | * :py:func:`wikirepo.data.wd_utils.get_prop_qualifier_val` 21 | * :py:func:`wikirepo.data.wd_utils.get_val` 22 | 23 | * :py:func:`wikirepo.data.wd_utils.get_prop_t` 24 | * :py:func:`wikirepo.data.wd_utils.get_prop_start_t` 25 | * :py:func:`wikirepo.data.wd_utils.get_prop_end_t` 26 | * :py:func:`wikirepo.data.wd_utils.format_t` 27 | * :py:func:`wikirepo.data.wd_utils.get_formatted_prop_t` 28 | * :py:func:`wikirepo.data.wd_utils.get_formatted_prop_start_t` 29 | * :py:func:`wikirepo.data.wd_utils.get_formatted_prop_end_t` 30 | * :py:func:`wikirepo.data.wd_utils.get_prop_timespan_intersection` 31 | * :py:func:`wikirepo.data.wd_utils.get_formatted_prop_start_end_t` 32 | * :py:func:`wikirepo.data.wd_utils.prop_start_end_to_timespan` 33 | * :py:func:`wikirepo.data.wd_utils.get_prop_timespan` 34 | 35 | * :py:func:`wikirepo.data.wd_utils.dir_to_topic_page` 36 | * :py:func:`wikirepo.data.wd_utils.check_for_pid_sub_page` 37 | * :py:func:`wikirepo.data.wd_utils.t_to_prop_val_dict` 38 | * :py:func:`wikirepo.data.wd_utils.t_to_prop_val_dict_dict` 39 | 40 | **Classes** 41 | 42 | * :py:class:`wikirepo.data.wd_utils.EntitiesDict` 43 | 44 | .. autofunction:: wikirepo.data.wd_utils.check_in_ents_dict 45 | .. autofunction:: wikirepo.data.wd_utils.load_ent 46 | .. autofunction:: wikirepo.data.wd_utils.is_wd_id 47 | .. 
autofunction:: wikirepo.data.wd_utils.prop_has_many_entries 48 | 49 | .. autofunction:: wikirepo.data.wd_utils.get_lbl 50 | .. autofunction:: wikirepo.data.wd_utils.check_stget_propr_similarity 51 | .. autofunction:: wikirepo.data.wd_utils.get_prop_id 52 | .. autofunction:: wikirepo.data.wd_utils.get_prop_lbl 53 | .. autofunction:: wikirepo.data.wd_utils.get_prop_val 54 | .. autofunction:: wikirepo.data.wd_utils.prop_has_qualifiers 55 | .. autofunction:: wikirepo.data.wd_utils.get_qualifiers 56 | .. autofunction:: wikirepo.data.wd_utils.get_prop_qualifier_val 57 | .. autofunction:: wikirepo.data.wd_utils.get_val 58 | 59 | .. autofunction:: wikirepo.data.wd_utils.get_prop_t 60 | .. autofunction:: wikirepo.data.wd_utils.get_prop_start_t 61 | .. autofunction:: wikirepo.data.wd_utils.get_prop_end_t 62 | .. autofunction:: wikirepo.data.wd_utils.format_t 63 | .. autofunction:: wikirepo.data.wd_utils.get_formatted_prop_t 64 | .. autofunction:: wikirepo.data.wd_utils.get_formatted_prop_start_t 65 | .. autofunction:: wikirepo.data.wd_utils.get_formatted_prop_end_t 66 | .. autofunction:: wikirepo.data.wd_utils.get_prop_timespan_intersection 67 | .. autofunction:: wikirepo.data.wd_utils.get_formatted_prop_start_end_t 68 | .. autofunction:: wikirepo.data.wd_utils.prop_start_end_to_timespan 69 | .. autofunction:: wikirepo.data.wd_utils.get_prop_timespan 70 | 71 | .. autofunction:: wikirepo.data.wd_utils.dir_to_topic_page 72 | .. autofunction:: wikirepo.data.wd_utils.check_for_pid_sub_page 73 | .. autofunction:: wikirepo.data.wd_utils.t_to_prop_val_dict 74 | .. autofunction:: wikirepo.data.wd_utils.t_to_prop_val_dict_dict 75 | 76 | .. 
autoclass:: wikirepo.data.wd_utils.EntitiesDict 77 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/climate/index.rst: -------------------------------------------------------------------------------- 1 | climate 2 | ======= 3 | 4 | :py:mod:`data.climate` is a directory of modules connecting wikirepo to corresponding Wikidata features for climate properties. 5 | 6 | Modules included in `wikirepo/data/climate `_ follow, with full details of needed property parameters being shown in the linked source codes: 7 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/demographic/index.rst: -------------------------------------------------------------------------------- 1 | demographic 2 | =========== 3 | 4 | :py:mod:`data.demographic` is a directory of modules connecting wikirepo to corresponding Wikidata features for demographic properties. 5 | 6 | Modules included in `wikirepo/data/demographic `_ follow, with full details of needed property parameters being shown in the linked source codes: 7 | 8 | .. automodule:: wikirepo.data.demographic.ethnic_div 9 | :members: 10 | .. automodule:: wikirepo.data.demographic.life_expectancy 11 | :members: 12 | .. automodule:: wikirepo.data.demographic.literacy 13 | :members: 14 | .. automodule:: wikirepo.data.demographic.out_of_school_children 15 | :members: 16 | .. automodule:: wikirepo.data.demographic.population 17 | :members: 18 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/economic/index.rst: -------------------------------------------------------------------------------- 1 | economic 2 | ======== 3 | 4 | :py:mod:`data.economic` is a directory of modules connecting wikirepo to corresponding Wikidata features for economic properties. 
5 | 6 | Modules included in `wikirepo/data/economic `_ follow, with full details of needed property parameters being shown in the linked source codes: 7 | 8 | .. automodule:: wikirepo.data.economic.gdp_ppp 9 | :members: 10 | .. automodule:: wikirepo.data.economic.gini 11 | :members: 12 | .. automodule:: wikirepo.data.economic.inflation_rate 13 | :members: 14 | .. automodule:: wikirepo.data.economic.median_income 15 | :members: 16 | .. automodule:: wikirepo.data.economic.nom_gdp_per_cap 17 | :members: 18 | .. automodule:: wikirepo.data.economic.nom_gdp 19 | :members: 20 | .. automodule:: wikirepo.data.economic.ppp_gdp_per_cap 21 | :members: 22 | .. automodule:: wikirepo.data.economic.total_reserves 23 | :members: 24 | .. automodule:: wikirepo.data.economic.unemployment 25 | :members: 26 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/electoral_results/index.rst: -------------------------------------------------------------------------------- 1 | electoral_results 2 | ================= 3 | 4 | :py:mod:`data.electoral_results` is a directory of modules connecting wikirepo to corresponding Wikidata features for electoral_results properties. 5 | 6 | Modules included in `wikirepo/data/electoral_results `_ follow, with full details of needed property parameters being shown in the linked source codes: 7 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/geographic/index.rst: -------------------------------------------------------------------------------- 1 | geographic 2 | ========== 3 | 4 | :py:mod:`data.geographic` is a directory of modules connecting wikirepo to corresponding Wikidata features for geographic properties. 5 | 6 | Modules included in `wikirepo/data/geographic `_ follow, with full details of needed property parameters being shown in the linked source codes: 7 | 8 | ..
automodule:: wikirepo.data.geographic.area 9 | :members: 10 | .. automodule:: wikirepo.data.geographic.continent 11 | :members: 12 | .. automodule:: wikirepo.data.geographic.country 13 | :members: 14 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/index.rst: -------------------------------------------------------------------------------- 1 | data - property directories 2 | =========================== 3 | 4 | The :py:mod:`data` directory comprises all functions for the wikirepo data querying process. Included sub-directories containing those modules that link wikirepo queries to their Wikidata metadata. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | climate/index 10 | demographic/index 11 | economic/index 12 | electoral_results/index 13 | geographic/index 14 | institutional/index 15 | political/index 16 | misc/index 17 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/institutional/index.rst: -------------------------------------------------------------------------------- 1 | institutional 2 | ============= 3 | 4 | :py:mod:`data.institutional` is a directory of modules connecting wikirepo to corresponding Wikidata features for institutional properties. 5 | 6 | Modules included in `wikirepo/data/institutional `_ follow, with full details of needed property parameters being shown in the linked source codes: 7 | 8 | .. automodule:: wikirepo.data.institutional.bti_gov_idx 9 | :members: 10 | .. automodule:: wikirepo.data.institutional.bti_status_idx 11 | :members: 12 | .. automodule:: wikirepo.data.institutional.capital 13 | :members: 14 | .. automodule:: wikirepo.data.institutional.fh_category 15 | :members: 16 | .. automodule:: wikirepo.data.institutional.human_dev_idx 17 | :members: 18 | ..
automodule:: wikirepo.data.institutional.org_membership 19 | :members: 20 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/misc/index.rst: -------------------------------------------------------------------------------- 1 | misc 2 | ==== 3 | 4 | :py:mod:`data.misc` is a directory of modules connecting wikirepo to corresponding Wikidata features for miscellaneous properties. 5 | 6 | Modules included in `wikirepo/data/misc `_ follow, with full details of needed property parameters being shown in the linked source codes: 7 | 8 | .. automodule:: wikirepo.data.misc.country_abbr 9 | :members: 10 | .. automodule:: wikirepo.data.misc.sub_country_abbr 11 | :members: 12 | -------------------------------------------------------------------------------- /docs/source/data_property_directories/political/index.rst: -------------------------------------------------------------------------------- 1 | political 2 | ========= 3 | 4 | :py:mod:`data.political` is a directory of modules connecting wikirepo to corresponding Wikidata features for political properties. 5 | 6 | Modules included in `wikirepo/data/political `_ follow, with full details of needed property parameters being shown in the linked source codes: 7 | 8 | .. automodule:: wikirepo.data.political.executive 9 | :members: 10 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://raw.githubusercontent.com/andrewtavis/wikirepo/main/.github/resources/logo/wikirepo_logo_transparent.png 2 | :width: 506 3 | :height: 304 4 | :align: center 5 | :target: https://github.com/andrewtavis/wikirepo 6 | ============ 7 | 8 | |rtd| |ci| |codecov| |pyversions| |pypi| |pypistatus| |license| |coc| |codestyle| 9 | 10 | .. 
|rtd| image:: https://img.shields.io/readthedocs/wikirepo.svg?logo=read-the-docs 11 | :target: http://wikirepo.readthedocs.io/en/latest/ 12 | 13 | .. |ci| image:: https://img.shields.io/github/actions/workflow/status/andrewtavis/wikirepo/.github/workflows/ci.yml?branch=main&logo=github 14 | :target: https://github.com/andrewtavis/wikirepo/actions?query=workflow%3ACI 15 | 16 | .. |codecov| image:: https://codecov.io/gh/andrewtavis/wikirepo/branch/main/graphs/badge.svg 17 | :target: https://codecov.io/gh/andrewtavis/wikirepo 18 | 19 | .. |pyversions| image:: https://img.shields.io/pypi/pyversions/wikirepo.svg?logo=python&logoColor=FFD43B&color=306998 20 | :target: https://pypi.org/project/wikirepo/ 21 | 22 | .. |pypi| image:: https://img.shields.io/pypi/v/wikirepo.svg?color=4B8BBE 23 | :target: https://pypi.org/project/wikirepo/ 24 | 25 | .. |pypistatus| image:: https://img.shields.io/pypi/status/wikirepo.svg 26 | :target: https://pypi.org/project/wikirepo/ 27 | 28 | .. |license| image:: https://img.shields.io/github/license/andrewtavis/wikirepo.svg 29 | :target: https://github.com/andrewtavis/wikirepo/blob/main/LICENSE.txt 30 | 31 | .. |coc| image:: https://img.shields.io/badge/coc-Contributor%20Covenant-ff69b4.svg 32 | :target: https://github.com/andrewtavis/wikirepo/blob/main/.github/CODE_OF_CONDUCT.md 33 | 34 | .. |codestyle| image:: https://img.shields.io/badge/code%20style-black-000000.svg 35 | :target: https://github.com/psf/black 36 | 37 | Python based Wikidata framework for easy dataframe extraction 38 | 39 | Installation 40 | ------------ 41 | .. code-block:: shell 42 | 43 | pip install wikirepo 44 | 45 | .. code-block:: shell 46 | 47 | git clone https://github.com/andrewtavis/wikirepo.git 48 | cd wikirepo 49 | python setup.py install 50 | 51 | .. code-block:: python 52 | 53 | import wikirepo 54 | 55 | ..
toctree:: 56 | :maxdepth: 2 57 | :caption: Contents: 58 | 59 | data/index 60 | data_property_directories/index 61 | maps/index 62 | utils 63 | notes 64 | 65 | Project Indices 66 | =============== 67 | 68 | * :ref:`genindex` 69 | -------------------------------------------------------------------------------- /docs/source/maps/index.rst: -------------------------------------------------------------------------------- 1 | maps (WIP) 2 | ========== 3 | 4 | `wikirepo/maps `_ is a further goal of the project, as it combines wikirepo's focus on easy to access open source data and quick high level analytics. 5 | 6 | **Query Maps** 7 | 8 | As in `wikirepo.data.query `_, passing the :py:mod:`locations`, :py:mod:`depth`, :py:mod:`timespan` and :py:mod:`interval` arguments could access GeoJSON files stored on Wikidata, thus providing mapping files in parallel to the user's data. These files could then be leveraged using existing Python plotting libraries to provide detailed presentations of geographic analysis. 9 | 10 | **Upload Maps** 11 | 12 | Similar to the potential of adding statistics through `wikirepo.data.upload `_, GeoJSON map files could also be uploaded to Wikidata using appropriate arguments. The potential exists for a myriad of variable maps given :py:mod:`locations`, :py:mod:`depth`, :py:mod:`timespan` and :py:mod:`interval` information that would allow all wikirepo users to get the exact mapping file that they need for their given task. 13 | -------------------------------------------------------------------------------- /docs/source/notes.rst: -------------------------------------------------------------------------------- 1 | .. mdinclude:: ../../.github/CONTRIBUTING.md 2 | 3 | License 4 | ------- 5 | 6 | .. literalinclude:: ../../LICENSE.txt 7 | :language: text 8 | 9 | Change log 10 | ---------- 11 | 12 | .. 
mdinclude:: ../../CHANGELOG.md 13 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | utils 2 | ===== 3 | 4 | The :py:mod:`utils` module provides utility functions for general operations. 5 | 6 | **Functions** 7 | 8 | * :py:func:`wikirepo.utils._make_var_list` 9 | * :py:func:`wikirepo.utils._return_given_type` 10 | * :py:func:`wikirepo.utils.try_float` 11 | * :py:func:`wikirepo.utils.round_if_int` 12 | * :py:func:`wikirepo.utils.gen_list_of_lists` 13 | * :py:func:`wikirepo.utils.check_str_similarity` 14 | * :py:func:`wikirepo.utils.check_str_args` 15 | 16 | .. autofunction:: wikirepo.utils._make_var_list 17 | .. autofunction:: wikirepo.utils._return_given_type 18 | .. autofunction:: wikirepo.utils.try_float 19 | .. autofunction:: wikirepo.utils.round_if_int 20 | .. autofunction:: wikirepo.utils.gen_list_of_lists 21 | .. autofunction:: wikirepo.utils.check_str_similarity 22 | .. autofunction:: wikirepo.utils.check_str_args 23 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: wikirepo-dev 2 | channels: 3 | - conda-forge 4 | - anaconda 5 | - defaults 6 | dependencies: 7 | - black>=19.10b0 8 | - numpy>=1.19.2 9 | - packaging>=20.9 10 | - pandas>=1.1.5 11 | - pyopenssl>=20.0.1 12 | - pytest>=6.2.2 13 | - pytest-cov>=2.11.1 14 | - python>=3.6 15 | - tqdm>=4.56.0 16 | - pip: 17 | - wikidata>=0.7.0 18 | -------------------------------------------------------------------------------- /examples/add_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

    Table of Contents

    \n", 10 | "" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "**Adding Data**\n", 18 | "\n", 19 | "This example demonstrates how to leverage wikirepo to most effectively add data to [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page). We'll load in a dataset, check for missing data, and then go through the process of filling that data and pushing the changes back to Wikidata. \n", 20 | "\n", 21 | "The assumption is that the property that data should be added to already exists. See [examples/add_property](https://github.com/andrewtavis/wikirepo/blob/main/examples/add_property.ipynb) for how to add a property to wikirepo and Wikidata." 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Query" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "# Edit" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "# Add" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [] 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 3 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython3", 83 | "version": "3.7.7" 84 | }, 85 | "toc": { 86 | "base_numbering": 1, 87 | "nav_menu": {}, 88 | "number_sections": true, 89 | "sideBar": true, 90 | "skip_h1_title": false, 91 | 
"title_cell": "Table of Contents", 92 | "title_sidebar": "Contents", 93 | "toc_cell": true, 94 | "toc_position": {}, 95 | "toc_section_display": true, 96 | "toc_window_display": false 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 4 101 | } 102 | -------------------------------------------------------------------------------- /examples/add_property.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "toc": true 7 | }, 8 | "source": [ 9 | "

    Table of Contents

    \n", 10 | "" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "**Adding Properties**\n", 18 | "\n", 19 | "In this example we'll show how to add properties to wikirepo. See [examples/add_data](http://localhost:8888/notebooks/programming/wikirepo/examples/add_data.ipynb) for (eventually) how to leverage wikirepo to add data to Wikidata.\n", 20 | "\n", 21 | "Adding properties to wikirepo can be as simple as finding a wikirepo data module that queries a similar data structure, copying this module to the appropriate data directory for the new property (see next note), renaming the module to what the user should enter to query it, and assigning appropriate values to the variables that make up the module header: `pid`, `sub_pid`, `col_name`, `col_prefix`, `ignore_char` and `span`. To fully detail this, we're going to pretend that the following properties can't already be accessed by wikirepo:\n", 22 | "\n", 23 | "- ['P1082' (population)](https://www.wikidata.org/wiki/Property:P1082)\n", 24 | "- ['P6' (head of government)](https://www.wikidata.org/wiki/Property:P6)\n", 25 | "- ['P172' (ethnicity)](https://www.wikidata.org/wiki/Property:P172)\n", 26 | "\n", 27 | "The final modules for each of these can be found in [data/demographic/population](https://github.com/andrewtavis/wikirepo/blob/main/wikirepo/data/demographic/population.py), [data/political/executive](https://github.com/andrewtavis/wikirepo/blob/main/wikirepo/data/political/executive.py), and [data/demographic/ethnic_div](https://github.com/andrewtavis/wikirepo/blob/main/wikirepo/data/demographic/ethnic_div.py) respectively. 
The focus will be how to add a property that already exists on [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page) to wikirepo, with later versions covering the process of adding a property to Wikidata as well.\n", 28 | "\n", 29 | "**Note:** by \"the appropriate data directory for the new property\" we mean that a new module should go into the [wikirepo/data](https://github.com/andrewtavis/wikirepo/tree/main/wikirepo/data) directory that matches a Wikidata sub-page. Sometimes data isn't on the location's page itself, but rather on a sub-page. An example is that certain economic properties for [Germany](https://www.wikidata.org/wiki/Q183) are found on the page [economy of Germany](https://www.wikidata.org/wiki/Q8046). wikirepo checks for a property on the main page of a location first, and if the property is not found then the package checks the sub-page associated with the module's directory (the user is notified that the property does not exist for the given location if it is found in neither page). Properties are often moved from main pages to sub-pages, so even current main page property modules need to be organized based on where they could be re-indexed. Worst comes to worst, put the module in [data/misc](https://github.com/andrewtavis/wikirepo/tree/main/wikirepo/data/misc)." 
30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": { 36 | "ExecuteTime": { 37 | "end_time": "2020-12-08T11:29:32.138356Z", 38 | "start_time": "2020-12-08T11:29:31.696107Z" 39 | } 40 | }, 41 | "outputs": [ 42 | { 43 | "data": { 44 | "text/html": [ 45 | "" 46 | ], 47 | "text/plain": [ 48 | "" 49 | ] 50 | }, 51 | "metadata": {}, 52 | "output_type": "display_data" 53 | } 54 | ], 55 | "source": [ 56 | "from wikirepo.data import time_utils, wd_utils\n", 57 | "\n", 58 | "from IPython.core.display import display, HTML\n", 59 | "display(HTML(\"\"))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "We'll use ['Q183' (Germany)](https://www.wikidata.org/wiki/Q183) for this example. First we'll initialize an `EntitiesDict` and the QID, and then we'll load in the entity:" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 2, 72 | "metadata": { 73 | "ExecuteTime": { 74 | "end_time": "2020-12-08T11:29:36.673058Z", 75 | "start_time": "2020-12-08T11:29:34.630060Z" 76 | } 77 | }, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "['Germany']" 83 | ] 84 | }, 85 | "execution_count": 2, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "ents_dict = wd_utils.EntitiesDict()\n", 92 | "qid = 'Q183'\n", 93 | "\n", 94 | "ent = wd_utils.load_ent(ents_dict=ents_dict, pq_id=qid)\n", 95 | "ents_dict.key_lbls()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "# Adding a Property to wikirepo" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Adding single column property" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "['P1082' (population)](https://www.wikidata.org/wiki/Property:P1082) is an example of a property that goes in a single column, which also only occurs once 
at any given time.\n", 117 | "\n", 118 | "Let's start by defining our property and checking an element of the population data for Germany:" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 3, 124 | "metadata": { 125 | "ExecuteTime": { 126 | "end_time": "2020-12-08T11:29:36.679579Z", 127 | "start_time": "2020-12-08T11:29:36.674835Z" 128 | } 129 | }, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "{'mainsnak': {'snaktype': 'value',\n", 135 | " 'property': 'P1082',\n", 136 | " 'datavalue': {'value': {'amount': '+80500000',\n", 137 | " 'unit': '1',\n", 138 | " 'upperBound': '+80500500',\n", 139 | " 'lowerBound': '+80499500'},\n", 140 | " 'type': 'quantity'},\n", 141 | " 'datatype': 'quantity'},\n", 142 | " 'type': 'statement',\n", 143 | " 'qualifiers': {'P585': [{'snaktype': 'value',\n", 144 | " 'property': 'P585',\n", 145 | " 'hash': 'd071256bb4b9260491239bfad2cc561ad8bf870c',\n", 146 | " 'datavalue': {'value': {'time': '+2012-12-31T00:00:00Z',\n", 147 | " 'timezone': 0,\n", 148 | " 'before': 0,\n", 149 | " 'after': 0,\n", 150 | " 'precision': 11,\n", 151 | " 'calendarmodel': 'http://www.wikidata.org/entity/Q1985727'},\n", 152 | " 'type': 'time'},\n", 153 | " 'datatype': 'time'}]},\n", 154 | " 'qualifiers-order': ['P585'],\n", 155 | " 'id': 'Q183$3c493715-464a-6bad-ce6c-0f204e655157',\n", 156 | " 'rank': 'normal',\n", 157 | " 'references': [{'hash': '1112211b516d0ce090dfd3dd197bf7b7a4b88eaf',\n", 158 | " 'snaks': {'P143': [{'snaktype': 'value',\n", 159 | " 'property': 'P143',\n", 160 | " 'datavalue': {'value': {'entity-type': 'item',\n", 161 | " 'numeric-id': 764739,\n", 162 | " 'id': 'Q764739'},\n", 163 | " 'type': 'wikibase-entityid'},\n", 164 | " 'datatype': 'wikibase-item'}],\n", 165 | " 'P854': [{'snaktype': 'value',\n", 166 | " 'property': 'P854',\n", 167 | " 'datavalue': {'value': 'https://www.destatis.de/DE/ZahlenFakten/GesellschaftStaat/Bevoelkerung/Bevoelkerung.html',\n", 168 | " 'type': 'string'},\n", 169 
| " 'datatype': 'url'}],\n", 170 | " 'P813': [{'snaktype': 'value',\n", 171 | " 'property': 'P813',\n", 172 | " 'datavalue': {'value': {'time': '+2014-06-14T00:00:00Z',\n", 173 | " 'timezone': 0,\n", 174 | " 'before': 0,\n", 175 | " 'after': 0,\n", 176 | " 'precision': 11,\n", 177 | " 'calendarmodel': 'http://www.wikidata.org/entity/Q1985727'},\n", 178 | " 'type': 'time'},\n", 179 | " 'datatype': 'time'}]},\n", 180 | " 'snaks-order': ['P143', 'P854', 'P813']}]}" 181 | ] 182 | }, 183 | "execution_count": 3, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "pop_pid = 'P1082'\n", 190 | "\n", 191 | "pop_0_entry = wd_utils.get_prop(ents_dict=ents_dict, qid=qid, pid=pop_pid)[0]\n", 192 | "pop_0_entry" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "The big thing to notice in that is that the qualifier ['P585' (point in time)](https://www.wikidata.org/wiki/Property:P585) is present. That and that `prop_0_entry['mainsnak']['datavalue']['value']['amount']` is a single value tells us that this property should go into a single column. 
\n", 200 | "\n", 201 | "Let's check this value, as well as get its date:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 4, 207 | "metadata": { 208 | "ExecuteTime": { 209 | "end_time": "2020-12-08T11:29:37.212298Z", 210 | "start_time": "2020-12-08T11:29:37.208646Z" 211 | } 212 | }, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "'+80500000'" 218 | ] 219 | }, 220 | "execution_count": 4, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "pop_0_val = pop_0_entry['mainsnak']['datavalue']['value']['amount']\n", 227 | "pop_0_val" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 5, 233 | "metadata": { 234 | "ExecuteTime": { 235 | "end_time": "2020-12-08T11:29:37.644350Z", 236 | "start_time": "2020-12-08T11:29:37.640642Z" 237 | } 238 | }, 239 | "outputs": [ 240 | { 241 | "data": { 242 | "text/plain": [ 243 | "'+2012-12-31T00:00:00Z'" 244 | ] 245 | }, 246 | "execution_count": 5, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "pop_0_t = pop_0_entry['qualifiers']['P585'][0]['datavalue']['value']['time']\n", 253 | "pop_0_t" 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "From that we see that we could have a character that needs to be ignored - specifically the `+`. We actually don't though, as wikirepo will convert this value to an integer, and `int('+string_number')` gets rid of the `+` for us.\n", 261 | "\n", 262 | "**Note:** wikirepo will also take care of the date for us. The package will first format the date, and then it will use a provided `time_lvl` variable's value to truncate this formatted `datetime.date` object to an appropriate level. 
Here's a quick demo of this assuming that the `time_lvl` of our query is `yearly`:" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 6, 268 | "metadata": { 269 | "ExecuteTime": { 270 | "end_time": "2020-12-08T11:29:38.241261Z", 271 | "start_time": "2020-12-08T11:29:38.237517Z" 272 | } 273 | }, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "datetime.date(2012, 12, 31)" 279 | ] 280 | }, 281 | "execution_count": 6, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "pop_0_t_formatted = wd_utils.format_t(pop_0_t)\n", 288 | "pop_0_t_formatted" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 7, 294 | "metadata": { 295 | "ExecuteTime": { 296 | "end_time": "2020-12-08T11:29:38.518157Z", 297 | "start_time": "2020-12-08T11:29:38.514295Z" 298 | } 299 | }, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "'2012'" 305 | ] 306 | }, 307 | "execution_count": 7, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "time_utils.truncate_date(d=pop_0_t_formatted, time_lvl='yearly')" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "The value itself will be included if the above year is included in the `timespan` value passed. 
If no `time_lvl` variable is passed, then the full date will be maintained, and its value will be queried if it's the most recent, with the date then being appended as a string for documentation of when the value comes from.\n", 321 | "\n", 322 | "Final notes on the property module: the value in question can be accessed directly instead of through another property, so this tells us that we have no need for the `sub_pid` variable (more on this later); as the value goes into one column, we use the `col_name` variable instead of `col_prefix` (more on this later as well); and the value occurs at only one time, so we keep the `span` variable as `False` (more on this later too).\n", 323 | "\n", 324 | "We now have all the information needed to make the **population** module's header:" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 8, 330 | "metadata": { 331 | "ExecuteTime": { 332 | "end_time": "2020-12-08T11:29:39.325093Z", 333 | "start_time": "2020-12-08T11:29:39.322385Z" 334 | } 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "pid = 'P1082'\n", 339 | "sub_pid = None\n", 340 | "col_name = 'population'\n", 341 | "col_prefix = None\n", 342 | "ignore_char = ''\n", 343 | "span = False" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "The final module can again be found in [data/demographic/population](https://github.com/andrewtavis/wikirepo/blob/main/wikirepo/data/demographic/population.py)." 
351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "metadata": {}, 356 | "source": [ 357 | "## Adding a single column property that spans time" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "metadata": {}, 363 | "source": [ 364 | "An executive via ['P6' (head of government)](https://www.wikidata.org/wiki/Property:P6) is an example of a property that goes in a single column that further occurs over a span of time.\n", 365 | "\n", 366 | "Let's start again by defining the pid and loading in an entry:" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 9, 372 | "metadata": { 373 | "ExecuteTime": { 374 | "end_time": "2020-12-08T11:29:40.996529Z", 375 | "start_time": "2020-12-08T11:29:40.989802Z" 376 | } 377 | }, 378 | "outputs": [ 379 | { 380 | "data": { 381 | "text/plain": [ 382 | "{'mainsnak': {'snaktype': 'value',\n", 383 | " 'property': 'P6',\n", 384 | " 'datavalue': {'value': {'entity-type': 'item',\n", 385 | " 'numeric-id': 567,\n", 386 | " 'id': 'Q567'},\n", 387 | " 'type': 'wikibase-entityid'},\n", 388 | " 'datatype': 'wikibase-item'},\n", 389 | " 'type': 'statement',\n", 390 | " 'qualifiers': {'P580': [{'snaktype': 'value',\n", 391 | " 'property': 'P580',\n", 392 | " 'hash': 'ad8007db4be39b05f62a2bf5821d32c5464bb183',\n", 393 | " 'datavalue': {'value': {'time': '+2005-11-22T00:00:00Z',\n", 394 | " 'timezone': 0,\n", 395 | " 'before': 0,\n", 396 | " 'after': 0,\n", 397 | " 'precision': 11,\n", 398 | " 'calendarmodel': 'http://www.wikidata.org/entity/Q1985727'},\n", 399 | " 'type': 'time'},\n", 400 | " 'datatype': 'time'}]},\n", 401 | " 'qualifiers-order': ['P580'],\n", 402 | " 'id': 'q183$d0db3461-4291-0b36-4092-c40d14699212',\n", 403 | " 'rank': 'preferred',\n", 404 | " 'references': [{'hash': '7c5619b50f5af5766a660bda2eb09605dee4df72',\n", 405 | " 'snaks': {'P143': [{'snaktype': 'value',\n", 406 | " 'property': 'P143',\n", 407 | " 'datavalue': {'value': {'entity-type': 'item',\n", 408 | " 'numeric-id': 
317027,\n", 409 | " 'id': 'Q317027'},\n", 410 | " 'type': 'wikibase-entityid'},\n", 411 | " 'datatype': 'wikibase-item'}],\n", 412 | " 'P854': [{'snaktype': 'value',\n", 413 | " 'property': 'P854',\n", 414 | " 'datavalue': {'value': 'http://www.bundeskanzlerin.de/Webs/BKin/EN/AngelaMerkel/Biography/biography_node.html;jsessionid=D04CEAD1827AC93F21CB6E908B31A5AA.s4t1',\n", 415 | " 'type': 'string'},\n", 416 | " 'datatype': 'url'}],\n", 417 | " 'P813': [{'snaktype': 'value',\n", 418 | " 'property': 'P813',\n", 419 | " 'datavalue': {'value': {'time': '+2014-03-28T00:00:00Z',\n", 420 | " 'timezone': 0,\n", 421 | " 'before': 0,\n", 422 | " 'after': 0,\n", 423 | " 'precision': 11,\n", 424 | " 'calendarmodel': 'http://www.wikidata.org/entity/Q1985727'},\n", 425 | " 'type': 'time'},\n", 426 | " 'datatype': 'time'}],\n", 427 | " 'P1683': [{'snaktype': 'value',\n", 428 | " 'property': 'P1683',\n", 429 | " 'datavalue': {'value': {'text': 'since November 2005 [Angela Merkel is] Chancellor of the Federal Republic of Germany',\n", 430 | " 'language': 'en'},\n", 431 | " 'type': 'monolingualtext'},\n", 432 | " 'datatype': 'monolingualtext'}]},\n", 433 | " 'snaks-order': ['P143', 'P854', 'P813', 'P1683']}]}" 434 | ] 435 | }, 436 | "execution_count": 9, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "exec_pid = 'P6'\n", 443 | "\n", 444 | "exec_0_entry = wd_utils.get_prop(ents_dict=ents_dict, qid=qid, pid=exec_pid)[0]\n", 445 | "exec_0_entry" 446 | ] 447 | }, 448 | { 449 | "cell_type": "markdown", 450 | "metadata": {}, 451 | "source": [ 452 | "Firstly we can see that the value in question cannot be directly subscripted for, as it is a QID entity itself. 
wikirepo will access the variable for us and derive its label, but let's find out who it is:" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 10, 458 | "metadata": { 459 | "ExecuteTime": { 460 | "end_time": "2020-12-08T11:29:41.713928Z", 461 | "start_time": "2020-12-08T11:29:41.709914Z" 462 | } 463 | }, 464 | "outputs": [ 465 | { 466 | "data": { 467 | "text/plain": [ 468 | "'Q567'" 469 | ] 470 | }, 471 | "execution_count": 10, 472 | "metadata": {}, 473 | "output_type": "execute_result" 474 | } 475 | ], 476 | "source": [ 477 | "exec_0_qid = exec_0_entry['mainsnak']['datavalue']['value']['id']\n", 478 | "exec_0_qid" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 11, 484 | "metadata": { 485 | "ExecuteTime": { 486 | "end_time": "2020-12-08T11:29:43.069363Z", 487 | "start_time": "2020-12-08T11:29:42.108621Z" 488 | } 489 | }, 490 | "outputs": [ 491 | { 492 | "data": { 493 | "text/plain": [ 494 | "'Angela Merkel'" 495 | ] 496 | }, 497 | "execution_count": 11, 498 | "metadata": {}, 499 | "output_type": "execute_result" 500 | } 501 | ], 502 | "source": [ 503 | "wd_utils.get_lbl(ents_dict=ents_dict, pq_id=exec_0_qid)" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "metadata": {}, 509 | "source": [ 510 | "That this entity is a span can be seen by the fact that it does not have ['P585' (point in time)](https://www.wikidata.org/wiki/Property:P585), but rather ['P580' (start time)](https://www.wikidata.org/wiki/Property:P580). Values in this property can also have the property ['P582' (end time)](https://www.wikidata.org/wiki/Property:P582). \n", 511 | "\n", 512 | "**Note:** wikirepo assumes that an entity that has a start time and lacks an end time is the current subject for the property, so the latest date in the `timespan` argument for query functions will be used. 
The opposite is true for if an end time is present without a start time - the first date in the `timespan` will be used based on the assumption that this is the first subject of the property.\n", 513 | "\n", 514 | "Having values or subjects with start and end times implies that the `span` variable for the module header should in this case be `True`. We still are putting our results in a single column, so we use `col_name` instead of `col_prefix` (this is covered in the next section), and we can again the ignore `sub_pid` variable (also covered in the next section).\n", 515 | "\n", 516 | "From this we have all the information we need for the **executive** module's header:" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": 12, 522 | "metadata": { 523 | "ExecuteTime": { 524 | "end_time": "2020-12-08T11:29:43.089259Z", 525 | "start_time": "2020-12-08T11:29:43.086580Z" 526 | } 527 | }, 528 | "outputs": [], 529 | "source": [ 530 | "pid = 'P6'\n", 531 | "sub_pid = None\n", 532 | "col_name = 'executive'\n", 533 | "col_prefix = None\n", 534 | "ignore_char = ''\n", 535 | "span = True" 536 | ] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "The resulting module can again be found in [data/political/executive](https://github.com/andrewtavis/wikirepo/blob/main/wikirepo/data/political/executive.py)." 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "metadata": {}, 548 | "source": [ 549 | "## Adding a multi-column property " 550 | ] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": {}, 555 | "source": [ 556 | "Ethnic diversity via ['P172' (ethnic group)](https://www.wikidata.org/wiki/Property:P172) is an example of a property that should be split over multiple columns. 
Rather than put all the information into a single column for the user to then split, wikirepo instead prefixes each potential element and creates columns for them for their respective data.\n", 557 | "\n", 558 | "Let's look at the first element of German ethnicity:" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 13, 564 | "metadata": { 565 | "ExecuteTime": { 566 | "end_time": "2020-12-08T11:29:44.177540Z", 567 | "start_time": "2020-12-08T11:29:44.172241Z" 568 | } 569 | }, 570 | "outputs": [ 571 | { 572 | "data": { 573 | "text/plain": [ 574 | "{'mainsnak': {'snaktype': 'value',\n", 575 | " 'property': 'P172',\n", 576 | " 'datavalue': {'value': {'entity-type': 'item',\n", 577 | " 'numeric-id': 42884,\n", 578 | " 'id': 'Q42884'},\n", 579 | " 'type': 'wikibase-entityid'},\n", 580 | " 'datatype': 'wikibase-item'},\n", 581 | " 'type': 'statement',\n", 582 | " 'qualifiers': {'P1107': [{'snaktype': 'value',\n", 583 | " 'property': 'P1107',\n", 584 | " 'hash': '17752ff515cf871f1b4e82ae5ee4e1cea61556ff',\n", 585 | " 'datavalue': {'value': {'amount': '+0.915', 'unit': '1'},\n", 586 | " 'type': 'quantity'},\n", 587 | " 'datatype': 'quantity'}]},\n", 588 | " 'qualifiers-order': ['P1107'],\n", 589 | " 'id': 'Q183$c3db8ed3-4346-945d-75d7-de9ff7181e83',\n", 590 | " 'rank': 'normal',\n", 591 | " 'references': [{'hash': '35ad938ca5a2b12719ee2b3fbe70f8bf27e77284',\n", 592 | " 'snaks': {'P248': [{'snaktype': 'value',\n", 593 | " 'property': 'P248',\n", 594 | " 'datavalue': {'value': {'entity-type': 'item',\n", 595 | " 'numeric-id': 11191,\n", 596 | " 'id': 'Q11191'},\n", 597 | " 'type': 'wikibase-entityid'},\n", 598 | " 'datatype': 'wikibase-item'}],\n", 599 | " 'P813': [{'snaktype': 'value',\n", 600 | " 'property': 'P813',\n", 601 | " 'datavalue': {'value': {'time': '+2017-09-28T00:00:00Z',\n", 602 | " 'timezone': 0,\n", 603 | " 'before': 0,\n", 604 | " 'after': 0,\n", 605 | " 'precision': 11,\n", 606 | " 'calendarmodel': 
'http://www.wikidata.org/entity/Q1985727'},\n", 607 | " 'type': 'time'},\n", 608 | " 'datatype': 'time'}]},\n", 609 | " 'snaks-order': ['P248', 'P813']}]}" 610 | ] 611 | }, 612 | "execution_count": 13, 613 | "metadata": {}, 614 | "output_type": "execute_result" 615 | } 616 | ], 617 | "source": [ 618 | "ethnic_div_pid = 'P172'\n", 619 | "\n", 620 | "ethnic_div_0_entry = wd_utils.get_prop(ents_dict=ents_dict, qid=qid, pid=ethnic_div_pid)[0]\n", 621 | "ethnic_div_0_entry" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "metadata": {}, 627 | "source": [ 628 | "Each of the values for this property is an entity, and the values are stored within sub PIDs. As before, let's check some QIDs of this value:" 629 | ] 630 | }, 631 | { 632 | "cell_type": "code", 633 | "execution_count": 14, 634 | "metadata": { 635 | "ExecuteTime": { 636 | "end_time": "2020-12-08T11:29:45.068913Z", 637 | "start_time": "2020-12-08T11:29:45.065127Z" 638 | } 639 | }, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/plain": [ 644 | "'Q42884'" 645 | ] 646 | }, 647 | "execution_count": 14, 648 | "metadata": {}, 649 | "output_type": "execute_result" 650 | } 651 | ], 652 | "source": [ 653 | "ethnic_div_0_qid = ethnic_div_0_entry['mainsnak']['datavalue']['value']['id']\n", 654 | "ethnic_div_0_qid" 655 | ] 656 | }, 657 | { 658 | "cell_type": "code", 659 | "execution_count": 15, 660 | "metadata": { 661 | "ExecuteTime": { 662 | "end_time": "2020-12-08T11:29:46.092155Z", 663 | "start_time": "2020-12-08T11:29:45.474176Z" 664 | } 665 | }, 666 | "outputs": [ 667 | { 668 | "data": { 669 | "text/plain": [ 670 | "'Germans'" 671 | ] 672 | }, 673 | "execution_count": 15, 674 | "metadata": {}, 675 | "output_type": "execute_result" 676 | } 677 | ], 678 | "source": [ 679 | "ethnic_div_0_lbl = wd_utils.get_lbl(ents_dict=ents_dict, pq_id=ethnic_div_0_qid)\n", 680 | "ethnic_div_0_lbl" 681 | ] 682 | }, 683 | { 684 | "cell_type": "code", 685 | "execution_count": 16, 686 | "metadata": { 687 | "ExecuteTime": 
{ 688 | "end_time": "2020-12-08T11:29:46.614310Z", 689 | "start_time": "2020-12-08T11:29:46.094634Z" 690 | } 691 | }, 692 | "outputs": [ 693 | { 694 | "data": { 695 | "text/plain": [ 696 | "'Turks'" 697 | ] 698 | }, 699 | "execution_count": 16, 700 | "metadata": {}, 701 | "output_type": "execute_result" 702 | } 703 | ], 704 | "source": [ 705 | "ethnic_div_1_entry = wd_utils.get_prop(ents_dict=ents_dict, qid=qid, pid=ethnic_div_pid)[1]\n", 706 | "ethnic_div_1_qid = ethnic_div_1_entry['mainsnak']['datavalue']['value']['id']\n", 707 | "ethnic_div_1_lbl = wd_utils.get_lbl(ents_dict=ents_dict, pq_id=ethnic_div_1_qid)\n", 708 | "ethnic_div_1_lbl" 709 | ] 710 | }, 711 | { 712 | "cell_type": "markdown", 713 | "metadata": {}, 714 | "source": [ 715 | "The value itself needs to be subsetted for using ['P1107' (proportion)](https://www.wikidata.org/wiki/Property:P1107). wikirepo will do this for us, but let's subset for the first value anyway:" 716 | ] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "execution_count": 17, 721 | "metadata": { 722 | "ExecuteTime": { 723 | "end_time": "2020-12-08T11:29:46.868401Z", 724 | "start_time": "2020-12-08T11:29:46.864730Z" 725 | } 726 | }, 727 | "outputs": [ 728 | { 729 | "data": { 730 | "text/plain": [ 731 | "'+0.915'" 732 | ] 733 | }, 734 | "execution_count": 17, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "ethnic_div_0_val = ethnic_div_0_entry['qualifiers']['P1107'][0]['datavalue']['value']['amount']\n", 741 | "ethnic_div_0_val" 742 | ] 743 | }, 744 | { 745 | "cell_type": "markdown", 746 | "metadata": {}, 747 | "source": [ 748 | "For this property we thus need to use a `sub_pid` variable that tells wikirepo where to look for the value. \n", 749 | "\n", 750 | "**None:** another use of `sub_pid` is to set its value to `bool`. This tells wikirepo to assign `True` if the property is present. 
An example of this is [data/institutional/org_membership](https://github.com/andrewtavis/wikirepo/blob/main/wikirepo/data/institutional/org_membership.py) where a boolean value is assigned to columns based on if a location is a member of an organization at a given time. Values of `False` need to be filled afterwards, and some values are replaced for organizations that are widely known. This is thus an example of a property that requires a bit more work than simply setting the module header.\n", 751 | "\n", 752 | "Continuing, as we want the values to be put into separate columns where the QIDs labels for the entries get prefixed, we need to use the `col_prefix` variable and set the `col_name` variable to `None`. Let's choose `eth` for `col_prefix`, meaning that columns produced will be `eth_germans`, `eth_turks`, etc (an underscore is added automatically). To complete the needed information, the values themselves are only present at individual times, so in this case we can set `span` to `False`.\n", 753 | "\n", 754 | "From here we have the full information for the header of the **ethnic_div** module:" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": 18, 760 | "metadata": { 761 | "ExecuteTime": { 762 | "end_time": "2020-12-08T11:29:48.197124Z", 763 | "start_time": "2020-12-08T11:29:48.194308Z" 764 | } 765 | }, 766 | "outputs": [], 767 | "source": [ 768 | "pid = 'P172'\n", 769 | "sub_pid = 'P1107'\n", 770 | "col_name = None\n", 771 | "col_prefix = 'eth'\n", 772 | "ignore_char = ''\n", 773 | "span = False" 774 | ] 775 | }, 776 | { 777 | "cell_type": "markdown", 778 | "metadata": {}, 779 | "source": [ 780 | "The final version of this module can be found in [data/demographic/ethnic_div](https://github.com/andrewtavis/wikirepo/blob/main/wikirepo/data/demographic/ethnic_div.py)." 
# Packaging/build script for the wikirepo distribution (src/ layout).
import os

# Prefer setuptools; fall back to distutils on minimal environments.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

from setuptools import find_packages

# Resolve paths relative to this file so the build works from any CWD.
package_directory = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(package_directory, "README.md"), encoding="utf-8") as fh:
    long_description = fh.read()  # README is reused as the PyPI long description

# Runtime dependencies are maintained in a single requirements.txt file.
with open(os.path.join(package_directory, "requirements.txt")) as req_file:
    requirements = req_file.readlines()

# On Read the Docs builds (READTHEDOCS env var), install_requires is emptied.
on_rtd = os.environ.get("READTHEDOCS") == "True"
if on_rtd:
    requirements = []

setup_args = dict(
    name="wikirepo",
    # Packages live under src/, hence the where/package_dir pairing.
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    version="1.0.1",
    author="Andrew Tavis McAllister",
    author_email="andrew.t.mcallister@gmail.com",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: BSD License",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
    install_requires=requirements,
    description="Python based Wikidata framework for easy dataframe extraction",
    long_description=long_description,
    long_description_content_type="text/markdown",
    license="new BSD",
    url="https://github.com/andrewtavis/wikirepo",
)

if __name__ == "__main__":
    setup(**setup_args)
-------------------------------------------------------------------------------- /src/wikirepo/data/__init__.py: -------------------------------------------------------------------------------- 1 | # from wikirepo.data.upload import upload # function call wikirepo.data.upload() 2 | from wikirepo.data.query import query # function call wikirepo.data.query() 3 | -------------------------------------------------------------------------------- /src/wikirepo/data/climate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/climate/__init__.py -------------------------------------------------------------------------------- /src/wikirepo/data/climate/aqi.py: -------------------------------------------------------------------------------- 1 | """ 2 | Air Quality Index 3 | ----------------- 4 | 5 | Functions querying 'PXYZ' (air quality index) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | # Note: whether this is daily or a lower time_lvl would need to be discussed. 12 | -------------------------------------------------------------------------------- /src/wikirepo/data/climate/precipitation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Precipitation 3 | ------------- 4 | 5 | Functions querying 'PXYZ' (precipitation) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | # Note: whether this is daily or a lower time_lvl would need to be discussed. 12 | -------------------------------------------------------------------------------- /src/wikirepo/data/climate/temperature.py: -------------------------------------------------------------------------------- 1 | """ 2 | Temperature 3 | ----------- 4 | 5 | Functions querying 'PXYZ' (temperature) information. 
"""
Ethnic Diversity
----------------

Functions querying 'P172' (ethnic group) information.

This is done by querying 'P1107' (proportion) from each 'P172'.

Contents
    query_prop_data
"""

from wikirepo.data import data_utils

pid = "P172"
sub_pid = "P1107"  # the proportion value lives under this qualifier
col_name = None  # no single data column: columns are derived from values
col_prefix = "eth"  # generated columns are prefixed with 'eth_'
ignore_char = ""
span = False


def query_prop_data(
    dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None
):
    """
    Queries data for the module property for given location(s), depth, timespan and interval.
    """
    # Forward the caller's arguments together with this module's header
    # constants; query_wd_prop returns the (df, ents_dict) pair directly.
    return data_utils.query_wd_prop(
        dir_name=dir_name,
        ents_dict=ents_dict,
        locations=locations,
        depth=depth,
        timespan=timespan,
        interval=interval,
        pid=pid,
        sub_pid=sub_pid,
        col_name=col_name,
        col_prefix=col_prefix,
        ignore_char=ignore_char,
        span=span,
    )
"""
Out of School Children
----------------------

Functions querying 'P2573' (number of out-of-school children) information.

Contents
    query_prop_data
"""

from wikirepo.data import data_utils

pid = "P2573"
sub_pid = None
col_name = "num_oosc"
col_prefix = None
ignore_char = ""
span = False


def query_prop_data(
    dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None
):
    """
    Queries data for the module property for given location(s), depth, timespan and interval.
    """
    # Bundle the caller's query arguments; the module header constants
    # tell the generic query which property and column to target.
    query_args = dict(
        dir_name=dir_name,
        ents_dict=ents_dict,
        locations=locations,
        depth=depth,
        timespan=timespan,
        interval=interval,
    )
    df, ents_dict = data_utils.query_wd_prop(
        pid=pid,
        sub_pid=sub_pid,
        col_name=col_name,
        col_prefix=col_prefix,
        ignore_char=ignore_char,
        span=span,
        **query_args,
    )

    return df, ents_dict
26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/economic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/economic/__init__.py -------------------------------------------------------------------------------- /src/wikirepo/data/economic/gdp_ppp.py: -------------------------------------------------------------------------------- 1 | """ 2 | GDP Purchasing Power Parity 3 | --------------------------- 4 | 5 | Functions querying 'P4010' (GDP PPP) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P4010" 14 | sub_pid = None 15 | col_name = "gdp_ppp" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 
26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/economic/gini.py: -------------------------------------------------------------------------------- 1 | """ 2 | Gini Index 3 | ---------- 4 | 5 | Functions querying 'P1125' (gini coefficient) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P1125" 14 | sub_pid = None 15 | col_name = "gini_idx" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/economic/inflation_rate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Inflation Rate 3 | -------------- 4 | 5 | Functions querying 'P1279' (inflation rate) information. 
"""
Median Income
-------------

Functions querying 'P3529' (median income) information.

Contents
    query_prop_data
"""

from wikirepo.data import data_utils

pid = "P3529"
sub_pid = None
col_name = "median_income"
col_prefix = None
ignore_char = ""
span = False


def query_prop_data(
    dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None
):
    """
    Queries data for the module property for given location(s), depth, timespan and interval.
    """
    # Combine the module's header constants with the caller's arguments
    # and hand off to the shared Wikidata property query.
    header = dict(
        pid=pid,
        sub_pid=sub_pid,
        col_name=col_name,
        col_prefix=col_prefix,
        ignore_char=ignore_char,
        span=span,
    )
    df, ents_dict = data_utils.query_wd_prop(
        dir_name=dir_name,
        ents_dict=ents_dict,
        locations=locations,
        depth=depth,
        timespan=timespan,
        interval=interval,
        **header,
    )

    return df, ents_dict
6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P2132" 14 | sub_pid = None 15 | col_name = "nom_gdp_per_cap" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/economic/ppp_gdp_per_cap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Purchasing Power Parity GDP per Capita 3 | -------------------------------------- 4 | 5 | Functions querying 'P2299' (PPP GDP per capita) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P2299" 14 | sub_pid = None 15 | col_name = "ppp_gdp_per_cap" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 
26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/economic/total_reserves.py: -------------------------------------------------------------------------------- 1 | """ 2 | Total Reserves 3 | -------------- 4 | 5 | Functions querying 'P2134' (total reserves) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P2134" 14 | sub_pid = None 15 | col_name = "toal_reserves" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/economic/unemployment.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unemployment 3 | ------------ 4 | 5 | Functions querying 'P1198' (unemployment rate) information. 
6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P1198" 14 | sub_pid = None 15 | col_name = "unemployment" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_polls/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_polls/__init__.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/__init__.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/ballot_measures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/ballot_measures.py 
-------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/pri_executive.py: -------------------------------------------------------------------------------- 1 | """ 2 | Executive Primary 3 | ----------------- 4 | 5 | Functions querying 'PXYZ' (primary executive election results) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/pri_lower_house.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lower House Primary 3 | ------------------- 4 | 5 | Functions querying 'PXYZ' (primary lower house election results) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/pri_sup_executive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/pri_sup_executive.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/pri_sup_lower_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/pri_sup_lower_house.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/pri_sup_upper_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/pri_sup_upper_house.py 
-------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/pri_upper_house.py: -------------------------------------------------------------------------------- 1 | """ 2 | Upper House Primary 3 | ------------------- 4 | 5 | Functions querying 'PXYZ' (primary upper house election results) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd1_executive.py: -------------------------------------------------------------------------------- 1 | """ 2 | Executive Round 1 3 | ----------------- 4 | 5 | Functions querying 'PXYZ' (first round executive election results) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd1_lower_house.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lower House Round 1 3 | ------------------- 4 | 5 | Functions querying 'PXYZ' (first round lower house election results) information. 
6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd1_sup_executive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/rd1_sup_executive.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd1_sup_lower_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/rd1_sup_lower_house.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd1_sup_upper_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/rd1_sup_upper_house.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd1_upper_house.py: -------------------------------------------------------------------------------- 1 | """ 2 | Upper House Round 1 3 | ------------------- 4 | 5 | Functions querying 'PXYZ' (first round upper house election results) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd2_executive.py: -------------------------------------------------------------------------------- 1 | """ 2 | Executive Round 2 3 | ----------------- 4 | 5 | Functions querying 'PXYZ' (second round executive election results) information. 
6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd2_lower_house.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lower House Round 2 3 | ------------------- 4 | 5 | Functions querying 'PXYZ' (second round lower house election results) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd2_sup_executive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/rd2_sup_executive.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd2_sup_lower_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/rd2_sup_lower_house.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd2_sup_upper_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/rd2_sup_upper_house.py -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/rd2_upper_house.py: -------------------------------------------------------------------------------- 1 | """ 2 | Upper House Round 2 3 | ------------------- 4 | 5 | Functions querying 'PXYZ' (second round upper house election results) information. 
6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/electoral_results/sup_ballot_measures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/electoral_results/sup_ballot_measures.py -------------------------------------------------------------------------------- /src/wikirepo/data/geographic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/geographic/__init__.py -------------------------------------------------------------------------------- /src/wikirepo/data/geographic/area.py: -------------------------------------------------------------------------------- 1 | """ 2 | Area 3 | ---- 4 | 5 | Functions querying 'P2046' (area) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P2046" 14 | sub_pid = None 15 | col_name = "area_km2" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 
26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/geographic/continent.py: -------------------------------------------------------------------------------- 1 | """ 2 | Continent 3 | --------- 4 | 5 | Functions querying 'P30' (continent) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P30" 14 | sub_pid = None 15 | col_name = "continent" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = True 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/geographic/country.py: -------------------------------------------------------------------------------- 1 | """ 2 | Country 3 | ------- 4 | 5 | Functions querying 'P17' (country) information. 
6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils, lctn_utils 12 | 13 | pid = "P17" 14 | sub_pid = None 15 | col_name = "country" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = True 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | fixes = [ 43 | ["Aruba", "Netherlands"], # Territory 44 | ["Abkhazia", "Georgia"], # Recognized by most states as a part of Georgia 45 | ["Denmark", "Denmark"], # Fix 46 | ["Ghana", "Ghana"], # Fix 47 | ["Guinea-Bissau", "Guinea-Bissau"], # Fix 48 | ["Guernsey", "United Kingdom"], # Territory 49 | ["Jersey", "United Kingdom"], # Territory 50 | ["Nauru", "Nauru"], # Fix 51 | [ 52 | "Republic of Artsakh", 53 | "Azerbaijan", 54 | ], # Recognized by most states as a part of Azerbaijan 55 | ["Sri Lanka", "Sri Lanka"], # Fix 56 | ["South Ossetia", "Georgia"], # Recognized by most states as a part of Georgia 57 | ["Transnistria", "Moldova"], # Recognized by most states as a part of Moldova 58 | [ 59 | "Turkish Republic of Northern Cyprus", 60 | "Cyprus", 61 | ], # Recognized by most states as a part of Cyprus 62 | ["Western Sahara", "Morocco"], 63 | ] # 80% is controlled by Morocco 64 | 65 | for f in fixes: 66 | if f[0] in list(df[lctn_utils.depth_to_col_name(depth)]): 67 | df.loc[ 68 | df.loc[df[lctn_utils.depth_to_col_name(depth)] == f[0]].index, col_name 69 | ] = f[1] 70 | 71 | df[col_name] = df[col_name].replace("Kingdom of the Netherlands", "Netherlands") 72 | 
df[col_name] = df[col_name].replace("Kingdom of Denmark", "Denmark") 73 | df[col_name] = df[col_name].replace("Danish Realm", "Denmark") 74 | 75 | return df, ents_dict 76 | -------------------------------------------------------------------------------- /src/wikirepo/data/geographic/sub_electoral_region.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions querying sub_electoral_region information. 3 | 4 | Ex: US congressional voting districts are sub_electoral_regions of states. 5 | 6 | This is done via 'PXYZ' (contains electoral district) applied to any level. 7 | Explicitly used when the there are different electoral and administrative sub_regions. 8 | 9 | Contents 10 | -------- 11 | 0. No Class 12 | query_prop_data 13 | """ 14 | 15 | # pid = 'PXYZ' 16 | # sub_pid = None 17 | # col_name = 'electoral_reg' 18 | # col_prefix = None 19 | # ignore_char = '' 20 | # span = True 21 | 22 | # def query_prop_data(): 23 | # return 24 | -------------------------------------------------------------------------------- /src/wikirepo/data/institutional/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/institutional/__init__.py -------------------------------------------------------------------------------- /src/wikirepo/data/institutional/bti_gov_idx.py: -------------------------------------------------------------------------------- 1 | """ 2 | BTI Government Index 3 | -------------------- 4 | 5 | Functions querying 'P8476' (BTI Governance Index) information. 
6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P8476" 14 | sub_pid = None 15 | col_name = "bti_gov_idx" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/institutional/bti_status_idx.py: -------------------------------------------------------------------------------- 1 | """ 2 | BTI Status Index 3 | ---------------- 4 | 5 | Functions querying 'P8477' (BTI Status Index) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P8477" 14 | sub_pid = None 15 | col_name = "bti_status_idx" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 
26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/institutional/capital.py: -------------------------------------------------------------------------------- 1 | """ 2 | Capital 3 | ------- 4 | 5 | Functions querying 'P36' (capital) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P36" 14 | sub_pid = None 15 | col_name = "capital" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = True 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/institutional/fh_category.py: -------------------------------------------------------------------------------- 1 | """ 2 | Freedom House Category 3 | ---------------------- 4 | 5 | Functions querying 'PXYZ' (freedom house category) information. 6 | 7 | This is done via 'P1552' (has quality) applied to regions. 
8 | Ideally a property would be created for this that would allow it to be traced over time. 9 | 10 | Contents 11 | query_prop_data 12 | """ 13 | 14 | from wikirepo.data import data_utils 15 | 16 | pid = "P1552" 17 | sub_pid = None 18 | col_name = "fh_category" 19 | col_prefix = None 20 | ignore_char = " country" 21 | span = False 22 | 23 | 24 | def query_prop_data( 25 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 26 | ): 27 | """ 28 | Queries data for the module property for given location(s), depth, timespan and interval. 29 | """ 30 | df, ents_dict = data_utils.query_wd_prop( 31 | dir_name=dir_name, 32 | ents_dict=ents_dict, 33 | locations=locations, 34 | depth=depth, 35 | timespan=timespan, 36 | interval=interval, 37 | pid=pid, 38 | sub_pid=sub_pid, 39 | col_name=col_name, 40 | col_prefix=col_prefix, 41 | ignore_char=ignore_char, 42 | span=span, 43 | ) 44 | 45 | return df, ents_dict 46 | -------------------------------------------------------------------------------- /src/wikirepo/data/institutional/human_dev_idx.py: -------------------------------------------------------------------------------- 1 | """ 2 | Human Development Index 3 | ----------------------- 4 | 5 | Functions querying 'P1081' (human development index) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P1081" 14 | sub_pid = None 15 | col_name = "human_dev_idx" 16 | col_prefix = None 17 | ignore_char = "+" 18 | span = False 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 
26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/institutional/org_membership.py: -------------------------------------------------------------------------------- 1 | """ 2 | Organization Membership 3 | ----------------------- 4 | 5 | Functions querying organizational information. 6 | 7 | This is done via 'P463' (member of) applied to locations. 8 | 9 | Contents 10 | query_prop_data 11 | """ 12 | 13 | from wikirepo.data import data_utils 14 | 15 | pid = "P463" 16 | sub_pid = bool # assign boolean values 17 | col_name = None # col_name is None for no data col 18 | col_prefix = "mem" # columns will be generated and prefixed from values 19 | ignore_char = "" 20 | span = True 21 | 22 | 23 | def query_prop_data( 24 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 25 | ): 26 | """ 27 | Queries data for the module property for given location(s), depth, timespan and interval. 
28 | """ 29 | df, ents_dict = data_utils.query_wd_prop( 30 | dir_name=dir_name, 31 | ents_dict=ents_dict, 32 | locations=locations, 33 | depth=depth, 34 | timespan=timespan, 35 | interval=interval, 36 | pid=pid, 37 | sub_pid=sub_pid, 38 | col_name=col_name, 39 | col_prefix=col_prefix, 40 | ignore_char=ignore_char, 41 | span=span, 42 | ) 43 | 44 | org_renames = [ 45 | ("mem_world_trade_organization", "mem_wto"), 46 | ("mem_european_union", "mem_eu"), 47 | ("mem_organisation_for_economic_cooperation_and_development", "mem_oecd"), 48 | ("mem_united_nations", "mem_un"), 49 | ("mem_world_health_organization", "mem_who"), 50 | ("mem_international_monetary_fund", "mem_imf"), 51 | ] 52 | 53 | for o_r in org_renames: 54 | if o_r[0] in df.columns: 55 | df.rename(columns={o_r[0]: o_r[1]}, inplace=True) 56 | 57 | df.fillna(value=False, inplace=True) 58 | 59 | return df, ents_dict 60 | -------------------------------------------------------------------------------- /src/wikirepo/data/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/misc/__init__.py -------------------------------------------------------------------------------- /src/wikirepo/data/misc/country_abbr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Country Abbreviation 3 | -------------------- 4 | 5 | Functions querying 'P297' (ISO 3166-1 alpha-2 code) information. 6 | 7 | This property is used to derive abbreviations for countries. 
8 | 9 | Contents 10 | query_prop_data 11 | """ 12 | 13 | from wikirepo.data import data_utils 14 | 15 | pid = "P297" 16 | sub_pid = None 17 | col_name = "abbr" 18 | col_prefix = None 19 | ignore_char = "" 20 | span = False 21 | 22 | 23 | def query_prop_data( 24 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 25 | ): 26 | """ 27 | Queries data for the module property for given location(s), depth, timespan and interval. 28 | """ 29 | df, ents_dict = data_utils.query_wd_prop( 30 | dir_name=dir_name, 31 | ents_dict=ents_dict, 32 | locations=locations, 33 | depth=depth, 34 | timespan=timespan, 35 | interval=interval, 36 | pid=pid, 37 | sub_pid=sub_pid, 38 | col_name=col_name, 39 | col_prefix=col_prefix, 40 | ignore_char=ignore_char, 41 | span=span, 42 | ) 43 | 44 | return df, ents_dict 45 | -------------------------------------------------------------------------------- /src/wikirepo/data/misc/sub_country_abbr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sub-Country Abbreviation 3 | ------------------------ 4 | 5 | Functions querying 'P300' (ISO 3166-2 code) information. 6 | 7 | This property is used to derive abbreviations for country sub-regions. 8 | 9 | Contents 10 | query_prop_data 11 | """ 12 | 13 | from wikirepo.data import data_utils 14 | 15 | pid = "P300" 16 | sub_pid = None 17 | col_name = "sub_abbr" 18 | col_prefix = None 19 | ignore_char = "" 20 | span = False 21 | 22 | 23 | def query_prop_data( 24 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 25 | ): 26 | """ 27 | Queries data for the module property for given location(s), depth, timespan and interval. 
28 | """ 29 | df, ents_dict = data_utils.query_wd_prop( 30 | dir_name=dir_name, 31 | ents_dict=ents_dict, 32 | locations=locations, 33 | depth=depth, 34 | timespan=timespan, 35 | interval=interval, 36 | pid=pid, 37 | sub_pid=sub_pid, 38 | col_name=col_name, 39 | col_prefix=col_prefix, 40 | ignore_char=ignore_char, 41 | span=span, 42 | ) 43 | 44 | return df, ents_dict 45 | -------------------------------------------------------------------------------- /src/wikirepo/data/political/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/political/__init__.py -------------------------------------------------------------------------------- /src/wikirepo/data/political/executive.py: -------------------------------------------------------------------------------- 1 | """ 2 | Executive 3 | --------- 4 | 5 | Functions querying 'P6' (head of government) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | 11 | from wikirepo.data import data_utils 12 | 13 | pid = "P6" 14 | sub_pid = None 15 | col_name = "executive" 16 | col_prefix = None 17 | ignore_char = "" 18 | span = True 19 | 20 | 21 | def query_prop_data( 22 | dir_name=None, ents_dict=None, locations=None, depth=0, timespan=None, interval=None 23 | ): 24 | """ 25 | Queries data for the module property for given location(s), depth, timespan and interval. 
26 | """ 27 | df, ents_dict = data_utils.query_wd_prop( 28 | dir_name=dir_name, 29 | ents_dict=ents_dict, 30 | locations=locations, 31 | depth=depth, 32 | timespan=timespan, 33 | interval=interval, 34 | pid=pid, 35 | sub_pid=sub_pid, 36 | col_name=col_name, 37 | col_prefix=col_prefix, 38 | ignore_char=ignore_char, 39 | span=span, 40 | ) 41 | 42 | return df, ents_dict 43 | -------------------------------------------------------------------------------- /src/wikirepo/data/political/num_seats_lower_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/political/num_seats_lower_house.py -------------------------------------------------------------------------------- /src/wikirepo/data/political/num_seats_upper_house.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/src/wikirepo/data/political/num_seats_upper_house.py -------------------------------------------------------------------------------- /src/wikirepo/data/political/rep_lower_house.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lower House Representative 3 | -------------------------- 4 | 5 | Functions querying 'PXYZ' (lower house representative) information. 6 | 7 | Contents 8 | query_prop_data 9 | """ 10 | -------------------------------------------------------------------------------- /src/wikirepo/data/political/rep_upper_house.py: -------------------------------------------------------------------------------- 1 | """ 2 | Upper House Representative 3 | -------------------------- 4 | 5 | Functions querying 'PXYZ' (upper house representative) information. 
"""
Query
-----

A function that calls and combines data from Wikidata.

Note: the purpose of this module is for a wikirepo.data.query() function call.

Contents
    query
"""

from ast import literal_eval

# import modin.pandas as pd
import pandas as pd
from tqdm.auto import tqdm
from wikirepo import utils
from wikirepo.data import data_utils, lctn_utils, time_utils, wd_utils


def query(
    ents_dict=None,
    locations=None,
    depth=None,
    timespan=None,
    interval=None,
    climate_props=None,
    demographic_props=None,
    economic_props=None,
    electoral_poll_props=None,
    electoral_result_props=None,
    geographic_props=None,
    institutional_props=None,
    political_props=None,
    misc_props=None,
    # multicore=True,
    verbose=True,
):
    """
    Queries Wikidata properties based on module arguments for locations given a depth, interval, and timespan.

    Parameters
    ----------
    ents_dict : wd_utils.EntitiesDict : optional (default=None)
        A dictionary with keys being Wikidata QIDs and values being their entities.

    locations : str, list, or lctn_utils.LocationsDict (contains strs) : optional (default=None)
        The locations to query either as strings for indexed locations or Wikidata QIDs.

    depth : int (default=0, no sub_locations)
        The depth from the given lbls or qids that data should go.

        Note: this uses 'P150' (contains administrative territorial entity).

    timespan : two element tuple or list : contains datetime.date or tuple (default=None: (date.today(), date.today())).
        A tuple or list that defines the start and end dates to be queried.

        Note 1: if None, then only the most recent data for the interval will be queried.

        Note 2: if True, then the full timespan from 1-1-1 to the current day will be queried.

        Note 3: passing a single entry will query for that date only.

    interval : str (default=None)
        The time interval over which queries will be made.

        Note 1: see data.time_utils for options.

        Note 2: if None, then only the most recent data will be queried.

    climate_props : str or list (contains strs) : optional (default=None)
        String representations of data/climate modules for data_utils.query_repo_dir.

    demographic_props : str or list (contains strs) : optional (default=None)
        String representations of data/demographic modules for data_utils.query_repo_dir.

    economic_props : str or list (contains strs) : optional (default=None)
        String representations of data/economic modules for data_utils.query_repo_dir.

    electoral_poll_props : str or list (contains strs) : optional (default=None)
        String representations of data/electoral_polls modules for data_utils.query_repo_dir.

    electoral_result_props : str or list (contains strs) : optional (default=None)
        String representations of data/electoral_results modules for data_utils.query_repo_dir.

    geographic_props : str or list (contains strs) : optional (default=None)
        String representations of data/geographic modules for data_utils.query_repo_dir.

    institutional_props : str or list (contains strs) : optional (default=None)
        String representations of data/institutional modules for data_utils.query_repo_dir.

    political_props : str or list (contains strs) : optional (default=None)
        String representations of data/political modules for data_utils.query_repo_dir.

    misc_props : str or list (contains strs) : optional (default=None)
        String representations of data/misc (miscellaneous) modules for data_utils.query_repo_dir.

    verbose : bool (default=True)
        Whether to show a tqdm progress bar for the query.

        Note: passing 'full' calls progress bars for each data_utils.query_repo_dir.

    Potential later arguments:
        multicore : bool or int (default=False)
            Whether to make use of multiple processes and threads, and how many to use.

            Note: True uses all available.

        source : bool (default=False)
            Whether to add columns for sources for all data.

    Returns
    -------
    df_merge : pd.DataFrame
        A df of locations and data given timespan and data source arguments.
    """
    # Captured first so only the caller's arguments (not later locals) are
    # inspected when deriving which property directories to query.
    local_args = locals()

    # Baseline args that do not have imbedded lower level functional arguments.
    # These are passed directly.
    baseline_args = [
        "ents_dict",
        "locations",
        "depth",
        "timespan",
        "interval",
        "verbose",
    ]

    if isinstance(locations, lctn_utils.LocationsDict):
        # A LocationsDict knows the depth it was built with.
        if depth == None:
            depth = locations.get_depth()
        # if interval == None:
        #     interval = locations.get_interval()
        # if timespan == None:
        #     timespan = locations.get_timespan()

    # Every *_props argument that the caller set (not None/False) names a
    # data directory to be queried below.
    query_args = [
        arg
        for arg in local_args.keys()
        if (arg not in baseline_args)
        and (local_args[arg] != None and local_args[arg] != False)
    ]

    # Initialize a merge df, a dictionary of parameters, and an entities dictionary.
    df_merge = None
    query_params = {}
    if ents_dict == None:
        ents_dict = wd_utils.EntitiesDict()

    if isinstance(locations, str):
        locations = [locations]

    for arg in tqdm(
        query_args, desc="Directories queried", unit="dir", disable=not verbose
    ):
        # 'economic_props' -> 'economic', etc.
        sub_directory = arg[: -len("_props")]

        # These two directory names keep a trailing 's' on disk.
        if sub_directory == "electoral_poll" or sub_directory == "electoral_result":
            sub_directory += "s"

        # Round-trip through str()/literal_eval so plain dicts (not the
        # custom EntitiesDict/LocationsDict classes) are passed down.
        query_params["ents_dict"] = literal_eval(str(ents_dict._print()))
        query_params["dir_name"] = sub_directory
        if isinstance(locations, lctn_utils.LocationsDict):
            query_params["locations"] = literal_eval(str(locations._print()))
        else:
            query_params["locations"] = locations
        query_params["depth"] = depth

        # The following is necessary for passing tuples with datetime.date
        # objects to literal_eval.
        # Convert to a tuple of tuples, and then back again in the lower
        # functions via time_utils.make_timespan() in data_utils.gen_base_df().
        # NOTE: this reassignment is idempotent, so repeating it on later
        # loop iterations is safe.
        timespan = f"{timespan}".replace("datetime.date", "")
        timespan = literal_eval(timespan)
        query_params["timespan"] = timespan
        query_params["interval"] = interval

        # Only verbose='full' propagates progress bars to the per-directory
        # queries; plain True keeps a single top-level bar.
        if verbose == "full":
            query_params["verbose"] = True
        elif verbose == True:
            query_params["verbose"] = False
        else:
            query_params["verbose"] = False

        # Included indexes for the given data type.
        incl_indexes = data_utils.incl_dir_idxs(dir_name=sub_directory)

        # Assigning True for the specific data indexes to be queried, which is passed to data_utils.query_repo_dir.
        query_arg_indexes = local_args[arg]
        if query_arg_indexes == True:
            for i in incl_indexes:
                query_params[i] = True

        else:
            if isinstance(query_arg_indexes, str):
                query_arg_indexes = [query_arg_indexes]
            for i in query_arg_indexes:
                if i in incl_indexes:
                    query_params[i] = True
                else:
                    # Raises with a helpful message listing the valid indexes.
                    utils.check_str_args(arguments=i, valid_args=incl_indexes)

        # Pass the created dictionary as kwargs for data_utils.query_repo_dir.
        if df_merge is not None:
            # geo cols are queried as a list, and time as a string.
            if interval is not None:
                merge_on = lctn_utils.depth_to_cols(depth=depth) + [
                    time_utils.interval_to_col_name(interval=interval)
                ]
            else:
                merge_on = lctn_utils.depth_to_cols(depth=depth)

            df_dir_props, new_ents_dict = data_utils.query_repo_dir(
                **literal_eval(str(query_params))
            )

            df_merge = pd.merge(df_merge, df_dir_props, on=merge_on)

        else:
            # First directory queried seeds the merge df.
            df_merge, new_ents_dict = data_utils.query_repo_dir(
                **literal_eval(str(query_params))
            )

        # Drop this directory's index flags before the next iteration.
        for i in incl_indexes:
            query_params.pop(i, None)

        # Fold newly loaded entities into the shared cache.
        for k in new_ents_dict.keys():
            if k not in ents_dict.keys():
                ents_dict[k] = new_ents_dict[k]

    # Reduce QID columns to just one directly after the last locations column.
    qid_cols = [col for col in list(df_merge.columns) if col[: len("qid")] == "qid"]
    first_qid_col = qid_cols[0]
    df_merge.rename(columns={first_qid_col: "keep_this_col"}, inplace=True)
    df_merge = df_merge.loc[
        :, ~df_merge.columns.duplicated()
    ]  # qid columns can be named the same.
    qid_cols = [col for col in list(df_merge.columns) if col[: len("qid")] == "qid"]
    for col in list(set(qid_cols)):
        df_merge.drop(col, axis=1, inplace=True)

    df_merge.rename(columns={"keep_this_col": "qid"}, inplace=True)

    return df_merge
"""
Time Utilities
--------------

Functions for deriving and formatting time information.

Contents
    interval_to_col_name,
    truncate_date,
    truncate_date_col,
    incl_intervals,
    make_timespan,
    latest_date,
    earliest_date,
    truncated_latest_date,
    truncated_earliest_date
"""

from datetime import date, datetime


def interval_to_col_name(interval):
    """
    Queries the proper name of the df column for timespans given an interval.

    Returns None for unrecognized intervals.
    """
    return {
        "yearly": "year",
        "monthly": "month",
        "weekly": "week",
        "daily": "day",
    }.get(interval.lower())


def truncate_date(d, interval=None):
    """
    Truncates a date object to its string representation for an interval.

    Parameters
    ----------
    d : datetime.date, (year, month, day) tuple, or str
        The date to truncate. Strings are assumed pre-formatted and returned as is.

    interval : str : optional (default=None)
        One of incl_intervals(); None returns d unchanged.

    Returns
    -------
    str or the original d
        e.g. '2021', '2021-03', '2021-10' (ISO week), or '2021-03-04'.
    """
    assert isinstance(interval, str) or interval is None, (
        "'interval' argument must be None or one of "
        + ", ".join(i for i in incl_intervals())
        + "."
    )

    if interval is None:
        return d

    if not isinstance(d, str):  # hasn't been formatted already
        if isinstance(d, tuple):
            # (year, month, day) integer tuples become datetime.date first.
            d = datetime.strptime(f"{d[0]}-{d[1]}-{d[2]}", "%Y-%m-%d").date()

        fmt = {
            "yearly": "%Y",
            "monthly": "%Y-%m",
            "weekly": "%Y-%W",
            "daily": "%Y-%m-%d",
        }.get(interval.lower())

        if fmt is not None:
            return d.strftime(fmt)

    return d


def truncate_date_col(df, col, interval):
    """
    Truncates the date column of a df based on a provided interval.
    """
    df[col] = df[col].map(lambda x: truncate_date(d=x, interval=interval))

    return df


def incl_intervals():
    """
    Queries the included intervals.

    Note: timespans will not be able to be queried if their interval is not included.
    """
    return ["yearly", "monthly", "weekly", "daily"]


def make_timespan(timespan=None, interval=None):
    """
    Queries a timespan given user input of strings, ints, or time values.

    Parameters
    ----------
    timespan : two element tuple or list : contains datetime.date or tuple (default=None: (date.today(), date.today()))
        A tuple or list that defines the start and end dates to be queried.

        Note 1: if True, then the full timespan from 1-1-1 to the current day will be queried.

        Note 2: passing a single entry (a datetime.date or a (year, month, day)
        tuple of ints) will query for that date only.

    interval : str
        The time interval over which queries will be made.

        Note 1: see incl_intervals() for options.

        Note 2: if None, then only the most recent data will be queried (returns None).

    Returns
    -------
    formatted_timespan : list (contains datetime.date) or None
        The timespan formatted going back in time (or forward if the user
        passed the dates in (later, earlier) order).

    Raises
    ------
    ValueError
        If 'timespan' has an unusable shape or 'interval' is an unknown string.
    """
    if interval is None and timespan is None:
        # Most recent data wanted.
        return None

    order = -1  # default order is decreasing in time

    if timespan is None:
        timespan = (date.today(), date.today())

    elif timespan is True:
        timespan = (date.min, date.today())

    elif isinstance(timespan, date):
        # A single date queries that date only.
        timespan = (timespan, timespan)

    elif (
        isinstance(timespan, tuple)
        and len(timespan) == 3
        and all(isinstance(v, int) for v in timespan)
    ):
        # Generalization: a single (year, month, day) tuple also queries
        # that date only (previously this crashed).
        single_date = date(*timespan)
        timespan = (single_date, single_date)

    elif isinstance(timespan, (tuple, list)) and len(timespan) == 2:
        if timespan[0] > timespan[1]:
            # User wants the dates to be increasing in df rows instead of
            # the default decreasing order.
            timespan = (timespan[1], timespan[0])
            order = 1

    else:
        # Bug fix: this ValueError was previously constructed but never raised.
        raise ValueError("An invalid value was passed to the 'timespan' argument.")

    # Elements may be datetime.date objects or (year, month, day) tuples.
    start_dt = timespan[0] if isinstance(timespan[0], date) else date(*timespan[0])
    end_dt = timespan[1] if isinstance(timespan[1], date) else date(*timespan[1])

    if interval is None:
        # Preserved behavior: a timespan without an interval yields no dates.
        return None

    if interval not in incl_intervals():
        # Bug fix: this ValueError was previously constructed but never raised.
        raise ValueError(
            "An invalid value was passed to the 'interval' argument. Please choose one of "
            + ", ".join(incl_intervals())
            + "."
        )

    # Imported lazily so the module stays importable without dateutil and the
    # error paths above do not require it.
    from dateutil.rrule import DAILY, MONTHLY, WEEKLY, YEARLY, rrule

    freq = {
        "yearly": YEARLY,
        "monthly": MONTHLY,
        "weekly": WEEKLY,
        "daily": DAILY,
    }[interval]

    return [dt.date() for dt in rrule(freq, dtstart=start_dt, until=end_dt)][::order]


def latest_date(timespan):
    """
    Returns the latest date in a timespan.
    """
    if timespan[1] > timespan[0]:
        return timespan[1]
    else:
        return timespan[0]


def earliest_date(timespan):
    """
    Returns the earliest date in a timespan.
    """
    if timespan[0] < timespan[1]:
        return timespan[0]
    else:
        return timespan[1]


def truncated_latest_date(timespan, interval):
    """
    Returns the truncated latest date in a timespan.
    """
    return truncate_date(latest_date(timespan), interval=interval)


def truncated_earliest_date(timespan, interval):
    """
    Returns the truncated earliest date in a timespan.
    """
    return truncate_date(earliest_date(timespan), interval=interval)
def load_ent(ents_dict, pq_id):
    """
    Loads a Wikidata entity, preferring the local cache for QIDs.

    Parameters
    ----------
    ents_dict : wd_utils.EntitiesDict or None
        A dictionary with keys being Wikidata QIDs and values being their entities.

    pq_id : str
        A Wikidata QID or PID.

    Returns
    -------
    dict
        The raw entity data.
    """
    # Bug fix: previously a None ents_dict crashed on ents_dict.keys();
    # now QIDs fall through to a direct fetch when there is no cache.
    if ents_dict is not None and pq_id[0] == "Q":
        check_in_ents_dict(ents_dict, pq_id)  # fetches only if missing
        return ents_dict[pq_id]

    return client.get(pq_id, load=True).data


def check_in_ents_dict(ents_dict, qid):
    """
    Checks the provided entity dictionary and adds the entity if not present.
    """
    if ents_dict is not None and qid not in ents_dict.keys():
        ents_dict[qid] = client.get(qid, load=True).data


def is_wd_id(var):
    """
    Checks whether a variable is a Wikidata id (a QID like 'Q42' or a PID like 'P31').
    """
    # Robustness: empty strings are not ids (previously raised IndexError),
    # and ids must be entirely numeric after the prefix (previously 'Q12Q3'
    # was accepted).
    if not var:
        return False

    return var[0] in ("Q", "P") and var[1:].isnumeric()


def prop_has_many_entries(prop_ent):
    """
    Check if a Wikidata entry has multiple values for a given property.
    """
    try:
        prop_ent[1]
        return True
    except IndexError:
        return False


def print_not_available(ents_dict=None, qid=None, pid=None, extra_msg=""):
    """
    Notify the user that a given property is not available for a given subject.
    """
    print(
        f"{get_lbl(ents_dict, qid)} '{qid}' currently does not have the '{get_lbl(ents_dict, pid)}' property '{pid}'{extra_msg}."
    )


def get_lbl(ents_dict=None, pq_id=None):
    """
    Gets an English label of a Wikidata entity, falling back to German.
    """
    if ents_dict is None and pq_id is None:
        return None

    try:
        return load_ent(ents_dict, pq_id)["labels"]["en"]["value"]
    except KeyError:
        # No English label: fall back to the German one.
        return load_ent(ents_dict, pq_id)["labels"]["de"]["value"]


def get_prop(ents_dict, qid, pid):
    """
    Gets property information (the list of claims) from a Wikidata entity.
    """
    check_in_ents_dict(ents_dict=ents_dict, qid=qid)  # checks for all further functions
    return ents_dict[qid]["claims"][pid]


def get_prop_id(ents_dict, qid, pid, i):
    """
    Gets the qid of an indexed property value of a Wikidata entity.
    """
    return get_prop(ents_dict=ents_dict, qid=qid, pid=pid)[i]["mainsnak"]["datavalue"][
        "value"
    ]["id"]


def get_prop_lbl(ents_dict, qid, pid, i):
    """
    Gets the label of an indexed property value of a Wikidata entity.
    """
    return get_lbl(
        ents_dict=ents_dict,
        pq_id=get_prop_id(ents_dict=ents_dict, qid=qid, pid=pid, i=i),
    )
def _to_number(val):
    """Coerce a string value to int, then float, returning it unchanged on failure."""
    for cast in (int, float):
        try:
            return cast(val)
        except Exception:
            pass
    return val


def get_prop_val(ents_dict, qid, pid, i, ignore_char=""):
    """
    Gets the value of an indexed property of a Wikidata entity.

    Tries, in order: an entity-valued claim (returning its label), a quantity
    ('amount'), then a plain value; yields np.nan when none are present.
    """
    try:
        snak_val = get_prop(ents_dict=ents_dict, qid=qid, pid=pid)[i]["mainsnak"][
            "datavalue"
        ]["value"]
    except Exception:
        # Property has no datavalue at the given index.
        return np.nan

    # Entity-valued claim: resolve the QID to its label.
    try:
        return get_lbl(ents_dict=ents_dict, pq_id=snak_val["id"]).replace(
            ignore_char, ""
        )
    except Exception:
        pass

    # Quantity-valued claim: coerce the 'amount' string to a number.
    try:
        return _to_number(snak_val["amount"].replace(ignore_char, ""))
    except Exception:
        pass

    # Plain (string) value, possibly numeric.
    try:
        return _to_number(snak_val.replace(ignore_char, ""))
    except Exception:
        return np.nan


def prop_has_qualifiers(ents_dict, qid, pid, i):
    """
    Checks if the indexed claim of the property has qualifiers.
    """
    claim = get_prop(ents_dict=ents_dict, qid=qid, pid=pid)[i]
    return "qualifiers" in claim.keys()


def get_qualifiers(ents_dict, qid, pid, i):
    """
    Gets the qualifiers of the indexed claim of a Wikidata entity's property.
    """
    claim = get_prop(ents_dict=ents_dict, qid=qid, pid=pid)[i]
    return claim["qualifiers"]


def get_prop_qualifier_val(ents_dict, qid, pid, sub_pid, i, ignore_char=""):
    """
    Gets the value of an indexed qualifier property of a Wikidata entity.

    Mirrors get_prop_val, but reads the first value of the sub_pid qualifier.
    """
    try:
        qual_val = get_prop(ents_dict=ents_dict, qid=qid, pid=pid)[i]["qualifiers"][
            sub_pid
        ][0]["datavalue"]["value"]
    except Exception:
        # Qualifier has no datavalue at the given index.
        return np.nan

    # Entity-valued qualifier: resolve the QID to its label.
    try:
        return get_lbl(ents_dict=ents_dict, pq_id=qual_val["id"]).replace(
            ignore_char, ""
        )
    except Exception:
        pass

    # Quantity-valued qualifier.
    try:
        return _to_number(qual_val["amount"].replace(ignore_char, ""))
    except Exception:
        pass

    # Plain (string) value, possibly numeric.
    try:
        return _to_number(qual_val.replace(ignore_char, ""))
    except Exception:
        return np.nan


def get_val(ents_dict, qid, pid, sub_pid, i, ignore_char=""):
    """
    Combines get_prop_val, get_prop_qualifier_val, and boolean assignment.
    """
    if sub_pid is bool:
        # Boolean assignment: the presence of the property is the value.
        return True

    if isinstance(sub_pid, str):
        return get_prop_qualifier_val(ents_dict, qid, pid, sub_pid, i, ignore_char)

    return get_prop_val(ents_dict, qid, pid, i, ignore_char)


def get_prop_t(pid, i):
    """
    Gets a value of 'P585' (point in time) from a Wikidata property claim list.
    """
    return pid[i]["qualifiers"]["P585"][0]["datavalue"]["value"]["time"]


def get_prop_start_t(pid, i):
    """
    Gets a value of 'P580' (start time) from a Wikidata property claim list,
    or None when the qualifier is absent.
    """
    try:
        return pid[i]["qualifiers"]["P580"][0]["datavalue"]["value"]["time"]
    except Exception:
        return None
def get_prop_end_t(pid, i):
    """
    Gets a value of 'P582' (end time) from a Wikidata property claim list,
    or None when the qualifier is absent.
    """
    try:
        return pid[i]["qualifiers"]["P582"][0]["datavalue"]["value"]["time"]
    except:
        return


def format_t(t):
    """
    Formats a Wikidata time string (e.g. '+2020-01-01T00:00:00Z') as a datetime.date.

    Unknown month/day components ('-00') are mapped to January/the 1st.
    Returns None for None input.
    """
    if t is not None:
        return datetime.strptime(t[1:11].replace("-00", "-01"), "%Y-%m-%d").date()
    else:
        return t


def get_formatted_prop_t(ents_dict, qid, pid, i):
    """
    Gets the formatted 'P585' (point in time) from a Wikidata property.
    """
    return format_t(get_prop_t(get_prop(ents_dict=ents_dict, qid=qid, pid=pid), i))


def get_formatted_prop_start_t(ents_dict, qid, pid, i):
    """
    Gets the formatted 'P580' (start time) from a Wikidata property.
    """
    return format_t(
        get_prop_start_t(get_prop(ents_dict=ents_dict, qid=qid, pid=pid), i)
    )


def get_formatted_prop_end_t(ents_dict, qid, pid, i):
    """
    Gets the formatted 'P582' (end time) from a Wikidata property.
    """
    return format_t(get_prop_end_t(get_prop(ents_dict=ents_dict, qid=qid, pid=pid), i))


def get_prop_timespan_intersection(ents_dict, qid, pid, i, timespan, interval):
    """
    Returns the truncated dates in the queried timespan during which the
    indexed claim was valid, or None when there is no overlap.

    Parameters
    ----------
    ents_dict : wd_utils.EntitiesDict
        A dictionary with keys being Wikidata QIDs and values being their entities.

    qid : str
        Wikidata QID for a location.

    pid : str
        The Wikidata property that is being queried.

    i : int
        The index of the claim within the property.

    timespan : two element tuple or list, or None
        The start and end dates to be queried (see time_utils.make_timespan).

    interval : str or None
        The time interval over which queries will be made.

    Returns
    -------
    list or None
        Truncated dates for which the claim holds, or None to skip the claim.
    """
    included_times = time_utils.make_timespan(timespan=timespan, interval=interval)

    start_t = get_formatted_prop_start_t(ents_dict, qid, pid, i)
    end_t = get_formatted_prop_end_t(ents_dict, qid, pid, i)

    if interval is None and timespan is None:
        # Most recent data wanted: a claim with an end date is no longer
        # current, so skip it; otherwise stamp it with today's date.
        if end_t is not None:
            return None

        prop_t_intersection = [time_utils.truncate_date(date.today(), interval="daily")]

    elif start_t is not None and end_t is not None:
        if all(start_t > t for t in included_times) or all(
            end_t < t for t in included_times
        ):
            return None

        prop_t_intersection = [t for t in included_times if start_t <= t <= end_t]

    elif start_t is not None:
        if all(start_t > t for t in included_times):
            return None

        prop_t_intersection = [t for t in included_times if t >= start_t]

    elif end_t is not None:
        if all(end_t < t for t in included_times):
            return None

        # Bug fix: all included times were previously returned here, even
        # those after the claim's end date; now they are filtered like the
        # start/end branch above.
        prop_t_intersection = [t for t in included_times if t <= end_t]

    else:
        # No start or end qualifiers on the claim: previously an unbound
        # local raised NameError into the except below; make the skip explicit.
        return None

    try:
        return [
            time_utils.truncate_date(t, interval=interval)
            for t in prop_t_intersection
        ]
    except:
        return None


def dir_to_topic_page(dir_name=None, ents_dict=None, qid=None):
    """
    Allows for the checking of subject entities for a given QID.

    Parameters
    ----------
    dir_name : str (default=None)
        The name of the directory within wikirepo.data.

    ents_dict : wd_utils.EntitiesDict (default=None)
        A dictionary with keys being Wikidata QIDs and values being their entities.

    qid : str (default=None)
        Wikidata QID for a location.

    Returns
    -------
    topic_qid or None : str or None
        The qid for an existing topic for the location or None to cancel later steps.
    """
    # Needs sub-topics for other wikirepo directories.
    name_to_topic_pid_dict = {"economic": "P8744", "geographic": "P2633"}

    if dir_name not in name_to_topic_pid_dict:
        return None

    topic_pid = name_to_topic_pid_dict[dir_name]
    if topic_pid in load_ent(ents_dict, qid)["claims"].keys():
        return get_prop_id(ents_dict, qid, topic_pid, i=0)

    return None
474 | 475 | Note: this controls the depth of the returned placeholders. 476 | 477 | Returns 478 | ------- 479 | qid, orig_qid, t_p_d, skip_assignment : str, str, dict, bool 480 | Arguments necessary to derive if and how assignment should occur. 481 | """ 482 | topic_qid = dir_to_topic_page(dir_name, ents_dict, qid) 483 | 484 | t_p_d = {} 485 | skip_assignment = False 486 | if topic_qid != None and pid in load_ent(ents_dict, topic_qid)["claims"].keys(): 487 | # A sub-page for the location that has the property exists. 488 | # Save the original QID for assignment and replace with the topic page for access. 489 | orig_qid = qid 490 | qid = topic_qid 491 | 492 | else: 493 | print_not_available(ents_dict=ents_dict, qid=qid, pid=pid, extra_msg="") 494 | # Assign no date for on interval or the most recent time in the 495 | # timespan with np.nan as a placeholder. 496 | if interval is None and timespan is None: 497 | if vd_or_vdd == "vd": 498 | t_p_d = {"no date": np.nan} 499 | else: 500 | t_p_d = {"no date": {"no date": np.nan}} 501 | else: 502 | if vd_or_vdd == "vd": 503 | t_p_d = { 504 | time_utils.truncated_latest_date( 505 | timespan=timespan, interval=interval 506 | ): np.nan 507 | } 508 | else: 509 | t_p_d = { 510 | time_utils.truncated_latest_date( 511 | timespan=timespan, interval=interval 512 | ): {get_prop_val(ents_dict, qid, pid, i=0, ignore_char=""): np.nan} 513 | } 514 | 515 | skip_assignment = True 516 | 517 | return qid, orig_qid, t_p_d, skip_assignment 518 | 519 | 520 | def t_to_prop_val_dict( 521 | dir_name=None, 522 | ents_dict=None, 523 | qids=None, 524 | pid=None, 525 | sub_pid=None, 526 | interval=None, 527 | timespan=None, 528 | ignore_char="", 529 | span=False, 530 | ): 531 | """ 532 | Gets a dictionary of property value(s) indexed by time(s) from a locational entity. 533 | 534 | Notes 535 | ----- 536 | Used to assign property values to a single column (values cannot have the same time value). 
537 | 538 | Parameters 539 | ---------- 540 | dir_name : str (default=None) 541 | The name of the directory within wikirepo.data. 542 | 543 | ents_dict : wd_utils.EntitiesDict (default=None) 544 | A dictionary with keys being Wikidata QIDs and values being their entities. 545 | 546 | qids : str or list (contains strs) (default=None) 547 | Wikidata QIDs for locations. 548 | 549 | pid : str (default=None) 550 | The Wikidata property that is being queried. 551 | 552 | sub_pid : str (default=None) 553 | The Wikidata property that subsets time values. 554 | 555 | timespan : two element tuple or list : contains datetime.date or tuple (default=None: (date.today(), date.today())) 556 | A tuple or list that defines the start and end dates to be queried. 557 | 558 | Note 1: if True, then the full timespan from 1-1-1 to the current day will be queried. 559 | 560 | Note 2: passing a single entry will query for that date only. 561 | 562 | interval : str (default=None) 563 | The time interval over which queries will be made. 564 | 565 | Note 1: see data.time_utils for options. 566 | 567 | Note 2: if None, then only the most recent data will be queried. 568 | 569 | ignore_char : str (default='', no character to ignore) 570 | Characters in the output that should be ignored. 571 | 572 | span : bool (default=False) 573 | Whether to check for P580 'start time' and P582 'end time' to create spans. 574 | 575 | Returns 576 | ------- 577 | t_prop_dict : dict 578 | A dictionary of Wikidata properties indexed by their time. 579 | """ 580 | qids = utils._make_var_list(qids)[0] 581 | 582 | if interval != None: 583 | included_times = [ 584 | time_utils.truncate_date(t, interval=interval) 585 | for t in time_utils.make_timespan(timespan=timespan, interval=interval) 586 | ] 587 | else: 588 | # Triggers acceptance of a all values so that the most recent can be selected. 
589 | included_times = None 590 | 591 | t_prop_dict = {} 592 | for q in qids: 593 | t_p_d = {} 594 | orig_qid = None 595 | skip_assignment = False 596 | if pid not in load_ent(ents_dict, q)["claims"].keys(): 597 | q, orig_qid, t_p_d, skip_assignment = check_for_pid_topic_page( 598 | dir_name=dir_name, 599 | ents_dict=ents_dict, 600 | qid=q, 601 | orig_qid=orig_qid, 602 | pid=pid, 603 | timespan=timespan, 604 | interval=interval, 605 | vd_or_vdd="vd", 606 | ) 607 | 608 | if skip_assignment == False: 609 | if span: 610 | for i in range(len(get_prop(ents_dict, q, pid))): 611 | prop_t_intersection = get_prop_timespan_intersection( 612 | ents_dict, q, pid, i, timespan, interval 613 | ) 614 | if prop_t_intersection != None: 615 | for t in prop_t_intersection: 616 | if t in t_p_d.keys(): 617 | t_p_d[t] = str(t_p_d[t]) 618 | t_p_d[t] += ", " + str( 619 | get_val(ents_dict, q, pid, sub_pid, i, ignore_char) 620 | ) 621 | 622 | else: 623 | t_p_d[t] = get_val( 624 | ents_dict, q, pid, sub_pid, i, ignore_char 625 | ) 626 | 627 | else: 628 | for i in range(len(get_prop(ents_dict, q, pid))): 629 | try: 630 | t = time_utils.truncate_date( 631 | get_formatted_prop_t(ents_dict, q, pid, i), 632 | interval=interval, 633 | ) 634 | except: 635 | if interval is None and timespan is None: 636 | t = "no date" 637 | 638 | else: 639 | # Assign the most recent time in the timespan. 
def t_to_prop_val_dict_dict(
    dir_name=None,
    ents_dict=None,
    qids=None,
    pid=None,
    sub_pid=None,
    interval=None,
    timespan=None,
    ignore_char="",
    span=False,
):
    """
    Gets a dictionary of dictionaries of multiple property values that are indexed by time(s) from a locational entity.

    Notes
    -----
    Used to assign property values to separate columns (values can have the same time value)

    Parameters
    ----------
    dir_name : str (default=None)
        The name of the directory within wikirepo.data.

    ents_dict : wd_utils.EntitiesDict (default=None)
        A dictionary with keys being Wikidata QIDs and values being their entities.

    qids : str or list (contains strs) (default=None)
        Wikidata QIDs for locations.

    pid : str (default=None)
        The Wikidata property that is being queried.

    sub_pid : str (default=None)
        The Wikidata property that subsets time values.

    timespan : two element tuple or list : contains datetime.date or tuple (default=None: (date.today(), date.today()))
        A tuple or list that defines the start and end dates to be queried.

        Note 1: if True, then the full timespan from 1-1-1 to the current day will be queried.

        Note 2: passing a single entry will query for that date only.

    interval : str (default=None)
        The time interval over which queries will be made.

        Note 1: see data.time_utils for options.

        Note 2: if None, then only the most recent data will be queried.

    ignore_char : str (default='', no character to ignore)
        Characters in the output that should be ignored.

    span : bool (default=False)
        Whether to check for P580 'start time' and P582 'end time' to create spans.

    Returns
    -------
    t_prop_dict : dict
        A dictionary of Wikidata properties indexed by their time.
    """
    qids = utils._make_var_list(qids)[0]

    if interval is None:
        # Triggers acceptance of all values so that the most recent can be selected.
        included_times = None
    else:
        included_times = [
            time_utils.truncate_date(t, interval=interval)
            for t in time_utils.make_timespan(timespan=timespan, interval=interval)
        ]

    t_prop_dict = {}
    for q in qids:
        t_p_d = {}
        orig_qid = None
        skip_assignment = False
        if pid not in load_ent(ents_dict, q)["claims"]:
            # The location lacks the property directly; fall back to a
            # dedicated topic page if one exists (mirrors the "vd" variant).
            q, orig_qid, t_p_d, skip_assignment = check_for_pid_topic_page(
                dir_name=dir_name,
                ents_dict=ents_dict,
                qid=q,
                orig_qid=orig_qid,
                pid=pid,
                timespan=timespan,
                interval=interval,
                vd_or_vdd="vdd",
            )

        if not skip_assignment:
            # Hoist the repeated property lookup out of the loops.
            prop_entries = get_prop(ents_dict, q, pid)
            if span:
                for i in range(len(prop_entries)):
                    if "qualifiers" in prop_entries[i]:
                        prop_t_intersection = get_prop_timespan_intersection(
                            ents_dict, q, pid, i, timespan, interval
                        )
                    else:
                        # No start/end qualifiers: the value applies to every
                        # requested time.
                        prop_t_intersection = included_times

                    if prop_t_intersection is not None:
                        for t in prop_t_intersection:
                            t_p_d.setdefault(t, {})
                            t_p_d[t][
                                get_prop_val(ents_dict, q, pid, i, ignore_char)
                            ] = get_val(ents_dict, q, pid, sub_pid, i, ignore_char)

            else:
                for i in range(len(prop_entries)):
                    try:
                        t = time_utils.truncate_date(
                            get_formatted_prop_t(ents_dict, q, pid, i),
                            interval=interval,
                        )
                    # Narrowed from a bare `except:` so KeyboardInterrupt and
                    # SystemExit still propagate.
                    except Exception:
                        if interval is None and timespan is None:
                            t = "no date"

                        else:
                            # Assign the most recent time in the timespan.
                            t = time_utils.truncated_latest_date(
                                timespan=timespan, interval=interval
                            )

                    if included_times is None or t in included_times:
                        t_p_d.setdefault(t, {})
                        t_p_d[t][
                            get_prop_val(ents_dict, q, pid, i, ignore_char)
                        ] = get_val(ents_dict, q, pid, sub_pid, i, ignore_char)

        if orig_qid is None:
            t_prop_dict[q] = t_p_d
        else:
            # Values found via a topic page are keyed by the original location QID.
            t_prop_dict[orig_qid] = t_p_d

    return t_prop_dict


class EntitiesDict(dict):
    """
    A dictionary for storing WikiData entities.

    Keywords are QIDs, and values are QID entities.
    """

    # No per-instance __dict__: instances carry only the dict payload.
    __slots__ = ()

    def __init__(self, *args, **kwargs):
        super(EntitiesDict, self).__init__(*args, **kwargs)

    def __repr__(self):
        return "%s" % self.__class__

    def __str__(self):
        # print() is deliberately uninformative because entities can be huge.
        return """
    The EntitiesDict class is meant to store WikiData entities.
        - Keys are QIDs
        - Values are QID entities

    Because of the potential size, print() has been disabled.

    All other dictionary methods are included, as well as:
        key_lbls - a list of labels of the QID keys
        _print - prints the full dictionary
    """

    def key_lbls(self):
        """
        Provides a list of the labels of all entities within the dictionary.
        """
        return [get_lbl(ents_dict=self, pq_id=q) for q in self.keys()]

    def _print(self):
        """
        Returns the full entities dictionary as a plain dict (not advisable to print).
        """
        return dict(self)
831 | """ 832 | return {k: v for k, v in self.items()} 833 | -------------------------------------------------------------------------------- /src/wikirepo/maps/__init__.py: -------------------------------------------------------------------------------- 1 | # from wikirepo.maps.upload import upload # function call wikirepo.maps.upload() 2 | # from wikirepo.maps.query import query # function call wikirepo.maps.query() 3 | -------------------------------------------------------------------------------- /src/wikirepo/maps/map_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions querying map shapefiles for mapping. 3 | 4 | Reference: https://plotly.com/python/choropleth-maps/. 5 | 6 | Contents 7 | -------- 8 | """ 9 | 10 | # import pandas as pd 11 | # import plotly.express as px 12 | # import matplotlib.pyplot as plt 13 | 14 | # from wikirepo.data import data_utils 15 | -------------------------------------------------------------------------------- /src/wikirepo/maps/query.py: -------------------------------------------------------------------------------- 1 | """ 2 | A function that calls map data from Wikidata. 3 | 4 | Note: the purpose of this module is for a wikirepo.maps.query() function call. 5 | 6 | Contents 7 | -------- 8 | query 9 | """ 10 | 11 | 12 | # def query(): 13 | # return 14 | -------------------------------------------------------------------------------- /src/wikirepo/maps/upload.py: -------------------------------------------------------------------------------- 1 | """ 2 | A function that uploads map data from Wikidata, 3 | 4 | Note: the purpose of this module is for a wikirepo.maps.upload() function call. 
5 | 6 | Contents 7 | -------- 8 | upload 9 | """ 10 | 11 | 12 | # def upload(): 13 | # return 14 | -------------------------------------------------------------------------------- /src/wikirepo/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities 3 | --------- 4 | 5 | Utility functions for general operations. 6 | 7 | Contents 8 | _make_var_list, 9 | _return_given_type, 10 | try_float, 11 | round_if_int, 12 | gen_list_of_lists, 13 | check_str_similarity, 14 | check_str_args 15 | """ 16 | 17 | from difflib import SequenceMatcher 18 | 19 | 20 | def _make_var_list(var): 21 | """ 22 | Allows for a one line check for if a variable is a list. 23 | """ 24 | var_was_str = False 25 | if isinstance(var, str): 26 | var_was_str = True 27 | var = [var] 28 | 29 | return var, var_was_str 30 | 31 | 32 | def _return_given_type(var, var_was_str): 33 | """ 34 | Allows for a one line return or list or string variables. 35 | """ 36 | if var_was_str: 37 | # Check if there's only one element. 38 | if len(var) == 1: 39 | return var[0] 40 | 41 | else: 42 | return var 43 | 44 | else: 45 | return var 46 | 47 | 48 | def try_float(string): 49 | """Checks if a string is a float.""" 50 | try: 51 | return float(string) 52 | 53 | except: 54 | return string 55 | 56 | 57 | def round_if_int(val): 58 | """Rounds off the decimal of a value if it is an integer float.""" 59 | if isinstance(val, float) and val.is_integer(): 60 | val = int(val) 61 | 62 | return val 63 | 64 | 65 | def gen_list_of_lists(original_list, new_structure): 66 | """Generates a list of lists with a given structure from a given list.""" 67 | assert len(original_list) == sum( 68 | new_structure 69 | ), "The number of elements in the original list and desired structure don't match." 
70 | 71 | return [ 72 | [original_list[i + sum(new_structure[:j])] for i in range(new_structure[j])] 73 | for j in range(len(new_structure)) 74 | ] 75 | 76 | 77 | def check_str_similarity(str_1, str_2): 78 | """Checks the similarity of two strings.""" 79 | return SequenceMatcher(None, str_1, str_2).ratio() 80 | 81 | 82 | def check_str_args(arguments, valid_args): 83 | """ 84 | Checks whether a str argument is valid, and makes suggestions if not. 85 | """ 86 | if isinstance(arguments, str): 87 | if arguments in valid_args: 88 | return arguments 89 | 90 | suggestions = [] 91 | for v in valid_args: 92 | similarity_score = round(check_str_similarity(str_1=arguments, str_2=v), 2) 93 | arg_and_score = (v, similarity_score) 94 | suggestions.append(arg_and_score) 95 | 96 | ordered_suggestions = sorted(suggestions, key=lambda x: x[1], reverse=True) 97 | 98 | print(f"'{arguments}' is not a valid argument for the given function.") 99 | print(f"The closest valid options to '{arguments}' are:") 100 | for item in ordered_suggestions[:5]: 101 | print(item) 102 | 103 | raise ValueError("An invalid string has been passed.") 104 | 105 | elif isinstance(arguments, list): 106 | # Check arguments, and remove them if they're invalid. 
107 | for a in arguments: 108 | check_str_args(arguments=a, valid_args=valid_args) 109 | 110 | return arguments 111 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrewtavis/wikirepo/5618479c0065dacce2f682441ae6072120b15865/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Fixtures 3 | -------- 4 | """ 5 | 6 | from datetime import date 7 | 8 | import pytest 9 | import wikirepo 10 | from wikirepo.data import data_utils, lctn_utils, wd_utils 11 | 12 | entities_dict = wd_utils.EntitiesDict() 13 | countries = ["Germany"] 14 | depth = 0 15 | timespan = (date(2009, 1, 1), date(2010, 1, 1)) 16 | interval = "yearly" 17 | 18 | # Test of values for a given timespan. 19 | df_timespan = wikirepo.data.query( 20 | ents_dict=entities_dict, 21 | locations=countries, 22 | depth=depth, 23 | timespan=timespan, 24 | interval=interval, 25 | demographic_props=[ 26 | "ethnic_div", 27 | "life_expectancy", 28 | "literacy", 29 | "out_of_school_children", 30 | "population", 31 | ], 32 | economic_props=[ 33 | "gdp_ppp", 34 | "gini", 35 | "inflation_rate", 36 | "median_income", 37 | "nom_gdp_per_cap", 38 | "nom_gdp", 39 | "ppp_gdp_per_cap", 40 | "total_reserves", 41 | "unemployment", 42 | ], 43 | electoral_poll_props=False, 44 | electoral_result_props=False, 45 | geographic_props=["area", "continent", "country",], 46 | institutional_props=[ 47 | "bti_gov_idx", 48 | "bti_status_idx", 49 | "capital", 50 | "fh_category", 51 | "human_dev_idx", 52 | "org_membership", 53 | ], 54 | political_props="executive", 55 | misc_props="country_abbr", 56 | verbose=True, 57 | ) 58 | 59 | # Test of most recent values. 
# Test of most recent values: timespan/interval of None queries only the
# latest available value for each property.
df_most_recent = wikirepo.data.query(
    ents_dict=entities_dict,
    locations=countries,
    depth=depth,
    timespan=None,
    interval=None,
    demographic_props=[
        "ethnic_div",
        "life_expectancy",
        "literacy",
        "out_of_school_children",
        "population",
    ],
    economic_props=[
        "gdp_ppp",
        "gini",
        "inflation_rate",
        "median_income",
        "nom_gdp_per_cap",
        "nom_gdp",
        "ppp_gdp_per_cap",
        "total_reserves",
        "unemployment",
    ],
    electoral_poll_props=False,
    electoral_result_props=False,
    geographic_props=["area", "continent", "country",],
    institutional_props=[
        "bti_gov_idx",
        "bti_status_idx",
        "capital",
        "fh_category",
        "human_dev_idx",
        "org_membership",
    ],
    political_props="executive",
    misc_props="country_abbr",
    verbose=True,
)
# Post-process the population column — presumably splitting combined
# value/date entries into separate columns (TODO confirm in data_utils).
df_most_recent = data_utils.split_col_val_dates(df_most_recent, col="population")

# Second pass at depth 1 with sub-locations enabled, i.e. Germany plus its
# states (test_lctn_utils expects 17 labels: Germany and all its states).
entities_dict_bundeslands = wd_utils.EntitiesDict()
depth = 1
sub_lctns = True
bundeslands_dict = lctn_utils.gen_lctns_dict(
    ents_dict=entities_dict_bundeslands,
    depth=depth,
    locations=countries,
    sub_lctns=sub_lctns,
    timespan=timespan,
    interval=interval,
    verbose=True,
)
df_bundeslands = wikirepo.data.query(
    ents_dict=entities_dict_bundeslands,
    locations=bundeslands_dict,
    depth=depth,
    timespan=timespan,
    interval=interval,
    demographic_props="population",
    economic_props=False,
    electoral_poll_props=False,
    electoral_result_props=False,
    geographic_props=False,
    institutional_props="capital",
    political_props=False,
    misc_props="sub_country_abbr",
    verbose=True,
)


# Fixtures below hand the objects prepared above to the test modules.
@pytest.fixture(params=[entities_dict])
def ents_dict(request):
    # Entities cached during the country-level queries above.
    return request.param


@pytest.fixture(params=[bundeslands_dict])
def lctns_dict(request):
    # Nested locations dict for Germany and its states.
    return request.param


@pytest.fixture(params=[df_bundeslands])
def df(request):
    # State-level population/capital query results.
    return request.param


@pytest.fixture(params=["Q183"])
def qid(request):
    # Wikidata QID for Germany (see test_lctn_utils' depth-0 assertion).
    return request.param


@pytest.fixture(params=["P1082"])
def pop_pid(request):
    # PID used for population lookups (per the fixture's usage in tests).
    return request.param


@pytest.fixture(params=["P6"])
def exec_pid(request):
    # PID used for executive/head-of-government lookups in tests.
    return request.param
"Hamburg" not in list(df_test["sub_lctn"]) 53 | 54 | df_test = data_utils.sum_df_prop_vals( 55 | df=df, 56 | target_lctn="Berlin", 57 | vals_lctn="Hamburg", 58 | lctn_col="sub_lctn", 59 | time_col=None, 60 | prop_col="population", 61 | subtract=True, 62 | drop_vals_lctn=True, 63 | ) 64 | 65 | assert ( 66 | df_test.loc[df_test.loc[df_test["sub_lctn"] == "Berlin"].index[0], "population"] 67 | < df.loc[df.loc[df["sub_lctn"] == "Berlin"].index[0], "population"] 68 | ) 69 | assert "Hamburg" not in list(df_test["sub_lctn"]) 70 | -------------------------------------------------------------------------------- /tests/test_lctn_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Location Utilities Tests 3 | ------------------------ 4 | """ 5 | 6 | from wikirepo.data import lctn_utils 7 | 8 | 9 | def test_lctn_to_qid_dict(): 10 | assert isinstance(lctn_utils.lctn_to_qid_dict(), dict) 11 | 12 | 13 | def test_qid_to_lctn_dict(): 14 | assert isinstance(lctn_utils.qid_to_lctn_dict(), dict) 15 | 16 | 17 | def test_incl_lctn_lbls(): 18 | assert len(lctn_utils.incl_lctn_lbls("world")) == 1 19 | assert len(lctn_utils.incl_lctn_lbls("continent")) == 7 20 | assert isinstance(lctn_utils.incl_lctn_lbls("country"), list) 21 | assert isinstance(lctn_utils.incl_lctn_lbls("region"), list) 22 | 23 | 24 | def test_incl_lctn_ids(): 25 | assert isinstance(lctn_utils.incl_lctn_ids(), list) 26 | 27 | 28 | def test_qid_to_lctn_lbl(qid): 29 | assert isinstance(lctn_utils.qid_to_lctn_lbl(qid), str) 30 | 31 | 32 | def test_merge_lctn_dicts(lctns_dict): 33 | assert isinstance( 34 | lctn_utils.merge_lctn_dicts(ld1=lctns_dict, ld2=lctns_dict), 35 | lctn_utils.LocationsDict, 36 | ) 37 | 38 | 39 | def test_LocationsDict(ents_dict, lctns_dict): 40 | assert len(lctns_dict.key_lbls_list()) == 17 # Germany and all its states 41 | assert lctns_dict.get_depth() == 1 42 | assert lctns_dict.get_qids_at_depth(depth=0) == ["Q183"] 43 | assert 
"""
Time Utilities Tests
--------------------
"""

from datetime import date

import pandas as pd
import pytest
from wikirepo.data import time_utils


def test_interval_to_col_name():
    # Each supported interval maps to its singular column name.
    assert time_utils.interval_to_col_name("yearly") == "year"
    assert time_utils.interval_to_col_name("monthly") == "month"
    assert time_utils.interval_to_col_name("weekly") == "week"
    assert time_utils.interval_to_col_name("daily") == "day"


def test_truncate_date():
    # Dates can be passed as date objects, tuples, or pre-truncated strings.
    d = date(2021, 1, 1)
    d_tup = (2021, 1, 1)
    assert time_utils.truncate_date(d=d_tup, interval="yearly") == "2021"
    assert time_utils.truncate_date(d=d, interval="yearly") == "2021"
    assert time_utils.truncate_date(d=d, interval="monthly") == "2021-01"
    # Week numbering starts at 00 for the first (partial) week of the year.
    assert time_utils.truncate_date(d=d, interval="weekly") == "2021-00"
    assert time_utils.truncate_date(d=d, interval="daily") == "2021-01-01"
    # Interval matching is case-insensitive; interval=None passes strings through.
    assert time_utils.truncate_date(d="2021", interval="Yearly") == "2021"
    assert time_utils.truncate_date(d="2021", interval=None) == "2021"
    # A non-string interval is rejected via an assertion.
    with pytest.raises(AssertionError):
        assert time_utils.truncate_date(d="2021", interval=5) == "2021"


def test_truncate_date_col():
    # Truncation applies element-wise to a DataFrame column.
    df = pd.DataFrame([date(2021, 1, 1), date(2021, 1, 1)], columns=["time"])
    df = time_utils.truncate_date_col(df=df, col="time", interval="yearly")

    assert list(df["time"]) == ["2021", "2021"]


def test_incl_intervals():
    assert time_utils.incl_intervals() == ["yearly", "monthly", "weekly", "daily"]


def test_make_timespan():
    # No timespan and no interval yields no timespan at all.
    assert time_utils.make_timespan(timespan=None, interval=None) == None

    # A single date queries for that date only.
    assert time_utils.make_timespan(timespan=date(2021, 1, 1), interval="yearly") == [
        date(2021, 1, 1)
    ]

    assert time_utils.make_timespan(
        timespan=(date(2021, 1, 1), date(2021, 1, 1)), interval="yearly"
    ) == [date(2021, 1, 1)]

    # Results are ordered most recent first.
    assert time_utils.make_timespan(
        timespan=(date(2020, 1, 1), date(2021, 1, 1)), interval="yearly"
    ) == [date(2021, 1, 1), date(2020, 1, 1)]

    # A reversed (end, start) timespan is reordered — note the expected
    # order differs from the case above.
    assert time_utils.make_timespan(
        timespan=(date(2021, 1, 1), date(2020, 1, 1)), interval="yearly"
    ) == [date(2020, 1, 1), date(2021, 1, 1)]

    # Invalid interval names ("year" instead of "yearly") are rejected.
    with pytest.raises(AssertionError):
        assert time_utils.make_timespan(
            timespan=(date(2021, 1, 1), date(2020, 1, 1)), interval="year"
        ) == [date(2020, 1, 1), date(2021, 1, 1)]

    assert time_utils.make_timespan(
        timespan=(date(2021, 1, 1), date(2021, 3, 1)), interval="monthly"
    ) == [date(2021, 3, 1), date(2021, 2, 1), date(2021, 1, 1)]

    # Weekly steps are 7-day increments from the start date.
    assert time_utils.make_timespan(
        timespan=(date(2021, 1, 1), date(2021, 2, 1)), interval="weekly"
    ) == [
        date(2021, 1, 29),
        date(2021, 1, 22),
        date(2021, 1, 15),
        date(2021, 1, 8),
        date(2021, 1, 1),
    ]

    assert time_utils.make_timespan(
        timespan=(date(2021, 1, 1), date(2021, 1, 7)), interval="daily"
    ) == [
        date(2021, 1, 7),
        date(2021, 1, 6),
        date(2021, 1, 5),
        date(2021, 1, 4),
        date(2021, 1, 3),
        date(2021, 1, 2),
        date(2021, 1, 1),
    ]


def test_latest_date():
    # latest_date is order-insensitive over the timespan tuple.
    timespan = (date(2021, 1, 1), date(2021, 1, 2))
    assert time_utils.latest_date(timespan=timespan) == date(2021, 1, 2)
    assert time_utils.latest_date(timespan=timespan[::-1]) == date(2021, 1, 2)


def test_earliest_date():
    # earliest_date is order-insensitive over the timespan tuple.
    timespan = (date(2021, 1, 1), date(2021, 1, 2))
    assert time_utils.earliest_date(timespan=timespan) == date(2021, 1, 1)
    assert time_utils.earliest_date(timespan=timespan[::-1]) == date(2021, 1, 1)


def test_truncated_latest_date():
    # Latest date of the span, truncated to the interval's string form.
    timespan = (date(2020, 1, 1), date(2021, 1, 1))
    assert (
        time_utils.truncated_latest_date(timespan=timespan, interval="yearly") == "2021"
    )


def test_truncated_earliest_date():
    # Earliest date of the span, truncated to the interval's string form.
    timespan = (date(2020, 1, 1), date(2021, 1, 1))
    assert (
        time_utils.truncated_earliest_date(timespan=timespan, interval="yearly")
        == "2020"
    )
pytest.raises(ValueError): 51 | utils.check_str_args("word_2", ["word_0", "word_1"]) 52 | -------------------------------------------------------------------------------- /tests/test_wd_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Wikidata Utilities Tests 3 | ------------------------ 4 | """ 5 | 6 | from wikirepo.data import wd_utils 7 | 8 | entities_dict = wd_utils.EntitiesDict() 9 | 10 | 11 | def test_load_ent(qid): 12 | wd_utils.load_ent(ents_dict=entities_dict, pq_id=qid) 13 | 14 | 15 | def test_check_in_ents_dict(ents_dict, qid): 16 | wd_utils.check_in_ents_dict(ents_dict=ents_dict, qid=qid) 17 | 18 | 19 | def test_is_wd_id(qid, pop_pid): 20 | assert wd_utils.is_wd_id(qid) == True 21 | 22 | assert wd_utils.is_wd_id(pop_pid) == True 23 | 24 | assert wd_utils.is_wd_id("Not") == False 25 | 26 | 27 | def test_prop_has_many_entries(): 28 | assert wd_utils.prop_has_many_entries(["No"]) == False 29 | assert wd_utils.prop_has_many_entries(["It", "does"]) == True 30 | 31 | 32 | def test_get_lbl(ents_dict, pop_pid): 33 | assert wd_utils.get_lbl(ents_dict=None, pq_id=None) is None 34 | assert isinstance(wd_utils.get_lbl(ents_dict=ents_dict, pq_id=pop_pid), str) 35 | 36 | 37 | def test_get_prop(ents_dict, qid, pop_pid): 38 | assert isinstance( 39 | wd_utils.get_prop(ents_dict=ents_dict, qid=qid, pid=pop_pid)[0], dict 40 | ) 41 | 42 | 43 | def test_get_prop_id(ents_dict, qid, exec_pid): 44 | assert wd_utils.is_wd_id( 45 | wd_utils.get_prop_id(ents_dict=ents_dict, qid=qid, pid=exec_pid, i=0) 46 | ) 47 | 48 | 49 | def test_get_prop_val(ents_dict, qid, pop_pid, exec_pid): 50 | assert isinstance( 51 | wd_utils.get_prop_val( 52 | ents_dict=entities_dict, qid=qid, pid=pop_pid, i=0, ignore_char="" 53 | ), 54 | int, 55 | ) 56 | 57 | assert isinstance( 58 | wd_utils.get_prop_val( 59 | ents_dict=entities_dict, qid=qid, pid=exec_pid, i=0, ignore_char="" 60 | ), 61 | str, 62 | ) 63 | 
--------------------------------------------------------------------------------