├── datacommons_client
├── models
│ ├── __init__.py
│ ├── resolve.py
│ ├── node.py
│ ├── base.py
│ └── observation.py
├── tests
│ ├── README.MD
│ ├── test_utils.py
│ ├── test_decorators.py
│ ├── models
│ │ ├── test_resolve_models.py
│ │ ├── test_node_models.py
│ │ └── test_observation_models.py
│ ├── test_names.py
│ ├── endpoints
│ │ ├── test_error_handling.py
│ │ ├── test_observation_endpoint.py
│ │ ├── test_payloads.py
│ │ └── test_resolve_endpoint.py
│ ├── test_dataframes.py
│ └── utils
│   └── test_graph.py
├── utils
│ ├── __init__.py
│ ├── decorators.py
│ ├── names.py
│ ├── error_handling.py
│ ├── dataframes.py
│ └── data_processing.py
├── endpoints
│ ├── __init__.py
│ ├── payloads.py
│ ├── resolve.py
│ ├── base.py
│ └── observation.py
├── __init__.py
├── README.md
└── client.py
├── datacommons_pandas
├── core.py
├── key.py
├── node.py
├── places.py
├── sparql.py
├── utils.py
├── requests.py
├── stat_vars.py
├── test
│ └── __init__.py
├── examples
│ ├── __init__.py
│ └── df_builder.py
├── README.md
├── __init__.py
├── setup.py
└── CHANGELOG.md
├── requirements.txt
├── notebooks
├── intro_data_science
│ └── README.md
└── README.md
├── cloudbuild.yaml
├── .github
└── ISSUE_TEMPLATE
  ├── default-template.md
  └── bug_report.md
├── datacommons
├── examples
│ ├── __init__.py
│ ├── query.py
│ ├── core.py
│ └── places.py
├── test
│ ├── __init__.py
│ ├── set_api_key_test.py
│ ├── sparql_test.py
│ └── node_test.py
├── key.py
├── README.md
├── requests.py
├── __init__.py
├── setup.py
├── node.py
├── sparql.py
├── utils.py
├── core.py
└── stat_vars.py
├── CONTRIBUTING.md
├── docs
├── development.md
└── release.md
├── .gitignore
├── pyproject.toml
└── run_test.sh
/datacommons_client/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datacommons_client/tests/README.MD:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datacommons_client/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datacommons_client/endpoints/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datacommons_pandas/core.py:
--------------------------------------------------------------------------------
1 | ../datacommons/core.py
--------------------------------------------------------------------------------
/datacommons_pandas/key.py:
--------------------------------------------------------------------------------
1 | ../datacommons/key.py
--------------------------------------------------------------------------------
/datacommons_pandas/node.py:
--------------------------------------------------------------------------------
1 | ../datacommons/node.py
--------------------------------------------------------------------------------
/datacommons_pandas/places.py:
--------------------------------------------------------------------------------
1 | ../datacommons/places.py
--------------------------------------------------------------------------------
/datacommons_pandas/sparql.py:
--------------------------------------------------------------------------------
1 | ../datacommons/sparql.py
--------------------------------------------------------------------------------
/datacommons_pandas/utils.py:
--------------------------------------------------------------------------------
1 | ../datacommons/utils.py
--------------------------------------------------------------------------------
/datacommons_pandas/requests.py:
--------------------------------------------------------------------------------
1 | ../datacommons/requests.py
--------------------------------------------------------------------------------
/datacommons_pandas/stat_vars.py:
--------------------------------------------------------------------------------
1 | ../datacommons/stat_vars.py
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | isort==5.13.2
2 | mock
3 | pandas
4 | pytest
5 | requests==2.32.0
6 | typing_extensions==4.12.2
7 | yapf==0.40.2
8 | pydantic>=2.11
--------------------------------------------------------------------------------
/notebooks/intro_data_science/README.md:
--------------------------------------------------------------------------------
1 | All notebooks have been updated to use the V2 Python APIs and are found in the `v2/intro_data_science` directory.
--------------------------------------------------------------------------------
/cloudbuild.yaml:
--------------------------------------------------------------------------------
1 | steps:
2 | - id: api_python
3 | name: python:3.10-slim
4 | entrypoint: /bin/bash
5 | args:
6 | - -c
7 | - "./run_test.sh -s && hatch run test:all"
8 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/default-template.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Default template
3 | about: 'Create an issue for all other questions about the API'
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
--------------------------------------------------------------------------------
/datacommons_client/utils/decorators.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 |
3 | try:
4 | import pandas as pd
5 | except ImportError:
6 | pd = None
7 |
8 |
9 | def requires_pandas(func):
10 | """Decorator to check if Pandas is available before executing a method."""
11 |
12 | @wraps(func)
13 | def wrapper(*args, **kwargs):
14 | if pd is None:
15 | raise ImportError("Pandas is required for this method")
16 | return func(*args, **kwargs)
17 |
18 | return wrapper
19 |
--------------------------------------------------------------------------------
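A minimal usage sketch for `requires_pandas` (the helper below is hypothetical, not part of this snapshot): the decorated function only runs when pandas was importable at the time `decorators.py` was loaded.

```python
from datacommons_client.utils.decorators import requires_pandas


@requires_pandas
def to_records_dataframe(records: list[dict]):
  """Hypothetical helper: builds a DataFrame from a list of record dicts."""
  import pandas as pd  # safe here: the decorator already verified availability
  return pd.DataFrame.from_records(records)


# to_records_dataframe([{"dcid": "geoId/06"}]) returns a one-row DataFrame;
# if pandas is not installed, it raises ImportError instead.
```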
/datacommons/examples/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/datacommons/test/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/datacommons_pandas/test/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/datacommons_pandas/examples/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/datacommons_client/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "2.1.4"
2 | """
3 | Data Commons Client Package
4 |
5 | This package provides a Python client for interacting with the Data Commons API.
6 | """
7 |
8 | from datacommons_client.client import DataCommonsClient
9 | from datacommons_client.endpoints.base import API
10 | from datacommons_client.endpoints.node import NodeEndpoint
11 | from datacommons_client.endpoints.observation import ObservationEndpoint
12 | from datacommons_client.endpoints.resolve import ResolveEndpoint
13 |
14 | __all__ = [
15 | "DataCommonsClient",
16 | "API",
17 | "NodeEndpoint",
18 | "ObservationEndpoint",
19 | "ResolveEndpoint",
20 | ]
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve the API
4 | title: "[BUG] Description of bug"
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Additional context**
27 | Add any other context about the problem here.
28 |
--------------------------------------------------------------------------------
/datacommons/key.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ API key related functions.
15 | """
16 |
17 | import os
18 |
19 | # Environment variable for API key.
20 | _KEY_ENV = 'DC_API_KEY'
21 |
22 |
23 | def set_api_key(api_key):
24 | os.environ[_KEY_ENV] = api_key
25 |
26 |
27 | def get_api_key():
28 | return os.environ.get(_KEY_ENV, '')
29 |
--------------------------------------------------------------------------------
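Since `set_api_key` simply writes to the `DC_API_KEY` environment variable, the key can equally be supplied via the environment; a small sketch with a placeholder key:

```python
import os

import datacommons.key as key

key.set_api_key("my-placeholder-key")
assert os.environ["DC_API_KEY"] == "my-placeholder-key"
assert key.get_api_key() == "my-placeholder-key"

# Equivalent shell setup before launching Python:
#   export DC_API_KEY=my-placeholder-key
```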
/datacommons/test/set_api_key_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API unit tests.
15 |
16 | Unit tests setting the API Key.
17 | """
18 | import unittest
19 |
20 | import datacommons.key as key
21 |
22 | _KEY = "test-api-key"
23 |
24 |
25 | class TestApiKey(unittest.TestCase):
26 | """Unit test for setting or not setting the API Key."""
27 |
28 | def test_set_api_key(self):
29 | key.set_api_key(_KEY)
30 | self.assertEqual(key.get_api_key(), _KEY)
31 |
32 |
33 | if __name__ == '__main__':
34 | unittest.main()
35 |
--------------------------------------------------------------------------------
/datacommons_client/README.md:
--------------------------------------------------------------------------------
1 | # Data Commons Python API
2 |
3 | This is a Python library for accessing data in the Data Commons Graph.
4 |
5 | To get started, install this package from pip.
6 |
7 | ```bash
8 | pip install datacommons-client
9 | ```
10 |
11 | To get additional functionality to work with Pandas DataFrames, install the package
12 | with the optional Pandas dependency.
13 |
14 | ```bash
15 | pip install "datacommons-client[Pandas]"
16 | ```
17 |
18 | Once the package is installed, import `datacommons_client`.
19 |
20 | ```python
21 | import datacommons_client as dc
22 | ```
23 |
24 | For more detail on getting started with the API, please visit our [API Overview](https://docs.datacommons.org/api/).
25 |
26 | ## About Data Commons
27 |
28 | [Data Commons](https://datacommons.org/) is an open knowledge repository that
29 | provides a unified view across multiple public data sets and statistics. You can
30 | view what [datasets](https://datacommons.org/datasets) are currently ingested
31 | and browse the graph using our [browser](https://datacommons.org/browser).
32 |
33 | ## License
34 |
35 | Apache 2.0
36 |
37 | ## Support
38 |
39 | For questions, please send an email to `support@datacommons.org`.
40 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
25 | ## Community Guidelines
26 |
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google.com/conduct/).
29 |
--------------------------------------------------------------------------------
/docs/development.md:
--------------------------------------------------------------------------------
1 | # Python API Development
2 |
3 | This client library supports `python>=3.10`.
4 |
5 | ## Set up
6 | If you haven't already, clone this repository.
7 |
8 | ```bash
9 | git clone https://github.com/datacommonsorg/api-python.git
10 | cd api-python
11 | ```
12 |
13 | To set up the Python environment for development, run:
14 |
15 | ```bash
16 | ./run_test.sh -s
17 | ```
18 |
19 | This will install `hatch`, which is the main tool used to manage the
20 | environment, dependencies, and development tools. You can also manually install
21 | `hatch` and create a virtual environment.
22 |
23 | ```bash
24 | pip install hatch
25 | hatch env create
26 | ```
27 |
28 | ## Code style and linting
29 | We use `isort` and `yapf` for code formatting. Check formatting with:
30 |
31 | ```bash
32 | hatch run lint:check
33 | ```
34 |
35 | To automatically fix formatting run:
36 |
37 | ```bash
38 | hatch run lint:format
39 | ```
40 |
41 | ## Running tests
42 |
43 | To test, run:
44 |
45 | ```bash
46 | hatch run test:all
47 | ```
48 |
49 | To debug the continuous integration tests, run:
50 |
51 | ```bash
52 | gcloud builds submit . --project=datcom-ci --config=cloudbuild.yaml
53 | ```
54 |
55 | The Cloud Build job runs the same set of tests as `hatch run test:all`.
--------------------------------------------------------------------------------
/datacommons_client/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from datacommons_client.utils.data_processing import group_variables_by_entity
2 |
3 |
4 | def test_group_variables_by_entity_basic():
5 | """Test grouping with simple variable-entity mapping."""
6 | input_data = {
7 | "var1": ["ent1", "ent2"],
8 | "var2": ["ent2", "ent3"],
9 | "var3": ["ent1"],
10 | }
11 | expected_output = {
12 | "ent1": ["var1", "var3"],
13 | "ent2": ["var1", "var2"],
14 | "ent3": ["var2"],
15 | }
16 |
17 | result = group_variables_by_entity(input_data)
18 | assert result == expected_output
19 |
20 |
21 | def test_group_variables_by_entity_duplicate_entities():
22 | """Test grouping when a variable has duplicate entities."""
23 | input_data = {
24 | "var1": ["ent1", "ent1", "ent2"],
25 | }
26 | result = group_variables_by_entity(input_data)
27 | assert result["ent1"].count("var1") == 2 # duplicates are preserved
28 | assert "ent2" in result
29 | assert result["ent2"] == ["var1"]
30 |
31 |
32 | def test_group_variables_by_entity_preserves_order():
33 | """Test if the order of variables is preserved in the resulting entity lists."""
34 | input_data = {
35 | "var1": ["ent1"],
36 | "var2": ["ent1"],
37 | "var3": ["ent1"],
38 | }
39 | result = group_variables_by_entity(input_data)
40 | assert result["ent1"] == ["var1", "var2", "var3"]
41 |
--------------------------------------------------------------------------------
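The implementation of `group_variables_by_entity` is not included in this snapshot; a minimal sketch consistent with the tests above (insertion order and duplicates preserved) could look like this:

```python
def group_variables_by_entity(
    data: dict[str, list[str]]) -> dict[str, list[str]]:
  """Inverts a variable -> entities mapping into an entity -> variables one."""
  result: dict[str, list[str]] = {}
  for variable, entities in data.items():
    for entity in entities:
      # setdefault keeps insertion order and preserves duplicate entities.
      result.setdefault(entity, []).append(variable)
  return result
```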
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # Python API Notebooks
2 |
3 | This directory contains Colab notebooks that use the V1 Python API. For current notebooks, see the `v2` directory.
4 |
5 | Notebook | Description
6 | -------- | -----------
7 | [`Place Similarity with Data Commons.ipynb`](https://colab.research.google.com/drive/1t7dFDSpCT16QDkNuD933QgLUL9BOdCAS) | A notebook that identifies similar places given a place and one or more statistical variables from Data Commons.
8 | [`Missing Data Imputation Tutorial.ipynb`](https://colab.research.google.com/drive/1S_rMCyRsgygd8sV-r8aLRPcKwZPFcEGb) | A notebook that analyzes the different types of time series holes and different methods of imputing those holes.
9 | [`analyzing_genomic_data.ipynb`](https://colab.research.google.com/drive/1Io7EDr4LjfPLl_l2JYY8__WbfitfNlOf) | A notebook that analyzes genetic variants within RUNX1 (provided by multiple datasets from UCSC Genome Browser, NCBI/gene, and ClinVar).
10 | [`Drug_Discovery_With_Data_Commons.ipynb`](https://colab.research.google.com/drive/1dSKYiRMn3mbDsInorQzYM0yk7sqv6fIV) | A notebook performing drug discovery by identifying novel applications of previously approved drugs using Biomedical Data Commons.
11 | [`protein-charts.ipynb`](https://colab.research.google.com/drive/1Kh-ufqobdChZ2qQgEY0rdPA2_DBmOiSG) | A notebook summarizing various protein properties and interactions using graphical visualizations.
12 |
--------------------------------------------------------------------------------
/datacommons_pandas/README.md:
--------------------------------------------------------------------------------
1 | # Data Commons Pandas API
2 |
3 | This is a Python library for creating pandas objects with data in the
4 | Data Commons Graph.
5 |
6 | To get started, install this package from pip.
7 |
8 | ```bash
9 | pip install datacommons_pandas
10 | ```
11 |
12 | Once the package is installed, import `datacommons_pandas`.
13 |
14 | ```python
15 | import datacommons_pandas as dcpd
16 | ```
17 |
18 | For more detail on getting started with the API, please visit our
19 | [API Overview](https://docs.datacommons.org/api/pandas/).
20 |
21 | When you are ready to use the API, you can refer to the `examples` directory for
22 | examples of how to use this package to perform various tasks. More tutorials and
23 | documentation can be found on our [tutorials page](https://docs.datacommons.org/tutorials/)!
24 |
25 | ## About Data Commons
26 |
27 | [Data Commons](https://datacommons.org/) is an open knowledge repository that
28 | provides a unified view across multiple public data sets and statistics. You can
29 | view what [datasets](https://datacommons.org/datasets) are currently ingested
30 | and browse the graph using our [browser](https://datacommons.org/browser).
31 |
32 | ## License
33 |
34 | Apache 2.0
35 |
36 | ## Support
37 |
38 | For general questions or issues about the API, please open an issue on our
39 | [issues](https://github.com/datacommonsorg/api-python/issues) page. For all other
40 | questions, please send an email to `support@datacommons.org`.
41 |
--------------------------------------------------------------------------------
/datacommons/README.md:
--------------------------------------------------------------------------------
1 | # Data Commons Python API
2 |
3 | This is a Python library for accessing data in the Data Commons Graph.
4 |
5 | > See also: [Data Commons Pandas API](../datacommons_pandas/README.md).
6 |
7 | To get started, install this package from pip.
8 |
9 | ```bash
10 | pip install datacommons
11 | ```
12 |
13 | Once the package is installed, import `datacommons`.
14 |
15 | ```python
16 | import datacommons as dc
17 | ```
18 |
19 | For more detail on getting started with the API, please visit our
20 | [API Overview](https://docs.datacommons.org/api/).
21 |
22 | When you are ready to use the API, you can refer to the `examples` directory for
23 | examples of how to use this package to perform various tasks. More tutorials and
24 | documentation can be found on our [tutorials page](https://docs.datacommons.org/tutorials/)!
25 |
26 | ## About Data Commons
27 |
28 | [Data Commons](https://datacommons.org/) is an open knowledge repository that
29 | provides a unified view across multiple public data sets and statistics. You can
30 | view what [datasets](https://datacommons.org/datasets) are currently ingested
31 | and browse the graph using our [browser](https://datacommons.org/browser).
32 |
33 | ## License
34 |
35 | Apache 2.0
36 |
37 | ## Support
38 |
39 | For general questions or issues about the API, please open an issue on our
40 | [issues](https://github.com/datacommonsorg/api-python/issues) page. For all other
41 | questions, please send an email to `support@datacommons.org`.
42 |
--------------------------------------------------------------------------------
/datacommons/examples/query.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API examples.
15 |
16 | Example on how to use the Client API SPARQL query wrapper.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | import datacommons as dc
24 |
25 |
26 | def main():
27 | # Create a SPARQL query querying for the name of some states
28 | query = ('''
29 | SELECT ?name ?dcid
30 | WHERE {
31 | ?a typeOf Place .
32 | ?a name ?name .
33 | ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
34 | ?a dcid ?dcid
35 | }
36 | ''')
37 | print('> Issuing query.\n{}'.format(query))
38 |
39 | # Iterate through all the rows in the results.
40 | print('> Printing results.\n')
41 | for row in dc.query(query_string=query):
42 | print(' {}'.format(row))
43 |
44 |
45 | if __name__ == '__main__':
46 | main()
47 |
--------------------------------------------------------------------------------
/datacommons_client/models/resolve.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pydantic import Field
4 |
5 | from datacommons_client.models.base import BaseDCModel
6 | from datacommons_client.models.base import DictLikeRootModel
7 | from datacommons_client.models.base import DominantType
8 | from datacommons_client.models.base import NodeDCID
9 | from datacommons_client.models.base import Query
10 |
11 |
12 | class Candidate(BaseDCModel):
13 | """Represents a candidate in the resolution response.
14 |
15 | Attributes:
16 |     dcid (NodeDCID): The Data Commons ID for the candidate.
17 | dominantType (Optional[DominantType]): The dominant type of the candidate,
18 | if available. This represents the primary type associated with the DCID.
19 | """
20 |
21 | dcid: NodeDCID = Field(default_factory=str)
22 | dominantType: Optional[DominantType] = None
23 |
24 |
25 | class Entity(BaseDCModel):
26 | """Represents an entity with its resolution candidates.
27 |
28 | Attributes:
29 | node (Query): The query string or node being resolved.
30 |     candidates (list[Candidate]): A list of candidates that match the query.
31 | """
32 |
33 | node: Query
34 | candidates: list[Candidate] = Field(default_factory=list)
35 |
36 |
37 | class FlatCandidateMapping(BaseDCModel,
38 | DictLikeRootModel[dict[Query,
39 | list[NodeDCID] | NodeDCID]]):
40 | """A model to represent a mapping of queries to candidates."""
41 |
--------------------------------------------------------------------------------
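Because these are Pydantic models, raw API payloads can be validated directly with `model_validate`; a short sketch with made-up values:

```python
from datacommons_client.models.resolve import Entity

entity = Entity.model_validate({
    "node": "Atlantis",  # made-up query string
    "candidates": [{
        "dcid": "dc/abc123",  # made-up DCID
        "dominantType": "Place",
    }],
})
assert entity.candidates[0].dcid == "dc/abc123"
```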
/datacommons/requests.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Send http requests to Data Commons REST API endpoints.
15 | """
16 |
17 | from typing import Dict
18 |
19 | import requests
20 |
21 | import datacommons.key as key
22 |
23 | # REST API endpoint root
24 | _API_ROOT = "https://api.datacommons.org"
25 |
26 |
27 | def _post(path: str, data={}) -> Dict:
28 | url = _API_ROOT + path
29 | headers = {'Content-Type': 'application/json'}
30 | api_key = key.get_api_key()
31 | if api_key:
32 | headers['x-api-key'] = api_key
33 | try:
34 | resp = requests.post(url, json=data, headers=headers)
35 | if resp.status_code != 200:
36 | raise Exception(
37 | f'{resp.status_code}: {resp.reason}\n{resp.json()["message"]}')
38 | return resp.json()
39 | except requests.exceptions.Timeout:
40 | raise Exception('Data request timed out, please try again.')
41 | except requests.exceptions.RequestException as e:
42 | raise e
43 |
--------------------------------------------------------------------------------
/datacommons_client/tests/test_decorators.py:
--------------------------------------------------------------------------------
1 | from unittest import mock
2 |
3 | import pytest
4 |
5 | from datacommons_client.utils.decorators import requires_pandas
6 |
7 | try:
8 | import pandas as pd
9 |
10 | PANDAS_AVAILABLE = True
11 | except ImportError:
12 | PANDAS_AVAILABLE = False
13 |
14 |
15 | @requires_pandas
16 | def function_requiring_pandas():
17 | return "Pandas is available"
18 |
19 |
20 | def test_requires_pandas_with_pandas():
21 | """Test that the function executes normally when Pandas is available."""
22 | if PANDAS_AVAILABLE:
23 | assert function_requiring_pandas() == "Pandas is available"
24 |
25 |
26 | def test_requires_pandas_without_pandas(monkeypatch):
27 | """Test that the decorator raises ImportError when Pandas is not available."""
28 | # Simulate Pandas being unavailable
29 | monkeypatch.setattr("datacommons_client.utils.decorators.pd", None)
30 | with pytest.raises(ImportError, match="Pandas is required for this method"):
31 | function_requiring_pandas()
32 |
33 |
34 | def test_importerror_handling(monkeypatch):
35 | """Test that the ImportError block is executed when Pandas is not installed."""
36 |
37 | # Simulate pandas not being available
38 | with mock.patch.dict("sys.modules", {"pandas": None}):
39 | import importlib
40 |
41 | # Reload the module so that a new check of Pandas is performed
42 | import datacommons_client.utils.decorators
43 | importlib.reload(datacommons_client.utils.decorators)
44 |
45 | # Ensure pd is set to None
46 | assert datacommons_client.utils.decorators.pd is None
47 |
--------------------------------------------------------------------------------
/datacommons/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # isort: skip_file
16 |
17 | ################################## IMPORTANT #################################
18 | # All user-facing functions in this package must be symlinked to the #
19 | # datacommons_pandas pkg. This is so that users do not need to import both #
20 | # libraries for pd support. Please keep the below imports in sync with the #
21 | # __init__.py in the datacommons_pandas/ dir, and add a symlink when #
22 | # creating a new file. #
23 | # TODO: https://github.com/datacommonsorg/api-python/issues/149 #
24 | ##############################################################################
25 |
26 | # Data Commons SPARQL query support
27 | from datacommons.sparql import query
28 |
29 | # Data Commons Python API
30 | from datacommons.core import get_property_labels, get_property_values, get_triples
31 | from datacommons.places import get_places_in, get_related_places, get_stats
32 | from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all
33 |
34 | from datacommons.key import set_api_key
35 | from datacommons.node import properties, property_values, triples
36 |
--------------------------------------------------------------------------------
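A short sketch combining the imports above (the dcids are illustrative, and an API key may be required depending on the deployment):

```python
import datacommons as dc

dc.set_api_key("my-placeholder-key")

# Latest observed value of a StatisticalVariable for a place.
value = dc.get_stat_value("geoId/06", "Count_Person")

# Full time series as a {date: value} mapping.
series = dc.get_stat_series("geoId/06", "Count_Person")
```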
/datacommons_pandas/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # isort: skip_file
16 |
17 | from datacommons_pandas.df_builder import build_time_series, build_time_series_dataframe, build_multivariate_dataframe
18 |
19 | ################################ SYMLINK FILES ################################
20 | # We include symlinks to all user-facing functions from the datacommons pkg. #
21 | # This is so that users do not need to import both libraries for pd support. #
22 | # Please keep the below in sync with the __init__.py in the datacommons/ dir #
23 | # TODO: enforce this. https://github.com/datacommonsorg/api-python/issues/149 #
24 | ###############################################################################
25 | # Data Commons SPARQL query support
26 | from datacommons_pandas.sparql import query
27 |
28 | # Data Commons Python API
29 | from datacommons_pandas.core import get_property_labels, get_property_values, get_triples
30 | from datacommons_pandas.places import get_places_in, get_related_places, get_stats
31 | from datacommons_pandas.stat_vars import get_stat_value, get_stat_series, get_stat_all
32 |
33 | from datacommons_pandas.key import set_api_key
34 | from datacommons_pandas.node import properties, property_values, triples
35 |
--------------------------------------------------------------------------------
/datacommons_client/tests/models/test_resolve_models.py:
--------------------------------------------------------------------------------
1 | from datacommons_client.models.resolve import Candidate
2 | from datacommons_client.models.resolve import Entity
3 |
4 |
5 | def test_candidate_model_validation():
6 | """Test that Candidate.model_validate parses full data correctly."""
7 | json_data = {"dcid": "dcid123", "dominantType": "Place"}
8 | candidate = Candidate.model_validate(json_data)
9 | assert candidate.dcid == "dcid123"
10 | assert candidate.dominantType == "Place"
11 |
12 |
13 | def test_candidate_model_validation_partial():
14 | """Test Candidate.model_validate with missing optional dominantType."""
15 | json_data = {"dcid": "dcid456"}
16 | candidate = Candidate.model_validate(json_data)
17 | assert candidate.dcid == "dcid456"
18 | assert candidate.dominantType is None
19 |
20 |
21 | def test_entity_model_validation():
22 | """Test that Entity.model_validate handles multiple candidates."""
23 | json_data = {
24 | "node":
25 | "test_query",
26 | "candidates": [
27 | {
28 | "dcid": "dcid123",
29 | "dominantType": "Place"
30 | },
31 | {
32 | "dcid": "dcid456",
33 | "dominantType": "Event"
34 | },
35 | ],
36 | }
37 | entity = Entity.model_validate(json_data)
38 | assert entity.node == "test_query"
39 | assert len(entity.candidates) == 2
40 | assert entity.candidates[0].dcid == "dcid123"
41 | assert entity.candidates[0].dominantType == "Place"
42 | assert entity.candidates[1].dcid == "dcid456"
43 | assert entity.candidates[1].dominantType == "Event"
44 |
45 |
46 | def test_entity_model_validation_empty_candidates():
47 | """Test Entity.model_validate with no candidates."""
48 | json_data = {"node": "test_query", "candidates": []}
49 | entity = Entity.model_validate(json_data)
50 | assert entity.node == "test_query"
51 | assert len(entity.candidates) == 0
52 |
--------------------------------------------------------------------------------
/datacommons/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Build and distribute the datacommons package to PyPI."""
15 | import os
16 |
17 | from setuptools import setup
18 |
19 | dir_path = os.path.dirname(os.path.realpath(__file__))
20 | with open(os.path.join(dir_path, 'README.md'), 'r') as fh:
21 | long_description = fh.read()
22 |
23 | # Package metadata.
24 | NAME = 'datacommons'
25 | DESCRIPTION = 'A library to access Data Commons Python API.'
26 | URL = 'https://github.com/datacommonsorg/api-python'
27 | EMAIL = 'support@datacommons.org'
28 | AUTHOR = 'datacommons.org'
29 | REQUIRES_PYTHON = '>=3.7'
30 | VERSION = '1.4.3'
31 | REQUIRED = ['six', 'requests']
32 | PACKAGES = ['datacommons']
33 |
34 | setup(
35 | name=NAME,
36 | version=VERSION,
37 | description=DESCRIPTION,
38 | long_description=long_description,
39 | long_description_content_type='text/markdown',
40 | author=AUTHOR,
41 | author_email=EMAIL,
42 | maintainer=AUTHOR,
43 | maintainer_email=EMAIL,
44 | python_requires=REQUIRES_PYTHON,
45 | url=URL,
46 | packages=PACKAGES,
47 | install_requires=REQUIRED,
48 | include_package_data=True,
49 | license='Apache 2.0',
50 | classifiers=[
51 | 'Intended Audience :: Developers',
52 | 'License :: OSI Approved :: Apache Software License',
53 | 'Programming Language :: Python',
54 | 'Programming Language :: Python :: 3.7',
55 | 'Programming Language :: Python :: Implementation :: CPython',
56 | 'Topic :: Software Development',
57 | ],
58 | )
59 |
--------------------------------------------------------------------------------
/datacommons_pandas/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Build and distribute the datacommons_pandas package to PyPI."""
15 | import os
16 |
17 | from setuptools import setup
18 |
19 | dir_path = os.path.dirname(os.path.realpath(__file__))
20 | with open(os.path.join(dir_path, 'README.md'), 'r') as fh:
21 | long_description = fh.read()
22 |
23 | # Package metadata.
24 | NAME = 'datacommons_pandas'
25 | DESCRIPTION = 'A library to create pandas objects using the Data Commons Python API.'
26 | URL = 'https://github.com/datacommonsorg/api-python'
27 | EMAIL = 'support@datacommons.org'
28 | AUTHOR = 'datacommons.org'
29 | REQUIRES_PYTHON = '>=3.7'
30 | VERSION = '0.0.3'
31 | REQUIRED = ['pandas', 'six', 'requests']
32 | PACKAGES = ['datacommons_pandas']
33 |
34 | setup(
35 | name=NAME,
36 | version=VERSION,
37 | description=DESCRIPTION,
38 | long_description=long_description,
39 | long_description_content_type='text/markdown',
40 | author=AUTHOR,
41 | author_email=EMAIL,
42 | maintainer=AUTHOR,
43 | maintainer_email=EMAIL,
44 | python_requires=REQUIRES_PYTHON,
45 | url=URL,
46 | packages=PACKAGES,
47 | install_requires=REQUIRED,
48 | include_package_data=True,
49 | license='Apache 2.0',
50 | classifiers=[
51 | 'Intended Audience :: Developers',
52 | 'License :: OSI Approved :: Apache Software License',
53 | 'Programming Language :: Python',
54 | 'Programming Language :: Python :: 3.7',
55 | 'Programming Language :: Python :: Implementation :: CPython',
56 | 'Topic :: Software Development',
57 | ],
58 | )
59 |
--------------------------------------------------------------------------------
/datacommons_pandas/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## 0.0.3
4 |
5 | **Date** - 11/10/2020
6 |
7 | **Release Tag** - [pd.0.0.3](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.3)
8 |
9 | **Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)
10 |
11 | Update to use datacommons Python API 1.4.3, which returns empty data structures instead of erroring when no data is available.
12 |
13 | ## 0.0.2
14 |
15 | **Date** - 09/16/2020
16 |
17 | **Release Tag** - [pd.0.0.2](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.2)
18 |
19 | **Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)
20 |
21 | Update to use datacommons Python API 1.4.2, which adds batching to the get_stat_all function used by build_time_series_dataframe and build_multivariate_dataframe.
22 |
23 | ## 0.0.1
24 |
25 | **Date** - 08/25/2020
26 |
27 | **Release Tag** - [pd.0.0.1](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.1)
28 |
29 | **Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)
30 |
31 | Added pandas wrapper functions.
32 |
33 | - `build_time_series` constructs a pd.Series for a given StatisticalVariable and Place, where the time series are indexed by date.
34 | - `build_time_series_dataframe` constructs a pd.DataFrame for a given StatisticalVariable and a set of Places. The DataFrame will have Places as the index and dates as the columns.
35 | - `build_multivariate_dataframe` constructs a pd.DataFrame for a set of StatisticalVariables and a set of Places. The DataFrame will have Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.
36 |
37 | For multi-place functions, when a StatisticalVariable has multiple StatVarObservation options,
38 | Data Commons chooses a set of StatVarObservation options that covers the most places. This
39 | ensures that the data fetched for a StatisticalVariable is comparable across places.
40 | When there is a tie, we select the StatVarObservation options set with the latest date
41 | data is available for any place.
42 |
--------------------------------------------------------------------------------
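A brief sketch of the three wrappers described above (argument order follows the descriptions; the dcids are illustrative):

```python
import datacommons_pandas as dcpd

# pd.Series indexed by date, for one place and one StatisticalVariable.
series = dcpd.build_time_series("geoId/06", "Count_Person")

# pd.DataFrame with places as the index and dates as the columns.
ts_df = dcpd.build_time_series_dataframe(["geoId/06", "geoId/21"],
                                         "Count_Person")

# pd.DataFrame with places as the index and StatisticalVariables as the
# columns, holding the most recent comparable values.
wide_df = dcpd.build_multivariate_dataframe(
    ["geoId/06", "geoId/21"], ["Count_Person", "Median_Age_Person"])
```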
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .dat
3 |
4 | ### Python ###
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # Unit test / coverage reports
32 | htmlcov/
33 | .tox/
34 | .nox/
35 | .coverage
36 | .coverage.*
37 | .cache
38 | nosetests.xml
39 | coverage.xml
40 | *.cover
41 | .hypothesis/
42 | .pytest_cache/
43 |
44 | # Translations
45 | *.mo
46 | *.pot
47 |
48 | # Django stuff:
49 | *.log
50 | local_settings.py
51 | db.sqlite3
52 | db.sqlite3-journal
53 |
54 | # Flask stuff:
55 | instance/
56 | .webassets-cache
57 |
58 | # Scrapy stuff:
59 | .scrapy
60 |
61 | # Sphinx documentation
62 | docs/_build/
63 |
64 | # PyBuilder
65 | target/
66 |
67 | # Jupyter Notebook
68 | .ipynb_checkpoints
69 |
70 | # IPython
71 | profile_default/
72 | ipython_config.py
73 |
74 | # pyenv
75 | .python-version
76 |
77 | # Environments
78 | .env
79 | .venv
80 | env/
81 | venv/
82 | ENV/
83 | env.bak/
84 | venv.bak/
85 |
86 | ### Ignore MAC OS System files ###
87 | # General
88 | .DS_Store
89 | .AppleDouble
90 | .LSOverride
91 | .profraw
92 |
93 | # Icon must end with two \r
94 | Icon
95 |
96 | # Thumbnails
97 | ._*
98 |
99 | # Files that might appear in the root of a volume
100 | .DocumentRevisions-V100
101 | .fseventsd
102 | .Spotlight-V100
103 | .TemporaryItems
104 | .Trashes
105 | .VolumeIcon.icns
106 | .com.apple.timemachine.donotpresent
107 |
108 | # Directories potentially created on remote AFP share
109 | .AppleDB
110 | .AppleDesktop
111 | Network Trash Folder
112 | Temporary Items
113 | .apdisk
114 |
115 | ### Ignore BAZEL BUILD System files ###
116 | /bazel-*
117 |
118 | ### R and RStudio ###
119 | .Rproj.user
120 | .Rhistory
121 | .RData
122 | .Ruserdata
123 | datacommons.RCheck
124 | *tar.gz
125 |
126 | ## VSCode
127 | .vscode/
128 |
129 | ## JetBrains
130 | .idea/
131 |
132 | # Gemini
133 | GEMINI.md
134 | .gemini/
135 |
136 | # Temp files
137 | tmp/
--------------------------------------------------------------------------------
/datacommons_client/utils/names.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from datacommons_client.models.node import Node
4 |
5 | DEFAULT_NAME_PROPERTY: str = "name"
6 | NAME_WITH_LANGUAGE_PROPERTY: str = "nameWithLanguage"
7 | DEFAULT_NAME_LANGUAGE: str = "en"
8 |
9 |
10 | def extract_name_from_english_name_property(properties: list | Node) -> str:
11 | """
12 |   Extracts the name from a property Node (or list of Nodes) with English names.
13 |   Args:
14 |     properties (list | Node): A Node or list of Nodes holding English names.
15 |   Returns:
16 |     str: The value of the first Node, or an empty string if none is available.
17 | """
18 | if not properties:
19 | return ''
20 |
21 | if isinstance(properties, Node):
22 | properties = [properties]
23 |
24 | return properties[0].value
25 |
26 |
27 | def extract_name_from_property_with_language(
28 | properties: list,
29 | language: str,
30 | fallback_language: Optional[str] = None) -> tuple[str | None, str | None]:
31 | """
32 | Extracts the name from a list of properties with language tags.
33 | Args:
34 | properties (list): A list of properties with language tags.
35 | language (str): The desired language code.
36 | fallback_language: If provided, this language will be used as a fallback if the requested
37 | language is not available. If not provided, no fallback will be used.
38 |
39 | Returns:
40 | tuple[str,str]: A tuple containing the extracted name and its language.
41 | """
42 |   # Name found in the fallback language, if any.
43 | fallback_name = None
44 |
45 | # Iterate through the properties to find the name in the specified language
46 | for candidate in properties:
47 |     # If the candidate has no language tag, skip it
48 | if "@" not in candidate.value:
49 | continue
50 |
51 | # Split the candidate value into name and language
52 | name, lang = candidate.value.rsplit("@", 1)
53 |
54 |     # If the language matches the requested one, return the name immediately.
55 |     if lang == language:
56 |       return name, lang
57 |     # If the language matches the fallback language, store the name.
58 |     if fallback_language and (lang == fallback_language):
59 |       fallback_name = name
60 |
61 |   # No match for the requested language: return the fallback (if one was found).
62 |   return fallback_name, fallback_language if fallback_name is not None else None
63 |
--------------------------------------------------------------------------------
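A quick usage sketch of the language-aware extractor (values are illustrative; the tests later in this snapshot exercise the same paths):

```python
from datacommons_client.models.node import Node
from datacommons_client.utils.names import extract_name_from_property_with_language

nodes = [Node(value="Nombre@es"), Node(value="Name@en")]
name, lang = extract_name_from_property_with_language(nodes,
                                                      language="es",
                                                      fallback_language="en")
assert (name, lang) == ("Nombre", "es")
```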
/datacommons/examples/core.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API examples.
15 |
16 | Basic demo for get_property_labels, get_property_values, and get_triples.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | import datacommons as dc
24 |
25 |
26 | def main():
27 |   # DCIDs for Santa Clara County and a related statistical population node.
28 | dcids = ['geoId/06085', 'dc/p/zsb968m3v1f97']
29 |
30 | # Print all incoming and outgoing properties from Santa Clara County.
31 | print('Property Labels for Santa Clara County')
32 | in_labels = dc.get_property_labels(dcids)
33 | out_labels = dc.get_property_labels(dcids, out=False)
34 | print('> Printing properties for {}'.format(dcids))
35 | print('> Incoming properties: {}'.format(in_labels))
36 | print('> Outgoing properties: {}'.format(out_labels))
37 |
38 | # Print all property values for "containedInPlace" for Santa Clara County.
39 | print('Property Values for "containedInPlace" of Santa Clara County')
40 | prop_vals = dc.get_property_values(dcids,
41 | 'containedInPlace',
42 | out=False,
43 | value_type='City')
44 | print('> Cities contained in {}'.format(dcids))
45 | for dcid in dcids:
46 | for city_dcid in prop_vals[dcid]:
47 | print(' - {}'.format(city_dcid))
48 |
49 |   # Print the first 5 triples associated with Santa Clara County
50 | print('Triples for Santa Clara County')
51 | triples = dc.get_triples(dcids)
52 | for dcid in dcids:
53 | print('> Triples for {}'.format(dcid))
54 | for s, p, o in triples[dcid][:5]:
55 | print(' - ("{}", {}, "{}")'.format(s, p, o))
56 |
57 |
58 | if __name__ == '__main__':
59 | main()
60 |
--------------------------------------------------------------------------------
/datacommons_client/tests/test_names.py:
--------------------------------------------------------------------------------
1 | from datacommons_client.models.node import Node
2 | from datacommons_client.utils.names import extract_name_from_english_name_property
3 | from datacommons_client.utils.names import extract_name_from_property_with_language
4 |
5 |
6 | def test_extract_name_from_english_name_property_with_list():
7 | """Test extracting name from a list of Nodes."""
8 | properties = [Node(value="Test Name")]
9 | result = extract_name_from_english_name_property(properties)
10 | assert result == "Test Name"
11 |
12 |
13 | def test_extract_name_from_english_empty_list():
14 | """Test extracting name from an empty list."""
15 | result = extract_name_from_english_name_property([])
16 | assert result == ""
17 |
18 |
19 | def test_extract_name_from_english_not_list():
20 | """Test extracting name from a single Node (not in a list)."""
21 | property_node = Node(value="Single Node Name")
22 | result = extract_name_from_english_name_property(property_node)
23 | assert result == "Single Node Name"
24 |
25 |
26 | def test_extract_name_from_property_with_language_match():
27 | """Test extracting name when desired language is present."""
28 | properties = [
29 | Node(value="Nombre@es"),
30 | Node(value="Name@en"),
31 | ]
32 | result = extract_name_from_property_with_language(properties,
33 | language="es",
34 | fallback_language="en")
35 | assert result[0] == "Nombre"
36 | assert result[1] == "es"
37 |
38 |
39 | def test_extract_name_from_property_with_language_fallback():
40 | """Test fallback to English when desired language is not found."""
41 | properties = [
42 | Node(value="Name@en"),
43 | Node(value="Nom@fr"),
44 | Node(value="Nome@it"),
45 | ]
46 | result = extract_name_from_property_with_language(properties,
47 | language="de",
48 | fallback_language="it")
49 | assert result[0] == "Nome"
50 | assert result[1] == "it"
51 |
52 |
53 | def test_extract_name_from_property_with_language_no_fallback():
54 | """Test no result when language is not found and fallback is disabled."""
55 | properties = [
56 | Node(value="Name@en"),
57 | Node(value="Nom@fr"),
58 | ]
59 | result = extract_name_from_property_with_language(properties, language="de")
60 | assert result[0] is None
61 | assert result[1] is None
62 |
63 |
64 | def test_extract_name_from_property_without_language_tags():
65 | """Test that properties without language tags are skipped."""
66 | properties = [
67 | Node(value="Plain str"),
68 | Node(value="Name@en"),
69 | ]
70 | result = extract_name_from_property_with_language(properties, language="en")
71 | assert result[0] == "Name"
72 | assert result[1] == "en"
73 |
--------------------------------------------------------------------------------
/docs/release.md:
--------------------------------------------------------------------------------
1 | # Python API Release
2 |
3 | ## Releasing the `datacommons_client` package
4 | Support for V2 of the Data Commons API is being released as a new client library
5 | called `datacommons_client`.
6 |
7 | To release:
8 | 1. Update [CHANGELOG.md](../CHANGELOG.md) with relevant changes.
9 | 2. Bump the version by running `hatch version` followed by `patch`, `minor`,
10 |    `major`, a specific version number, or a pre-release segment such as `beta`.
11 | 3. Build the package
12 | ```bash
13 | hatch build
14 | ```
15 | 4. (optionally) Test the deployment process locally
16 | ```bash
17 | hatch run release:localtest
18 | ```
19 | 5. Test the deployment process on Test PyPi
20 | ```bash
21 | hatch run release:testpypi
22 | ```
23 |
24 | 6. Once verified, upload to PyPI:
25 | ```bash
26 | hatch run release:pypi
27 | ```
28 |
29 | 7. Create a version tag on Git:
30 | ```bash
31 | hatch run release:tag
32 | ```
33 |
34 | ---
35 |
36 | ## Releasing the legacy packages
37 |
38 |
39 | Note: Always release `datacommons_pandas` when `datacommons` is released.
40 |
41 | **If this is your first time releasing to PyPI**, please review the PyPI guide
42 | starting from the
43 | [setup
44 | section](https://packaging.python.org/tutorials/packaging-projects/#creating-setup-py).
45 |
46 | ## Prepare release tools
47 |
48 | ```bash
49 | python3 -m venv .env
50 | source .env/bin/activate
51 | python3 -m pip install --upgrade setuptools wheel
52 | python3 -m pip install --upgrade twine
53 | ```
54 |
55 | ## Release to Test PyPI
56 |
57 | 1. In [datacommons/setup.py](../datacommons/setup.py) and [datacommons_pandas/setup.py](../datacommons_pandas/setup.py):
58 |
59 | - Append "-USERNAME" to the package "NAME". For example,
60 | `NAME = 'foo_package-janedoe123'`.
61 | - Increment the "VERSION" codes to something that has not been used in your
62 | test project. This will not affect the production PyPI versioning.
63 |
64 | 1. In the repo root directory, build the dists and release to TestPyPI:
65 |
66 | ```bash
67 | rm dist/*
68 | python3 datacommons/setup.py sdist bdist_wheel
69 | python3 datacommons_pandas/setup.py sdist bdist_wheel
70 | python3 -m twine upload --repository testpypi dist/*
71 | ```
72 |
73 | ## Release to Production PyPI
74 |
75 | 1. In [datacommons/setup.py](../datacommons/setup.py) and
76 | [datacommons_pandas/setup.py](../datacommons_pandas/setup.py):
77 |
78 | - Revert the package name to `datacommons` and `datacommons_pandas`
79 | - Update and double check "VERSION"
80 |
81 | 1. Update [datacommons/CHANGELOG.md](../datacommons/CHANGELOG.md) and [datacommons_pandas/CHANGELOG.md](../datacommons_pandas/CHANGELOG.md)
82 |
83 | 1. Build the dists and release to PyPI:
84 |
85 | ```bash
86 | rm dist/*
87 | python3 datacommons/setup.py sdist bdist_wheel
88 | python3 datacommons_pandas/setup.py sdist bdist_wheel
89 | python3 -m twine upload dist/*
90 | ```
91 |
--------------------------------------------------------------------------------
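Step 2 above ships without a snippet; as a minimal sketch (assuming hatch's standard version segments), the bump could look like:

```bash
# Bump the patch segment, e.g. 1.2.3 -> 1.2.4
hatch version patch

# Or set an explicit version directly
hatch version 2.0.0
```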
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "datacommons-client"
3 | dynamic = ["version"]
4 | description = "A library to access the Data Commons Python API."
5 | readme = "datacommons_client/README.md"
6 | authors = [
7 | { name = "datacommons.org", email = "support@datacommons.org" },
8 | { name = "one.org", email= "data@one.org"}
9 | ]
10 | maintainers = [
11 | { name = "datacommons.org", email = "support@datacommons.org" }
12 | ]
13 | license = { file = "LICENSE" }
14 | dependencies = [
15 | "requests>=2.32",
16 | "typing_extensions",
17 | "pydantic>=2.11"
18 | ]
19 | requires-python = ">=3.10"
20 | keywords = ["data commons", "api", "data", "development"]
21 | classifiers = [
22 | "Intended Audience :: Developers",
23 | "License :: OSI Approved :: Apache Software License",
24 | "Programming Language :: Python",
25 | "Programming Language :: Python :: 3.10",
26 | "Programming Language :: Python :: 3.11",
27 | "Programming Language :: Python :: 3.12",
28 | "Programming Language :: Python :: 3.13",
29 | "Programming Language :: Python :: Implementation :: CPython",
30 | "Topic :: Software Development"
31 | ]
32 | urls = { "Homepage" = "https://github.com/datacommonsorg/api-python" }
33 |
34 | [project.optional-dependencies]
35 | pandas = ["pandas"]
36 | dev = [
37 | "pytest",
38 | "isort",
39 | "yapf",
40 | "mock",
41 | "hatch"
42 | ]
43 |
44 | [tool.hatch.version]
45 | path = "datacommons_client/__init__.py"
46 |
47 |
48 | [tool.hatch.build.targets.sdist]
49 | include = [
50 | "datacommons_client",
51 | "README.md",
52 | "LICENSE",
53 | "CHANGELOG.md"
54 | ]
55 |
56 | [tool.hatch.build.targets.wheel]
57 | include = [
58 | "datacommons_client"
59 | ]
60 |
61 | [tool.hatch.envs.default]
62 | dependencies = [
63 | "pytest",
64 | "isort",
65 | "yapf",
66 | "hatch",
67 | ]
68 |
69 | [tool.hatch.envs.test]
70 | dependencies = [
71 | "pytest",
72 | "mock",
73 | "pandas",
74 | "isort",
75 | "yapf"
76 | ]
77 |
78 |
79 | [tool.hatch.envs.test.scripts]
80 | setup = "./run_test.sh -s"
81 | all = "./run_test.sh -a"
82 | python = "./run_test.sh -p"
83 | lint = "./run_test.sh -l"
84 |
85 | [tool.hatch.envs.lint]
86 | dependencies = [
87 | "isort",
88 | "yapf"
89 | ]
90 |
91 | [tool.hatch.envs.lint.scripts]
92 | check = "./run_test.sh -l"
93 | format = "./run_test.sh -f"
94 |
95 | [tool.hatch.envs.release]
96 | dependencies = [
97 | "twine"
98 | ]
99 |
100 | [tool.hatch.envs.release.scripts]
101 | localtest = "hatch build && twine check dist/*"
102 | testpypi = "hatch build && twine upload --repository testpypi dist/*"
103 | pypi = "hatch build && twine upload dist/*"
104 | tag = "git commit -am 'Bump version to {version}' && git tag v{version}"
105 |
106 |
107 | [build-system]
108 | requires = ["hatchling"]
109 | build-backend = "hatchling.build"
110 |
--------------------------------------------------------------------------------
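Given the `[tool.hatch.envs.*.scripts]` tables above, each script is invoked as `hatch run <env>:<script>`; a sketch of the common ones:

```bash
hatch run test:all          # ./run_test.sh -a
hatch run lint:check        # ./run_test.sh -l
hatch run lint:format       # ./run_test.sh -f
hatch run release:localtest # hatch build && twine check dist/*
```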
/datacommons_client/tests/endpoints/test_error_handling.py:
--------------------------------------------------------------------------------
1 | from requests import Request
2 | from requests import Response
3 |
4 | from datacommons_client.utils.error_handling import APIError
5 | from datacommons_client.utils.error_handling import DataCommonsError
6 | from datacommons_client.utils.error_handling import DCAuthenticationError
7 | from datacommons_client.utils.error_handling import DCConnectionError
8 | from datacommons_client.utils.error_handling import DCStatusError
9 | from datacommons_client.utils.error_handling import InvalidDCInstanceError
10 | from datacommons_client.utils.error_handling import NoDataForPropertyError
11 |
12 |
13 | def test_data_commons_error_default_message():
14 | """Tests that DataCommonsError uses the default message."""
15 | error = DataCommonsError()
16 | assert str(error) == DataCommonsError.default_message
17 |
18 |
19 | def test_data_commons_error_custom_message():
20 | """Tests that DataCommonsError uses a custom message when provided."""
21 | error = DataCommonsError("Custom message")
22 | assert str(error) == "Custom message"
23 |
24 |
25 | def test_api_error_without_response():
26 | """Tests APIError initialization without a Response object."""
27 | error = APIError()
28 | assert str(error) == f"\n{APIError.default_message}"
29 |
30 |
31 | def test_api_error_with_response():
32 | """Tests APIError initialization with a mocked Response object.
33 |
34 | Verifies that the string representation includes status code,
35 | request URL, and response text.
36 | """
37 | mock_request = Request("GET", "http://example.com").prepare()
38 | mock_response = Response()
39 | mock_response.request = mock_request
40 | mock_response.status_code = 404
41 | mock_response._content = b"Not Found"
42 |
43 | error = APIError(response=mock_response)
44 | assert "Status Code: 404" in str(error)
45 | assert "Request URL: http://example.com" in str(error)
46 | assert "Not Found" in str(error)
47 |
48 |
49 | def test_subclass_default_messages():
50 | """Tests that subclasses use their default messages."""
51 | connection_error = DCConnectionError()
52 | assert DCConnectionError.default_message in str(connection_error)
53 |
54 | status_error = DCStatusError()
55 | assert DCStatusError.default_message in str(status_error)
56 |
57 | auth_error = DCAuthenticationError()
58 | assert DCAuthenticationError.default_message in str(auth_error)
59 |
60 | instance_error = InvalidDCInstanceError()
61 | assert InvalidDCInstanceError.default_message in str(instance_error)
62 |
63 | filter_error = NoDataForPropertyError()
64 | assert NoDataForPropertyError.default_message in str(filter_error)
65 |
66 |
67 | def test_subclass_custom_message():
68 | """Tests that subclasses use custom messages when provided."""
69 | error = DCAuthenticationError(response=Response(),
70 | message="Custom auth error")
71 | assert str(error) == "\nCustom auth error"
72 |
--------------------------------------------------------------------------------
/datacommons/examples/places.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API examples.
15 |
16 | Basic demo for get_places_in
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | import datacommons as dc
24 |
25 |
26 | def main():
27 | # Create a list of dcids for Santa Clara and Montgomery County.
28 | sc, mc = 'geoId/06085', 'geoId/24031'
29 | dcids = [sc, mc]
30 |
31 | # Get all CensusTracts in these two counties.
32 | print('Get Census Tracts')
33 | tracts = dc.get_places_in(dcids, 'CensusTract')
34 | if sc in tracts:
35 | print('> 10 CensusTracts in Santa Clara County')
36 | for dcid in tracts[sc][:10]:
37 | print(' - {}'.format(dcid))
38 | if mc in tracts:
39 | print('> 10 CensusTracts in Montgomery County')
40 | for dcid in tracts[mc][:10]:
41 | print(' - {}'.format(dcid))
42 |
43 | # Get place stats.
44 | print('Get place stats -- all')
45 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'],
46 | 'dc/0hyp6tkn18vcb',
47 | obs_dates='all')
48 | print(stats)
49 |
50 | print('Get place stats -- latest')
51 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'],
52 | 'dc/0hyp6tkn18vcb')
53 | print(stats)
54 |
55 | print('Get place stats -- 2014')
56 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'],
57 | 'dc/0hyp6tkn18vcb',
58 | obs_dates=['2014'])
59 | print(stats)
60 |
61 | print('Get place stats -- 2014 badly formatted')
62 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'],
63 | 'dc/0hyp6tkn18vcb',
64 | obs_dates='2014')
65 | print(stats)
66 |
67 | print('Get place stats -- 2015-2016')
68 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'],
69 | 'dc/0hyp6tkn18vcb',
70 | obs_dates=['2015', '2016'])
71 | print(stats)
72 |
73 | # Get related places.
74 |
75 |
76 | # TODO(*): Fix the related places example.
77 | # print('Get related places')
78 | # related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count',
79 | # 'CensusACS5yrSurvey', "measuredValue", {"gender": "Female"})
80 | # print(related_places)
81 |
82 | if __name__ == '__main__':
83 | main()
84 |
--------------------------------------------------------------------------------
/datacommons_client/utils/error_handling.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from requests import Response
4 |
5 |
6 | class DataCommonsError(Exception):
7 | """Base exception for all Data Commons-related errors."""
8 |
9 | default_message = "An error occurred getting data from Data Commons API."
10 |
11 | def __init__(self, message: Optional[str] = None):
12 | """Initializes a DataCommonsError with a default or custom message."""
13 | super().__init__(message or self.default_message)
14 |
15 |
16 | class APIError(DataCommonsError):
17 | """Represents an error interacting with Data Commons API."""
18 |
19 | default_message = "An API error occurred."
20 |
21 | def __init__(
22 | self,
23 | response: Optional[Response] = None,
24 | message: Optional[str] = None,
25 | ):
26 | """Initializes an APIError.
27 |
28 | Args:
29 | response (Optional[Response]): The response, if available.
30 | message (Optional[str]): A descriptive error message.
31 | """
32 | super().__init__(message or self.default_message)
33 | self.response = response
34 | self.request = getattr(response, "request", None)
35 | self.status_code = getattr(response, "status_code", None)
36 |
37 | def __str__(self) -> str:
38 | """Returns a detailed string representation of the error.
39 |
40 | Returns:
41 | str: A string describing the error, including the request URL if available.
42 | """
43 |
44 | details = f"\n{self.args[0]}"
45 | if self.status_code:
46 | details += f"\nStatus Code: {self.status_code}"
47 | if getattr(self.request, "url", None):
48 | details += f"\nRequest URL: {self.request.url}"
49 | if getattr(self.response, "text", None):
50 | details += f"\nResponse: {self.response.text}"
51 |
52 | return details
53 |
54 |
55 | class DCConnectionError(APIError):
56 | """Raised for network-related errors in the Data Commons API."""
57 |
58 | default_message = (
59 | "A network error occurred while connecting to the Data Commons API.")
60 |
61 |
62 | class DCStatusError(APIError):
63 | """Raised for non-2xx HTTP status code errors in the Data Commons API."""
64 |
65 | default_message = "The Data Commons API returned a non-2xx status code."
66 |
67 |
68 | class DCAuthenticationError(APIError):
69 | """Raised for 401 Unauthorized errors in the Data Commons API."""
70 |
71 | default_message = "Authentication failed. Please check your API key."
72 |
73 |
74 | class InvalidDCInstanceError(DataCommonsError):
75 | """Raised when an invalid Data Commons instance is provided."""
76 |
77 | default_message = "The specified Data Commons instance is invalid."
78 |
79 |
80 | class InvalidObservationSelectError(DataCommonsError):
81 | """Raised when an invalid ObservationSelect field is provided."""
82 |
83 | default_message = "The ObservationSelect field is invalid."
84 |
85 |
86 | class NoDataForPropertyError(DataCommonsError):
87 | """Raised when there is no data that meets the specified property filters."""
88 |
89 | default_message = "No available data for the specified property filters."
90 |
--------------------------------------------------------------------------------
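Because every exception above derives from `DataCommonsError`, callers can layer handlers from specific to general; a minimal sketch (the `fetch` callable is hypothetical):

```python
from datacommons_client.utils.error_handling import APIError
from datacommons_client.utils.error_handling import DataCommonsError
from datacommons_client.utils.error_handling import DCAuthenticationError


def fetch_with_handling(fetch):
  """Runs a hypothetical zero-argument callable with layered error handling."""
  try:
    return fetch()
  except DCAuthenticationError as e:
    # 401s: APIError carries the response, request, and status code.
    print(f"Check your API key (status {e.status_code})")
  except APIError as e:
    # Any other API failure; str(e) includes the request URL and response text.
    print(e)
  except DataCommonsError as e:
    # Catch-all for non-HTTP errors such as InvalidDCInstanceError.
    print(e)
```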
/run_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2020 Google LLC
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -e # Immediately exit with failure if any command fails.
17 |
18 | YAPF_STYLE='{based_on_style: google, indent_width: 2}'
19 | FORMAT_INCLUDE_PATHS="datacommons/ datacommons_client/ datacommons_pandas/"
20 | FORMAT_EXCLUDE_PATH="**/.env/**"
21 |
22 | function setup_python {
23 | python3 -m pip install --upgrade pip hatch
24 | # here temporarily while there is an incompatibility with hatch and the newest click version
25 | # see https://github.com/pypa/hatch/pull/2051 for status updates from Hatch
26 |   python3 -m pip uninstall click -y
27 | python3 -m pip install click==8.2.1
28 | hatch env create
29 | }
30 |
31 | function run_py_test {
32 | pytest -vv
33 | }
34 |
35 | function run_yapf {
36 | EXTRA_ARGS=$@
37 | yapf $EXTRA_ARGS --recursive --parallel --style="$YAPF_STYLE" \
38 | --exclude="$FORMAT_EXCLUDE_PATH" $FORMAT_INCLUDE_PATHS
39 | }
40 |
41 | function run_isort {
42 | EXTRA_ARGS=$@
43 | isort $EXTRA_ARGS --profile=google --skip-glob="$FORMAT_EXCLUDE_PATH" \
44 | $FORMAT_INCLUDE_PATHS
45 | }
46 |
47 | function run_lint_test {
48 | if ! run_yapf --diff; then
49 | echo "Fix lint errors by running: ./run_test.sh -f"
50 | exit 1
51 | fi
52 | if ! run_isort --check-only; then
53 | echo "Fix Python import sort orders by running ./run_test.sh -f"
54 | exit 1
55 | fi
56 | echo "Python style checks passed."
57 | }
58 |
59 | function run_lint_fix {
60 | run_yapf --in-place
61 | run_isort
62 | }
63 |
64 | function run_all_tests {
65 | run_py_test
66 | run_lint_test
67 | }
68 |
69 | function help {
70 | echo "Usage: $0 -asplf"
71 | echo "-a Run all tests"
72 | echo "-s Set up python environment"
73 | echo "-p Run python tests"
74 | echo "-l Run lint tests"
75 | echo "-f Fix lint"
76 | exit 1
77 | }
78 |
79 | while getopts asplf OPTION; do
80 | case $OPTION in
81 | a)
82 | echo -e "### Running all tests"
83 | run_all_tests
84 | ;;
85 | s)
86 | echo -e "### Setting up python environment"
87 | setup_python
88 | ;;
89 | p)
90 | echo -e "### Running python tests"
91 | run_py_test
92 | ;;
93 | l)
94 | echo -e "### Running lint tests"
95 | run_lint_test
96 | ;;
97 | f)
98 | echo -e "### Fix lint errors"
99 | run_lint_fix
100 | ;;
101 | *)
102 | help
103 | esac
104 | done
105 |
106 | if [ $OPTIND -eq 1 ]
107 | then
108 | help
109 | fi
110 |
--------------------------------------------------------------------------------
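Typical invocations of the script above, matching the `getopts asplf` flags:

```bash
./run_test.sh -s   # one-time Python/hatch environment setup
./run_test.sh -a   # run Python tests plus lint checks
./run_test.sh -p   # Python tests only
./run_test.sh -l   # lint checks only
./run_test.sh -f   # auto-fix yapf and isort findings
```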
/datacommons/node.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ API to request node information.
15 | """
16 |
17 | from typing import Dict, List
18 |
19 | from datacommons.requests import _post
20 | from datacommons.utils import _get_arrow
21 | from datacommons.utils import _get_direction
22 |
23 |
24 | def properties(nodes: List[str], is_out: bool = True) -> Dict[str, List[str]]:
25 | """Retrieves all the properties for a list of nodes.
26 |
27 | Note this only returns the property labels, not the values.
28 | Args:
29 | nodes: List of DCIDs.
30 |     is_out: Whether to return outgoing properties.
31 | Returns:
32 | A dict keyed by node DCID, with the values being a list of properties
33 | for the queried node.
34 | """
35 | resp = _post('/v2/node', {'nodes': nodes, 'property': _get_arrow(is_out)})
36 | result = {}
37 | for node, item in resp.get('data', {}).items():
38 | properties = item.get('properties', [])
39 | result[node] = properties
40 | return result
41 |
42 |
43 | def property_values(nodes: List[str],
44 | property: str,
45 | is_out: bool = True) -> Dict[str, List[str]]:
46 | """Retrieves the property values for a list of nodes.
47 | Args:
48 | nodes: List of DCIDs.
49 | property: The property label to query for.
50 |     is_out: Whether the property is outgoing.
51 | Returns:
52 | A dict keyed by node DCID, with the values being a list of values
53 | for the queried property.
54 | """
55 | resp = _post(f'/v1/bulk/property/values/{_get_direction(is_out)}', {
56 | 'nodes': nodes,
57 | 'property': property,
58 | })
59 | result = {}
60 | for item in resp.get('data', []):
61 | node, values = item['node'], item.get('values', [])
62 | result[node] = []
63 | for v in values:
64 | if 'dcid' in v:
65 | result[node].append(v['dcid'])
66 | else:
67 | result[node].append(v['value'])
68 | return result
69 |
70 |
71 | def triples(nodes: List[str],
72 | is_out: bool = True) -> Dict[str, Dict[str, List[object]]]:
73 | """Retrieves the triples for a node.
74 | Args:
75 | nodes: List of DCIDs.
76 |     is_out: Whether the returned property is outgoing for the queried
77 |         nodes.
78 | Returns:
79 | A two level dict keyed by node DCID, then by the arc property, with
80 | a list of values or DCIDs.
81 | """
82 | resp = _post(f'/v1/bulk/triples/{_get_direction(is_out)}',
83 | data={'nodes': nodes})
84 | result = {}
85 | for item in resp.get('data', []):
86 | node, triples = item['node'], item.get('triples', {})
87 | result[node] = {}
88 | for property, other_nodes in triples.items():
89 | result[node][property] = other_nodes.get('nodes', [])
90 | return result
91 |
--------------------------------------------------------------------------------
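A quick sketch of the three helpers above (the DCIDs are illustrative; an API key may need to be set via the `DC_API_KEY` environment variable):

```python
import datacommons.node as node

# Property labels attached to each node (outgoing arcs by default).
labels = node.properties(['geoId/06'])

# Values of a single property for several nodes.
names = node.property_values(['geoId/06', 'geoId/24'], 'name')

# All triples, keyed by node DCID and then by arc property.
incoming = node.triples(['geoId/06'], is_out=False)
```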
/datacommons_client/tests/test_dataframes.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import MagicMock
2 |
3 | import pandas as pd
4 |
5 | from datacommons_client.endpoints.node import NodeEndpoint
6 | from datacommons_client.models.node import StatVarConstraint
7 | from datacommons_client.models.node import StatVarConstraints
8 | from datacommons_client.utils.dataframes import add_property_constraints_to_observations_dataframe
9 |
10 |
11 | def test_add_property_constraints_to_observations_dataframe_adds_columns():
12 | """Adds constraint id and name columns based on statvar metadata."""
13 | # Input observations
14 | df = pd.DataFrame([
15 | {
16 | "date": "2020",
17 | "entity": "geo/1",
18 | "variable": "sv/A",
19 | "value": 10,
20 | "unit": "Count",
21 | },
22 | {
23 | "date": "2020",
24 | "entity": "geo/2",
25 | "variable": "sv/B",
26 | "value": 20,
27 | "unit": "Count",
28 | },
29 | ])
30 |
31 | endpoint = MagicMock(spec=NodeEndpoint)
32 |
33 | endpoint.fetch_statvar_constraints.return_value = StatVarConstraints.model_validate(
34 | {
35 | "sv/A": [
36 | StatVarConstraint(
37 | constraintId="DevelopmentFinanceScheme",
38 | constraintName="Development Finance Scheme",
39 | valueId="ODAGrants",
40 | valueName="Official Development Assistance Grants",
41 | ),
42 | StatVarConstraint(
43 | constraintId="DevelopmentFinanceRecipient",
44 | constraintName="Development Finance Recipient",
45 | valueId="country/GTM",
46 | valueName="Guatemala",
47 | ),
48 | ],
49 | "sv/B": [
50 | StatVarConstraint(
51 | constraintId="sex",
52 | constraintName="Sex",
53 | valueId="Female",
54 | valueName="Female",
55 | )
56 | ],
57 | })
58 |
59 | out = add_property_constraints_to_observations_dataframe(endpoint=endpoint,
60 | observations_df=df)
61 |
62 | # Columns for constraints should be present and filled per variable
63 | assert "DevelopmentFinanceScheme" in out.columns
64 | assert "DevelopmentFinanceScheme_name" in out.columns
65 | assert ("DevelopmentFinanceRecipient" in out.columns and
66 | "DevelopmentFinanceRecipient_name" in out.columns)
67 | assert "sex" in out.columns and "sex_name" in out.columns
68 |
69 | # Row-wise checks
70 | row_a = out[out["variable"] == "sv/A"].iloc[0]
71 | assert row_a["DevelopmentFinanceScheme"] == "ODAGrants"
72 | assert row_a[
73 | "DevelopmentFinanceScheme_name"] == "Official Development Assistance Grants"
74 | assert row_a["DevelopmentFinanceRecipient"] == "country/GTM"
75 | assert row_a["DevelopmentFinanceRecipient_name"] == "Guatemala"
76 |
77 | row_b = out[out["variable"] == "sv/B"].iloc[0]
78 | assert row_b["sex"] == "Female"
79 | assert row_b["sex_name"] == "Female"
80 |
81 |
82 | def test_add_property_constraints_to_observations_dataframe_empty():
83 | """Empty DataFrame returns unchanged."""
84 | endpoint = MagicMock(spec=NodeEndpoint)
85 | empty_df = pd.DataFrame([])
86 | out = add_property_constraints_to_observations_dataframe(
87 | endpoint=endpoint, observations_df=empty_df)
88 | assert out.empty
89 |
--------------------------------------------------------------------------------
/datacommons_client/utils/dataframes.py:
--------------------------------------------------------------------------------
1 | from datacommons_client.endpoints.node import NodeEndpoint
2 | from datacommons_client.utils.data_processing import flatten_names_dictionary
3 |
4 | try:
5 | import pandas as pd
6 | except ImportError:
7 | pd = None
8 |
9 | from datacommons_client.utils.decorators import requires_pandas
10 |
11 |
12 | @requires_pandas
13 | def add_entity_names_to_observations_dataframe(
14 | endpoint: NodeEndpoint,
15 | observations_df: "pd.DataFrame", # type: ignore[reportInvalidTypeForm]
16 | entity_columns: str | list[str],
17 | ) -> "pd.DataFrame": # type: ignore[reportInvalidTypeForm]
18 | """
19 | Adds entity names to the observations DataFrame.
20 |
21 | Args:
22 | endpoint (NodeEndpoint): The NodeEndpoint instance for fetching entity names.
23 |     observations_df (pd.DataFrame): The DataFrame containing observations.
24 | entity_columns (str | list[str]): The column(s) containing entity DCIDs.
25 | """
26 |
27 | # Guard against empty DataFrame
28 | if observations_df.empty:
29 | return observations_df
30 |
31 | if not isinstance(entity_columns, list):
32 | entity_columns = [entity_columns]
33 |
34 | for entity_column in entity_columns:
35 |     if entity_column not in observations_df.columns:
36 |       raise ValueError(
37 |           f"The entity column '{entity_column}' does not exist in the DataFrame.")
38 |
39 | # Get unique entity DCIDs from the DataFrame
40 | unique_values = observations_df[entity_column].dropna().unique().tolist()
41 |
42 | # Guard against empty unique values
43 | if not unique_values:
44 | continue
45 |
46 | # Fetch entity names from the endpoint
47 | response = endpoint.fetch_entity_names(entity_dcids=unique_values)
48 |
49 | # Flatten the response to get a dictionary of names
50 | names = flatten_names_dictionary(response)
51 |
52 | # Insert the names into a column next to the entity column
53 | name_column = f"{entity_column}_name"
54 | if name_column not in observations_df.columns:
55 | observations_df.insert(
56 | loc=observations_df.columns.get_loc(entity_column) + 1,
57 | column=name_column,
58 | value=observations_df[entity_column].map(names),
59 | )
60 |
61 | return observations_df
62 |
63 |
64 | @requires_pandas
65 | def add_property_constraints_to_observations_dataframe(
66 | endpoint: NodeEndpoint,
67 | observations_df: "pd.DataFrame", # type: ignore[reportInvalidTypeForm]
68 | ) -> "pd.DataFrame": # type: ignore[reportInvalidTypeForm]
69 | """
70 | Adds property constraint dcids and names to the observations DataFrame.
71 |
72 | Args:
73 |     endpoint (NodeEndpoint): The NodeEndpoint instance for fetching statvar constraints.
74 |     observations_df (pd.DataFrame): The DataFrame containing observations.
75 | """
76 |
77 | # Guard against empty DataFrame
78 | if observations_df.empty:
79 | return observations_df
80 |
81 | # Get constraints
82 | constraints_data = endpoint.fetch_statvar_constraints(
83 | variable_dcids=observations_df.variable.unique().tolist())
84 |
85 | for statvar, constraints in constraints_data.items():
86 | for constraint in constraints:
87 | # Fill the columns with the corresponding values
88 | observations_df.loc[observations_df.variable == statvar,
89 | constraint.constraintId] = constraint.valueId
90 |
91 | observations_df.loc[observations_df.variable == statvar,
92 | constraint.constraintId +
93 | "_name"] = constraint.valueName
94 |
95 | return observations_df
96 |
--------------------------------------------------------------------------------
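A minimal sketch of the first helper above, assuming `endpoint` is an already-initialised `NodeEndpoint` (construction omitted) and the frame has an `entity` column of DCIDs:

```python
import pandas as pd

from datacommons_client.utils.dataframes import \
    add_entity_names_to_observations_dataframe

df = pd.DataFrame([{'entity': 'geoId/06', 'value': 10}])

# `endpoint` is assumed to be a configured NodeEndpoint instance.
named = add_entity_names_to_observations_dataframe(
    endpoint=endpoint, observations_df=df, entity_columns='entity')
# An 'entity_name' column is inserted immediately after 'entity'.
```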
/datacommons/test/sparql_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API unit tests.
15 |
16 | Unit tests for the SPARQL query wrapper.
17 | """
18 |
19 | import unittest
20 | from unittest.mock import patch
21 |
22 | import datacommons
23 |
24 | _QUERY1 = ('''
25 | SELECT ?name ?dcid
26 | WHERE {
27 | ?a typeOf Place .
28 | ?a name ?name .
29 | ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
30 | ?a dcid ?dcid
31 | }
32 | ''')
33 |
34 | _QUERY2 = ('''
35 | SELECT ?name ?dcid
36 | WHERE {
37 | ?a typeOf Place .
38 | ?a name ?name .
39 | ?a dcid ("geoId/DNE") .
40 | ?a dcid ?dcid
41 | }
42 | ''')
43 |
44 |
45 | def _post_mock(path, data):
46 | """ A mock function for _post. """
47 | if path == "/query" and data['sparql'] == _QUERY1:
48 | return {
49 | 'header': ['?name', '?dcid'],
50 | 'rows': [{
51 | 'cells': [{
52 | 'value': 'California'
53 | }, {
54 | 'value': 'geoId/06'
55 | }]
56 | }, {
57 | 'cells': [{
58 | 'value': 'Kentucky'
59 | }, {
60 | 'value': 'geoId/21'
61 | }]
62 | }, {
63 | 'cells': [{
64 | 'value': 'Maryland'
65 | }, {
66 | 'value': 'geoId/24'
67 | }]
68 | }]
69 | }
70 | if path == "/query" and data['sparql'] == _QUERY2:
71 | return {
72 | 'header': ['?name', '?dcid'],
73 | }
74 |
75 |   # Otherwise, raise an exception for an unexpected request.
76 |   raise Exception('mock exception')
77 |
78 |
79 | class TestQuery(unittest.TestCase):
80 | """ Unit tests for the Query object. """
81 |
82 | @patch('datacommons.sparql._post')
83 | def test_rows(self, _post):
84 | """ Sending a valid query returns the correct response. """
85 | _post.side_effect = _post_mock
86 |     # Create a selector that filters out California.
87 |     selector = lambda row: row['?name'] != 'California'
88 |     # Issue the queries.
89 |     results = datacommons.query(_QUERY1)
90 |     selected_results = datacommons.query(_QUERY1, select=selector)
91 | # Execute the query and iterate through the results.
92 | for idx, row in enumerate(results):
93 | if idx == 0:
94 | self.assertDictEqual(row, {'?name': 'California', '?dcid': 'geoId/06'})
95 | if idx == 1:
96 | self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'})
97 | if idx == 2:
98 | self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'})
99 |
100 | # Verify that the select function works.
101 | for idx, row in enumerate(selected_results):
102 | if idx == 0:
103 | self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'})
104 | if idx == 1:
105 | self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'})
106 |
107 | @patch('datacommons.sparql._post')
108 | def test_no_rows(self, _post):
109 | """ Handles row-less response. """
110 | _post.side_effect = _post_mock
111 | # Issue the query
112 | self.assertEqual(datacommons.query(_QUERY2), [])
113 |
114 |
115 | if __name__ == '__main__':
116 | unittest.main()
117 |
--------------------------------------------------------------------------------
/datacommons_client/models/node.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pydantic import Field
4 |
5 | from datacommons_client.models.base import ArcLabel
6 | from datacommons_client.models.base import BaseDCModel
7 | from datacommons_client.models.base import DictLikeRootModel
8 | from datacommons_client.models.base import ListLikeRootModel
9 | from datacommons_client.models.base import NodeDCID
10 | from datacommons_client.models.base import Property
11 | from datacommons_client.models.base import PropertyList
12 |
13 |
14 | class Node(BaseDCModel):
15 | """Represents an individual node in the Data Commons knowledge graph.
16 |
17 | Attributes:
18 | dcid: The unique identifier for the node.
19 | name: The name of the node.
20 | provenanceId: The provenance ID for the node.
21 | types: The types associated with the node.
22 | value: The value of the node.
23 | """
24 | dcid: Optional[str] = None
25 | name: Optional[str] = None
26 | provenanceId: Optional[str | list[str]] = None
27 | types: Optional[list[str]] = None
28 | value: Optional[str] = None
29 |
30 |
31 | class Name(BaseDCModel):
32 | """Represents a name associated with an Entity (node).
33 |
34 | Attributes:
35 | value: The name of the Entity
36 | language: The language of the name
37 | property: The property used to get the name
38 | """
39 |
40 | value: str
41 | language: str
42 | property: str
43 |
44 |
45 | class NodeGroup(BaseDCModel):
46 | """Represents a group of nodes in the Data Commons knowledge graph.
47 |
48 | Attributes:
49 | nodes: A list of Node objects in the group.
50 | """
51 |
52 | nodes: list[Node] = Field(default_factory=list)
53 |
54 |
55 | class Arcs(BaseDCModel):
56 | """Represents arcs in the Data Commons knowledge graph.
57 |
58 | Attributes:
59 | arcs: A dictionary mapping arc labels to NodeGroup objects.
60 | """
61 |
62 | arcs: dict[ArcLabel, NodeGroup] = Field(default_factory=dict)
63 |
64 |
65 | class Properties(BaseDCModel):
66 | """Represents a group of properties in the Data Commons knowledge graph.
67 |
68 | Attributes:
69 | properties: A list of property strings.
70 | """
71 |
72 | properties: Optional[PropertyList] = None
73 |
74 |
75 | class FlattenedPropertiesMapping(BaseDCModel,
76 | DictLikeRootModel[dict[NodeDCID,
77 | PropertyList]]):
78 | """A model to represent a mapping of node DCIDs to their properties."""
79 |
80 |
81 | class FlattenedArcsMapping(BaseDCModel,
82 | DictLikeRootModel[dict[NodeDCID, dict[Property,
83 | list[Node]]]]):
84 | """A model to represent a mapping of node DCIDs to their arcs."""
85 |
86 |
87 | class NodeList(BaseDCModel, ListLikeRootModel[list[Node]]):
88 | """A root model whose value is a list of Node objects."""
89 |
90 |
91 | class NodeDCIDList(BaseDCModel, ListLikeRootModel[list[NodeDCID]]):
92 | """A root model whose value is a list of NodeDCID strings."""
93 |
94 |
95 | class StatVarConstraint(BaseDCModel):
96 | """Represents a constraint for a statistical variable."""
97 |
98 | constraintId: NodeDCID
99 | constraintName: Optional[str] = None
100 | valueId: NodeDCID
101 | valueName: Optional[str] = None
102 |
103 |
104 | class StatVarConstraints(BaseDCModel,
105 | DictLikeRootModel[dict[NodeDCID,
106 | list[StatVarConstraint]]]):
107 | """A root model whose value is a dictionary of statvar ids - a list of StatVarConstraint objects.
108 | This model is used to represent constraints associated with statistical variables.
109 | """
110 |
--------------------------------------------------------------------------------
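A short sketch of how these models validate raw payloads (the field values are illustrative):

```python
from datacommons_client.models.node import Node
from datacommons_client.models.node import StatVarConstraints

node = Node.model_validate({'dcid': 'geoId/06', 'name': 'California'})
assert node.types is None  # unset optional fields default to None

constraints = StatVarConstraints.model_validate({
    'sv/Example': [{
        'constraintId': 'sex',
        'valueId': 'Female',
    }],
})
# DictLikeRootModel lets the root mapping be used like a dict.
assert constraints['sv/Example'][0].valueId == 'Female'
```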
/datacommons/sparql.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API Query Module.
15 |
16 | Implements functions for sending graph queries to the Data Commons Graph.
17 | """
18 |
19 | from datacommons.requests import _post
20 |
21 |
22 | def query(query_string, select=None):
23 | """ Returns the results of executing a SPARQL query on the Data Commons graph.
24 |
25 | Args:
26 | query_string (:obj:`str`): The SPARQL query string.
27 | select (:obj:`func` accepting a row of the query result): A function that
28 | selects rows to be returned by :code:`query`. This function accepts a row
29 | on the results of executing :code:`query_string` and returns True if and
30 | only if the row is to be returned by :code:`query`. The row passed in as
31 | an argument is represented as a :obj:`dict` that maps a query variable in
32 | :code:`query_string` to its value in the given row.
33 |
34 | Returns:
35 | A table, represented as a :obj:`list` of rows, resulting from executing the
36 | given SPARQL query. Each row is a :obj:`dict` mapping query variable to its
37 | value in the row. If `select` is not `None`, then a row is included in the
38 | returned :obj:`list` if and only if `select` returns :obj:`True` for that
39 | row.
40 |
41 | Raises:
42 | ValueError: If the payload returned by the Data Commons REST API is
43 | malformed.
44 |
45 | Examples:
46 | We would like to query for the name associated with three states identified
47 |     by their dcids:
48 |     California (geoId/06),
49 |     Kentucky (geoId/21), and
50 |     Maryland (geoId/24).
51 |
52 | >>> query_str = '''
53 | ... SELECT ?name ?dcid
54 | ... WHERE {
55 | ... ?a typeOf Place .
56 | ... ?a name ?name .
57 | ... ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
58 | ... ?a dcid ?dcid
59 | ... }
60 | ... '''
61 | >>> result = query(query_str)
62 | >>> for r in result:
63 | ... print(r)
64 | {"?name": "Maryland", "?dcid": "geoId/24"}
65 | {"?name": "Kentucky", "?dcid": "geoId/21"}
66 | {"?name": "California", "?dcid": "geoId/06"}
67 |
68 | Optionally, we can specify which rows are returned by setting :code:`select`
69 | like so. The following returns all rows where the name is "Maryland".
70 |
71 | >>> selector = lambda row: row['?name'] == 'Maryland'
72 | >>> result = query(query_str, select=selector)
73 | >>> for r in result:
74 | ... print(r)
75 | {"?name": "Maryland", "?dcid": "geoId/24"}
76 | """
77 | resp = _post('/query', {'sparql': query_string})
78 | # Iterate through the query results
79 | header = resp.get('header')
80 | if header is None:
81 | raise ValueError('Ill-formatted response: does not contain a header.')
82 | result_rows = []
83 | for row in resp.get('rows', []):
84 | # Construct the map from query variable to cell value.
85 | row_map = {}
86 | for idx, cell in enumerate(row.get('cells', [])):
87 |       if idx >= len(header):
88 | raise ValueError('Query error: unexpected cell {}'.format(cell))
89 | if 'value' not in cell:
90 | raise ValueError('Query error: cell missing value {}'.format(cell))
91 | cell_var = header[idx]
92 | row_map[cell_var] = cell['value']
93 | # Add the row to the result rows if it is selected
94 | if select is None or select(row_map):
95 | result_rows.append(row_map)
96 | return result_rows
97 |
--------------------------------------------------------------------------------
/datacommons_client/models/base.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Mapping, MutableSequence
2 | from pprint import pformat
3 | from typing import Annotated, Any, Iterable, Optional, TypeAlias
4 |
5 | from pydantic import BaseModel
6 | from pydantic import BeforeValidator
7 | from pydantic import ConfigDict
8 | from pydantic import RootModel
9 |
10 |
11 | def listify(v: Any) -> list[str]:  # Wraps scalars in a list; converts other iterables to a list.
12 | if isinstance(v, (str, bytes)):
13 | return [v]
14 | if not isinstance(v, Iterable):
15 | return [v]
16 | return list(v)
17 |
18 |
19 | variableDCID: TypeAlias = str
20 | entityDCID: TypeAlias = str
21 | facetID: TypeAlias = str
22 | ListOrStr = Annotated[list[str] | str, BeforeValidator(listify)]
23 | NextToken: TypeAlias = Optional[str]
24 | NodeDCID: TypeAlias = str
25 | ArcLabel: TypeAlias = str
26 | Property: TypeAlias = str
27 | PropertyList: TypeAlias = list[Property]
28 | Query: TypeAlias = str
29 | DominantType: TypeAlias = str
30 |
31 |
32 | class BaseDCModel(BaseModel):
33 | """Provides serialization methods for the Pydantic models used by the client."""
34 |
35 | model_config = ConfigDict(validate_by_name=True,
36 | validate_default=True,
37 | validate_by_alias=True,
38 | use_enum_values=True,
39 | serialize_by_alias=True)
40 |
41 | def __str__(self) -> str:
42 | """Returns a string representation of the instance."""
43 | return self.to_json()
44 |
45 | def to_dict(self, exclude_none: bool = True) -> dict[str, Any]:
46 | """Converts the instance to a dictionary.
47 |
48 | Args:
49 | exclude_none: If True, only include non-empty values in the response.
50 |
51 | Returns:
52 | Dict[str, Any]: The dictionary representation of the instance.
53 | """
54 |
55 | return self.model_dump(mode="python", exclude_none=exclude_none)
56 |
57 | def to_json(self, exclude_none: bool = True) -> str:
58 | """Converts the instance to a JSON string.
59 |
60 | Args:
61 | exclude_none: If True, only include non-empty values in the response.
62 |
63 | Returns:
64 | str: The JSON string representation of the instance.
65 | """
66 | return self.model_dump_json(exclude_none=exclude_none, indent=2)
67 |
68 |
69 | class DictLikeRootModel(RootModel, Mapping):
70 | """A base class for models that can be treated as dictionaries."""
71 |
72 | def __repr__(self) -> str:
73 | return f"{self.__class__.__name__}({self.root})"
74 |
75 | def __str__(self) -> str:
76 | return pformat(self.root, compact=True, width=80)
77 |
78 | def __getitem__(self, key: str) -> Any:
79 | return self.root[key]
80 |
81 | def __iter__(self) -> Iterable[Any]:
82 | return iter(self.root)
83 |
84 | def __len__(self) -> int:
85 | return len(self.root)
86 |
87 | def __eq__(self, other: Any) -> bool:
88 | if isinstance(other, DictLikeRootModel):
89 | return self.root == other.root
90 | else:
91 | return self.root == other
92 |
93 |
94 | class ListLikeRootModel(MutableSequence, RootModel):
95 | """A base class for models that can be treated as lists."""
96 |
97 | def __repr__(self) -> str:
98 | return f"{self.__class__.__name__}({self.root})"
99 |
100 | def __str__(self) -> str:
101 | return pformat(self.root, compact=True, width=80)
102 |
103 | def __getitem__(self, index: int) -> Any:
104 | return self.root[index]
105 |
106 | def __setitem__(self, index: int, value: Any) -> None:
107 | self.root[index] = value
108 |
109 | def __delitem__(self, index: int) -> None:
110 | del self.root[index]
111 |
112 | def __len__(self) -> int:
113 | return len(self.root)
114 |
115 | def __eq__(self, other: Any) -> bool:
116 | if isinstance(other, ListLikeRootModel):
117 | return self.root == other.root
118 | else:
119 | return self.root == other
120 |
121 | def insert(self, index: int, item: Any) -> None:
122 | """Inserts an item at a specified index in the root list."""
123 | self.root.insert(index, item)
124 |
--------------------------------------------------------------------------------
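Because `ListLikeRootModel` implements `MutableSequence` and `DictLikeRootModel` implements `Mapping`, the concrete root models behave like built-in containers; a sketch using `NodeList` from the node models:

```python
from datacommons_client.models.node import Node
from datacommons_client.models.node import NodeList

nodes = NodeList.model_validate([{'dcid': 'geoId/06'}, {'dcid': 'geoId/24'}])
assert len(nodes) == 2                  # __len__ delegates to the root list
assert nodes[0].dcid == 'geoId/06'      # so does __getitem__
nodes.insert(0, Node(dcid='geoId/21'))  # MutableSequence mutation
assert [n.dcid for n in nodes] == ['geoId/21', 'geoId/06', 'geoId/24']
```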
/datacommons/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Utilities Library.
15 |
16 | Various functions that can aid in the extension of the Data Commons API.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | import base64
24 | from collections import defaultdict
25 | import json
26 | import os
27 | import zlib
28 |
29 | import six.moves.urllib.error
30 | import six.moves.urllib.request
31 |
32 | # --------------------------------- CONSTANTS ---------------------------------
33 |
34 | # REST API endpoint root
35 | _API_ROOT = "https://api.datacommons.org"
36 |
37 | # REST API endpoint paths
38 | _API_ENDPOINTS = {
39 | 'query': '/query',
40 | 'get_property_labels': '/node/property-labels',
41 | 'get_property_values': '/node/property-values',
42 | 'get_triples': '/node/triples',
43 | 'get_places_in': '/node/places-in',
44 | 'get_related_places': '/node/related-places',
45 | 'get_stats': '/bulk/stats',
46 | 'get_stat_value': '/stat/value',
47 | 'get_stat_series': '/stat/series',
48 | 'get_stat_all': '/stat/all',
49 | }
50 |
51 | # The default maximum number of results to return.
52 | _MAX_LIMIT = 100
53 |
54 | # Batch size for heavyweight queries.
55 | _QUERY_BATCH_SIZE = 500
56 |
57 | # Environment variable names used by the package
58 | _ENV_VAR_API_KEY = 'DC_API_KEY'
59 |
60 | # ------------------------- INTERNAL HELPER FUNCTIONS -------------------------
61 |
62 |
63 | def _send_request(req_url,
64 | req_json={},
65 | compress=False,
66 | post=True,
67 | use_payload=True):
68 | """ Sends a POST/GET request to req_url with req_json, default to POST.
69 |
70 | Returns:
71 | The payload returned by sending the POST/GET request formatted as a dict.
72 | """
73 | headers = {'Content-Type': 'application/json'}
74 |
75 | # Pass along API key if provided
76 | if os.environ.get(_ENV_VAR_API_KEY):
77 | headers['x-api-key'] = os.environ[_ENV_VAR_API_KEY]
78 |
79 | # Send the request and verify the request succeeded
80 | if post:
81 | req = six.moves.urllib.request.Request(
82 | req_url, data=json.dumps(req_json).encode('utf-8'), headers=headers)
83 | else:
84 | req = six.moves.urllib.request.Request(req_url, headers=headers)
85 | try:
86 | res = six.moves.urllib.request.urlopen(req)
87 | except six.moves.urllib.error.HTTPError as e:
88 | raise ValueError(
89 | 'Response error: An HTTP {} code was returned by the REST API. '
90 | 'Printing response\n\n{}'.format(e.code, e.read()))
91 | if isinstance(res, six.moves.urllib.error.HTTPError):
92 | raise ValueError(
93 | 'Response error: An HTTP {} code was returned by the REST API. '
94 | 'Printing response\n\n{}'.format(res.code, res.reason))
95 | # Get the JSON
96 | res_json = json.loads(res.read())
97 | if not use_payload:
98 | return res_json
99 | if 'payload' not in res_json:
100 | raise ValueError('Response error: Payload not found. Printing response\n\n'
101 | '{}'.format(res_json))
102 |
103 | # If the payload is compressed, decompress and decode it
104 | payload = res_json['payload']
105 | if compress:
106 | payload = zlib.decompress(base64.b64decode(payload), zlib.MAX_WBITS | 32)
107 | return json.loads(payload)
108 |
109 |
110 | def _format_expand_payload(payload, new_key, must_exist=[]):
111 | """ Formats expand type payloads into dicts from dcids to lists of values. """
112 | # Create the results dictionary from payload
113 | results = defaultdict(set)
114 | for entry in payload:
115 | if 'dcid' in entry and new_key in entry:
116 | dcid = entry['dcid']
117 | results[dcid].add(entry[new_key])
118 |
119 | # Ensure all dcids in must_exist have some entry in results.
120 | for dcid in must_exist:
121 | results[dcid]
122 | return {k: sorted(list(v)) for k, v in results.items()}
123 |
124 |
125 | def _get_direction(out: bool):
126 | return "out" if out else "in"
127 |
128 |
129 | def _get_arrow(out: bool):
130 | """Returns the arrow syntax for an arc direction.
131 |
132 | Args:
133 | out: Whether the arc direction is out.
134 | Returns:
135 | The corresponding arrow syntax.
136 | """
137 | return "->" if out else "<-"
138 |
--------------------------------------------------------------------------------
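The legacy package reads its key from the `DC_API_KEY` environment variable (`_ENV_VAR_API_KEY` above); a minimal sketch with a placeholder key:

```python
import os

# Must be set before issuing requests; _send_request forwards it as the
# "x-api-key" header on every call.
os.environ['DC_API_KEY'] = 'YOUR_API_KEY'  # placeholder, not a real key
```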
/datacommons_pandas/examples/df_builder.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Basic examples for building pandas objects using the Data Commons Pandas API."""
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | import datacommons_pandas as dcpd
21 |
22 |
23 | def build_time_series_example():
24 |
25 | print("""
26 | # Build a pd.Series of time series for one variable and one place.
27 | $ dcpd.build_time_series('country/CAN', 'Count_WildlandFireEvent')
28 | {}""".format(dcpd.build_time_series('country/CAN', 'Count_WildlandFireEvent')))
29 |
30 | print("""
31 | # Build a pd.Series of time series for one variable and one place and optional args.
32 | $ dcpd.build_time_series('country/USA', 'Count_Person', 'CensusPEPSurvey')
33 | {}""".format(
34 | dcpd.build_time_series('country/USA', 'Count_Person', 'CensusPEPSurvey')))
35 |
36 |
37 | def build_time_series_dataframe_example():
38 |
39 | def demonstrate_build_time_series_dataframe(intro_str,
40 | places,
41 | stat_var,
42 | desc_col=False):
43 | arg_str = "{}, '{}'".format(places, stat_var)
44 | if desc_col:
45 | arg_str += ", desc_col=True"
46 | print("""
47 | # {}
48 | $ dcpd.build_time_series_dataframe({})
49 | {}""".format(intro_str, arg_str,
50 | dcpd.build_time_series_dataframe(places, stat_var, desc_col)))
51 |
52 | build_time_series_dataframe_params = [{
53 | 'intro_str':
54 | 'Build a DataFrame of time series for one variable in multiple places.',
55 | 'places': ['geoId/33', 'geoId/29', 'country/USA'],
56 | 'stat_var':
57 | 'Median_Income_Person'
58 | }, {
59 | 'intro_str':
60 | 'Build a DataFrame of time series with columns sorted in descending order.',
61 | 'places': ['country/USA'],
62 | 'stat_var':
63 | 'Median_Income_Person',
64 | 'desc_col':
65 | True
66 | }]
67 |
68 | for param_set in build_time_series_dataframe_params:
69 | demonstrate_build_time_series_dataframe(**param_set)
70 |
71 |
72 | def build_multivariate_dataframe_example():
73 |
74 | def demonstrate_build_multivariate_dataframe(intro_str, places, stat_vars):
75 | print("""
76 | # {}
77 | $ dcpd.build_multivariate_dataframe({}, {})
78 | {}""".format(intro_str, places, stat_vars,
79 | dcpd.build_multivariate_dataframe(places, stat_vars)))
80 |
81 | build_multivariate_dataframe_params = [{
82 | 'intro_str':
83 | 'Build a DataFrame of latest observations for multiple variables in multiple places.',
84 | 'places': ['geoId/06', 'country/FRA'],
85 | 'stat_vars': ['Median_Age_Person', 'Count_Person', 'Count_Household']
86 | }]
87 |
88 | for param_set in build_multivariate_dataframe_params:
89 | demonstrate_build_multivariate_dataframe(**param_set)
90 |
91 |
92 | def expect_err_examples():
93 |
94 | print("\n\nExpect 6 errors, starting HERE:")
95 | try:
96 | dcpd.build_time_series_dataframe(['geoId/33'],
97 | ['Median_Income_Person', 'Count_Person'])
98 | except ValueError as e:
99 | print("Successfully errored on: ", e)
100 | try:
101 | dcpd.build_time_series_dataframe(24, ['Median_Income_Person'])
102 | except ValueError as e:
103 | print("Successfully errored on: ", e)
104 | try:
105 | dcpd.build_multivariate_dataframe([3],
106 | ['Median_Income_Person', 'Count_Person'])
107 | except ValueError as e:
108 | print("Successfully errored on: ", e)
109 | try:
110 | dcpd.build_multivariate_dataframe('country/USA', True)
111 | except ValueError as e:
112 | print("Successfully errored on: ", e)
113 | # If the following two do not error due to the addition of
114 | # Median_Income_Person statistics for NUTS geos, then please
115 | # replace either the places or the StatVar.
116 | try:
117 | dcpd.build_time_series_dataframe(['nuts/HU2', 'nuts/HU22'],
118 | 'Median_Income_Person')
119 | except ValueError as e:
120 | print("Successfully errored on: ", e)
121 | try:
122 | dcpd.build_multivariate_dataframe(['nuts/HU2', 'nuts/HU22'],
123 | ['Median_Income_Person'])
124 | except ValueError as e:
125 | print("Successfully errored on: ", e)
126 | print("until HERE.")
127 |
128 |
129 | def main():
130 | build_time_series_example()
131 | build_time_series_dataframe_example()
132 | build_multivariate_dataframe_example()
133 | expect_err_examples()
134 |
135 |
136 | if __name__ == '__main__':
137 | main()
138 |
--------------------------------------------------------------------------------
/datacommons_client/endpoints/payloads.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from pydantic import Field
4 | from pydantic import field_serializer
5 | from pydantic import field_validator
6 | from pydantic import model_serializer
7 | from pydantic import model_validator
8 |
9 | from datacommons_client.models.base import BaseDCModel
10 | from datacommons_client.models.base import ListOrStr
11 | from datacommons_client.models.observation import ObservationDate
12 | from datacommons_client.models.observation import ObservationSelect
13 | from datacommons_client.models.observation import ObservationSelectList
14 |
15 |
16 | def normalize_list_to_string(value: str | list[str]) -> str:
17 | """Converts a list of properties to a string."""
18 |
19 | if isinstance(value, list):
20 | return f"[{', '.join(value)}]"
21 |
22 | return value
23 |
24 |
25 | class NodeRequestPayload(BaseDCModel):
26 | """
27 | A Pydantic model to structure, normalize, and validate the payload for a Node V2 API request.
28 |
29 | Attributes:
30 | node_dcids (str | list[str]): The DCID(s) of the nodes to query.
31 | expression (str): The property or relation expression(s) to query.
32 | """
33 |
34 | node_dcids: ListOrStr = Field(..., serialization_alias="nodes")
35 |   expression: str | list[str] = Field(..., serialization_alias="property")
36 |
37 |
38 | class ObservationRequestPayload(BaseDCModel):
39 | """
40 | A Pydantic model to structure, normalize, and validate the payload for an Observation V2 API request.
41 |
42 | Attributes:
43 | date (str): The date for which data is being requested.
44 | variable_dcids (str | list[str]): One or more variable IDs for the data.
45 | select (list[ObservationSelect]): Fields to include in the response.
46 | Defaults to ["date", "variable", "entity", "value"].
47 | entity_dcids (Optional[str | list[str]]): One or more entity IDs to filter the data.
48 | entity_expression (Optional[str]): A string expression to filter entities.
49 | filter_facet_domains (Optional[str | list[str]]): One or more domain names to filter the data.
50 | filter_facet_ids (Optional[str | list[str]]): One or more facet IDs to filter the data.
51 | """
52 |
53 | date: ObservationDate | str = Field(default_factory=str,
54 | validate_default=True)
55 | variable_dcids: Optional[ListOrStr] = Field(default=None,
56 | serialization_alias="variable")
57 | select: Optional[list[str]] = None
58 | entity_dcids: Optional[ListOrStr] = None
59 | entity_expression: Optional[str | list[str]] = None
60 | filter_facet_domains: Optional[ListOrStr] = None
61 | filter_facet_ids: Optional[ListOrStr] = None
62 |
63 | @field_validator("date", mode="before")
64 | def _validate_date(cls, v):
65 | try:
66 | return ObservationDate(v)
67 | except ValueError:
68 | return v
69 |
70 | @field_validator("select", mode="before")
71 | def _coerce_select(cls, v):
72 | return ObservationSelectList.model_validate(v).select
73 |
74 | @field_validator("entity_expression", mode="before")
75 | def _coerce_expr(cls, v):
76 | if v is None:
77 | return v
78 | if isinstance(v, list):
79 | return normalize_list_to_string(v)
80 | if isinstance(v, str):
81 | return v
82 | raise TypeError("expression must be a string or list[str]")
83 |
84 | @field_serializer("variable_dcids", "entity_dcids", when_used="unless-none")
85 | def _serialise_dcids_fields(self, v):
86 | return {"dcids": v}
87 |
88 | @field_serializer("entity_expression", when_used="unless-none")
89 | def _serialise_expression_field(self, v):
90 | return {"expression": v}
91 |
92 | @model_validator(mode="after")
93 | def _check_one(self):
94 | if bool(self.entity_dcids) == bool(self.entity_expression):
95 | raise ValueError("Exactly one of dcids or expression must be set")
96 | return self
97 |
98 | @model_serializer(mode="wrap")
99 | def _wrap_filter(self, handler):
100 | # Normal dump
101 | data = handler(self)
102 |
103 | # pull out entity dcid or expression
104 | entity = data.pop("entity_dcids", None) or data.pop("entity_expression",
105 | None)
106 |
107 | # add entity to the data dictionary
108 | data["entity"] = entity
109 |
110 | # pull out the two filter keys if present
111 | domains = data.pop("filter_facet_domains", None)
112 | ids = data.pop("filter_facet_ids", None)
113 |
114 | # only add "filter" if at least one is set
115 | if domains or ids:
116 | filter_dict = {}
117 | if domains is not None:
118 | filter_dict["domains"] = domains
119 | if ids is not None:
120 | filter_dict["facet_ids"] = ids
121 | data["filter"] = filter_dict
122 |
123 | return data
124 |
125 |
126 | class ResolveRequestPayload(BaseDCModel):
127 | """
128 | A Pydantic model to structure, normalize, and validate the payload for a Resolve V2 API request.
129 |
130 | Attributes:
131 | node_dcids (str | list[str]): The DCID(s) of the nodes to query.
132 | expression (str): The relation expression to query.
133 | """
134 |
135 | node_dcids: ListOrStr = Field(..., serialization_alias="nodes")
136 | expression: str | list[str] = Field(..., serialization_alias="property")
137 |
--------------------------------------------------------------------------------
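A sketch of how the wrap serializer reshapes an observation payload (the DCIDs and domain are illustrative):

```python
from datacommons_client.endpoints.payloads import ObservationRequestPayload

payload = ObservationRequestPayload(
    date='2020',
    variable_dcids='Count_Person',
    select=['date', 'entity', 'variable', 'value'],
    entity_dcids='geoId/06',
    filter_facet_domains='census.gov',
)
# On dump, variable/entity DCIDs are wrapped as {"dcids": [...]}, the two
# filter_facet_* fields fold into a single "filter" key, and exactly one of
# entity_dcids / entity_expression must have been provided.
print(payload.to_dict())
```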
/datacommons_client/tests/models/test_node_models.py:
--------------------------------------------------------------------------------
1 | from datacommons_client.models.node import Arcs
2 | from datacommons_client.models.node import Node
3 | from datacommons_client.models.node import NodeGroup
4 | from datacommons_client.models.node import Properties
5 | from datacommons_client.models.node import StatVarConstraint
6 | from datacommons_client.models.node import StatVarConstraints
7 |
8 |
9 | def test_node_model_validation():
10 | """Test that Node.model_validate parses data correctly."""
11 | json_data = {
12 | "dcid": "node123",
13 | "name": "Test Node",
14 | "provenanceId": "prov123",
15 | "types": ["TypeA", "TypeB"],
16 | "value": "42",
17 | }
18 | node = Node.model_validate(json_data)
19 | assert node.dcid == "node123"
20 | assert node.name == "Test Node"
21 | assert node.provenanceId == "prov123"
22 | assert node.types == ["TypeA", "TypeB"]
23 | assert node.value == "42"
24 |
25 |
26 | def test_node_model_validation_partial():
27 | """Test Node.model_validate with partial data."""
28 | json_data = {
29 | "dcid": "node123",
30 | }
31 | node = Node.model_validate(json_data)
32 | assert node.dcid == "node123"
33 | assert node.name is None
34 | assert node.provenanceId is None
35 | assert node.types is None
36 | assert node.value is None
37 |
38 |
39 | def test_nodegroup_model_validation():
40 | """Test that NodeGroup.model_validate parses data correctly."""
41 | json_data = {
42 | "nodes": [
43 | {
44 | "dcid": "node1",
45 | "name": "Node 1"
46 | },
47 | {
48 | "dcid": "node2",
49 | "name": "Node 2"
50 | },
51 | ]
52 | }
53 | node_group = NodeGroup.model_validate(json_data)
54 | assert len(node_group.nodes) == 2
55 | assert node_group.nodes[0].dcid == "node1"
56 | assert node_group.nodes[1].name == "Node 2"
57 |
58 |
59 | def test_nodegroup_model_validation_empty():
60 | """Test NodeGroup.model_validate with empty data."""
61 | json_data = {}
62 | node_group = NodeGroup.model_validate(json_data)
63 | assert len(node_group.nodes) == 0
64 |
65 |
66 | def test_arcs_model_validation():
67 | """Test that Arcs.model_validate parses data correctly."""
68 | json_data = {
69 | "arcs": {
70 | "label1": {
71 | "nodes": [{
72 | "dcid": "node1"
73 | }, {
74 | "dcid": "node2"
75 | }]
76 | },
77 | "label2": {
78 | "nodes": [{
79 | "dcid": "node3"
80 | }]
81 | },
82 | }
83 | }
84 | arcs = Arcs.model_validate(json_data)
85 | assert len(arcs.arcs) == 2
86 | assert "label1" in arcs.arcs
87 | assert len(arcs.arcs["label1"].nodes) == 2
88 | assert arcs.arcs["label1"].nodes[0].dcid == "node1"
89 | assert len(arcs.arcs["label2"].nodes) == 1
90 | assert arcs.arcs["label2"].nodes[0].dcid == "node3"
91 |
92 |
93 | def test_arcs_model_validation_empty():
94 | """Test Arcs.model_validate with empty data."""
95 | json_data = {}
96 | arcs = Arcs.model_validate(json_data)
97 | assert len(arcs.arcs) == 0
98 |
99 |
100 | def test_properties_model_validation():
101 | """Test that Properties.model_validate parses data correctly."""
102 | json_data = {"properties": ["prop1", "prop2", "prop3"]}
103 | properties = Properties.model_validate(json_data)
104 | assert len(properties.properties) == 3
105 | assert properties.properties == ["prop1", "prop2", "prop3"]
106 |
107 |
108 | def test_properties_model_validation_empty():
109 | """Test Properties.model_validate with empty data."""
110 | json_data = {}
111 | properties = Properties.model_validate(json_data)
112 | assert properties.properties is None
113 |
114 |
115 | def test_statvarconstraint_model_validation():
116 | """Test StatVarConstraint.model_validate parses data correctly."""
117 | data = {
118 | "constraintId": "DevelopmentFinanceScheme",
119 | "constraintName": "Development Finance Scheme",
120 | "valueId": "ODAGrants",
121 | "valueName": "Official Development Assistance Grants",
122 | }
123 | constraint = StatVarConstraint.model_validate(data)
124 |
125 | assert constraint.constraintId == "DevelopmentFinanceScheme"
126 | assert constraint.constraintName == "Development Finance Scheme"
127 | assert constraint.valueId == "ODAGrants"
128 | assert constraint.valueName == "Official Development Assistance Grants"
129 |
130 |
131 | def test_statvarconstraints_model_validation():
132 | """Test StatVarConstraints root model validates mapping properly."""
133 | constraints = StatVarConstraints.model_validate({
134 | "sv/1": [
135 | {
136 | "constraintId": "DevelopmentFinanceScheme",
137 | "constraintName": "Development Finance Scheme",
138 | "valueId": "ODAGrants",
139 | "valueName": "Official Development Assistance Grants",
140 | },
141 | {
142 | "constraintId": "DevelopmentFinanceRecipient",
143 | "constraintName": "Development Finance Recipient",
144 | "valueId": "country/GTM",
145 | "valueName": "Guatemala",
146 | },
147 | ],
148 | "sv/2": [],
149 | })
150 |
151 | assert "sv/1" in constraints and "sv/2" in constraints
152 | assert len(constraints["sv/1"]) == 2
153 | assert constraints["sv/2"] == []
154 |
--------------------------------------------------------------------------------
/datacommons_client/tests/endpoints/test_observation_endpoint.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import MagicMock
2 |
3 | from datacommons_client.endpoints.base import API
4 | from datacommons_client.endpoints.observation import ObservationEndpoint
5 | from datacommons_client.endpoints.response import ObservationResponse
7 | from datacommons_client.models.observation import ObservationDate
8 | from datacommons_client.models.observation import ObservationSelect
9 |
10 |
11 | def test_fetch():
12 | """Tests the fetch method of ObservationEndpoint."""
13 | api_mock = MagicMock(spec=API)
14 | api_mock.post.return_value = {"byVariable": {}}
15 | endpoint = ObservationEndpoint(api=api_mock)
16 |
17 | response = endpoint.fetch(variable_dcids="dcid/variableID",
18 | date=ObservationDate.LATEST,
19 | select=["date", "variable", "entity", "value"],
20 | entity_dcids="dc/EntityID",
21 | filter_facet_domains="domain1",
22 | filter_facet_ids="facet1")
23 |
24 | # Check the response
25 | assert isinstance(response, ObservationResponse)
26 |
27 | # Check the post request
28 | api_mock.post.assert_called_once_with(payload={
29 | "date": ObservationDate.LATEST,
30 | "variable": {
31 | "dcids": ["dcid/variableID"]
32 | },
33 | "entity": {
34 | "dcids": ["dc/EntityID"],
35 | },
36 | "select": ["date", "variable", "entity", "value"],
37 | "filter": {
38 | "domains": ["domain1"],
39 | "facet_ids": ["facet1"]
40 | }
41 | },
42 | endpoint="observation",
43 | all_pages=True,
44 | next_token=None)
45 |
46 |
47 | def test_fetch_observations_by_entity_type():
48 | """Tests the fetch_observations_by_entity_type method."""
49 | api_mock = MagicMock(spec=API)
50 | api_mock.post.return_value = {"byVariable": {}}
51 | endpoint = ObservationEndpoint(api=api_mock)
52 |
53 | response = endpoint.fetch_observations_by_entity_type(
54 | date="2023",
55 | parent_entity="Earth",
56 | entity_type="Country",
57 | select=["variable", "entity", "facet"],
58 | variable_dcids="dc/VariableID")
59 |
60 | # Check the response
61 | assert isinstance(response, ObservationResponse)
62 |
63 | # Check the post request
64 | api_mock.post.assert_called_once_with(payload={
65 | "date": "2023",
66 | "variable": {
67 | "dcids": ["dc/VariableID"]
68 | },
69 | "entity": {
70 | "expression": "Earth<-containedInPlace+{typeOf:Country}"
71 | },
72 | "select": ["variable", "entity", "facet"],
73 | },
74 | endpoint="observation",
75 | all_pages=True,
76 | next_token=None)
77 |
78 |
79 | def test_fetch_observations_facets_by_entity_type():
80 | """Tests the fetch_observations_by_entity_type method."""
81 | api_mock = MagicMock(spec=API)
82 | api_mock.post.return_value = {"byVariable": {}}
83 | endpoint = ObservationEndpoint(api=api_mock)
84 |
85 | response = endpoint.fetch_observations_by_entity_type(
86 | date="2023",
87 | parent_entity="Earth",
88 | entity_type="Country",
89 | variable_dcids="dc/VariableID",
90 | select=["variable", "entity", "facet"],
91 | )
92 |
93 | # Check the response
94 | assert isinstance(response, ObservationResponse)
95 |
96 | # Check the post request
97 | api_mock.post.assert_called_once_with(payload={
98 | "date": "2023",
99 | "variable": {
100 | "dcids": ["dc/VariableID"]
101 | },
102 | "entity": {
103 | "expression": "Earth<-containedInPlace+{typeOf:Country}"
104 | },
105 | "select": ["variable", "entity", "facet"],
106 | },
107 | endpoint="observation",
108 | all_pages=True,
109 | next_token=None)
110 |
111 |
112 | def test_fetch_available_statistical_variables_single_entity():
113 | """Test fetching variables for a single entity."""
114 | mock_data = {
115 | "var1": ["ent1"],
116 | "var2": ["ent1"],
117 | }
118 |
119 | # Mock the fetch method on the ObservationEndpoint instance
120 | endpoint = ObservationEndpoint(api=MagicMock())
121 | endpoint.fetch = MagicMock()
122 | endpoint.fetch.return_value.get_data_by_entity = MagicMock(
123 | return_value=mock_data)
124 |
125 | result = endpoint.fetch_available_statistical_variables("ent1")
126 |
127 | expected = {
128 | "ent1": ["var1", "var2"],
129 | }
130 | assert result == expected
131 |
132 | endpoint.fetch.assert_called_once_with(
133 | entity_dcids="ent1",
134 | select=[ObservationSelect.VARIABLE, ObservationSelect.ENTITY],
135 | variable_dcids=[])
136 |
137 |
138 | def test_fetch_available_statistical_variables_multiple_entities():
139 | """Test fetching variables for multiple entities."""
140 | mock_data = {
141 | "var1": ["ent1", "ent2"],
142 | "var2": ["ent2"],
143 | }
144 |
145 | endpoint = ObservationEndpoint(api=MagicMock())
146 | endpoint.fetch = MagicMock()
147 | endpoint.fetch.return_value.get_data_by_entity = MagicMock(
148 | return_value=mock_data)
149 |
150 | result = endpoint.fetch_available_statistical_variables(["ent1", "ent2"])
151 |
152 | expected = {
153 | "ent1": ["var1"],
154 | "ent2": ["var1", "var2"],
155 | }
156 | assert result == expected
157 |
--------------------------------------------------------------------------------
/datacommons_client/tests/endpoints/test_payloads.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from datacommons_client.endpoints.payloads import NodeRequestPayload
4 | from datacommons_client.endpoints.payloads import ObservationRequestPayload
5 | from datacommons_client.endpoints.payloads import ResolveRequestPayload
6 | from datacommons_client.models.observation import ObservationDate
7 | from datacommons_client.models.observation import ObservationSelect
8 | from datacommons_client.utils.error_handling import InvalidObservationSelectError
9 |
10 |
11 | def test_node_payload_normalize():
12 | """Tests that NodeRequestPayload correctly normalizes single and multiple node_dcids."""
13 | payload = NodeRequestPayload(node_dcids="node1", expression="prop1")
14 | assert payload.node_dcids == ["node1"]
15 |
16 | payload = NodeRequestPayload(node_dcids=["node1", "node2"],
17 | expression="prop1")
18 | assert payload.node_dcids == ["node1", "node2"]
19 |
20 |
21 | def test_node_payload_validate():
22 | """Tests that NodeRequestPayload validates its inputs correctly."""
23 | with pytest.raises(ValueError):
24 | NodeRequestPayload(node_dcids="node1",
25 | expression=123) # `expression` must be a string
26 |
27 |
28 | def test_node_payload_to_dict():
29 | """Tests NodeRequestPayload conversion to dictionary."""
30 | payload = NodeRequestPayload(node_dcids="node1", expression="prop1")
31 | assert payload.to_dict() == {"nodes": ["node1"], "property": "prop1"}
32 |
33 |
34 | def test_observation_payload_normalize():
35 | """Tests that ObservationRequestPayload normalizes inputs correctly."""
36 | payload = ObservationRequestPayload(
37 | date="LATEST",
38 | variable_dcids="var1",
39 | select=["variable", "entity"],
40 | entity_dcids="ent1",
41 | filter_facet_domains="domain1",
42 | filter_facet_ids="facets1",
43 | )
44 | assert payload.variable_dcids == ["var1"]
45 | assert payload.entity_dcids == ["ent1"]
46 | assert payload.filter_facet_domains == ["domain1"]
47 | assert payload.filter_facet_ids == ["facets1"]
48 | assert payload.date == ObservationDate.LATEST
49 |
50 | assert "filter" in payload.to_dict()
51 | assert "facet_ids" in payload.to_dict()["filter"]
52 | assert "domains" in payload.to_dict()["filter"]
53 |
54 | # Check that when domain and facets are not included, they are not in the payload
55 | payload = ObservationRequestPayload(
56 | date="all",
57 | variable_dcids=["var1"],
58 | select=["variable", "entity"],
59 | entity_dcids=["ent1"],
60 | )
61 | assert payload.date == ObservationDate.ALL
62 | assert payload.variable_dcids == ["var1"]
63 | assert payload.entity_dcids == ["ent1"]
64 | assert "filter" not in payload.to_dict()
65 |
66 |
67 | def test_observation_select_invalid_value():
68 | """Tests that an invalid ObservationSelect value raises InvalidObservationSelectError."""
69 | with pytest.raises(InvalidObservationSelectError):
70 | ObservationSelect("invalid")
71 |
72 |
73 | def test_observation_payload_validate():
74 | """Tests that ObservationRequestPayload validates its inputs."""
75 | with pytest.raises(InvalidObservationSelectError):
76 | ObservationRequestPayload(
77 | date="LATEST",
78 | variable_dcids="var1",
79 |         select=["variable"],  # Missing the required "entity" select field
80 |         entity_dcids=None,
81 |         entity_expression=None,
82 |     )  # select validation fails before the missing-entity check
83 |
84 | with pytest.raises(InvalidObservationSelectError):
85 | ObservationRequestPayload(
86 | date="LATEST",
87 | variable_dcids="var1",
88 | select=["value"], # Missing required "variable" and "entity"
89 | entity_expression="expression",
90 | )
91 |
92 | with pytest.raises(ValueError):
93 | ObservationRequestPayload(
94 | date="LATEST",
95 | variable_dcids="var1",
96 | select=["variable", "entity"],
97 | entity_dcids="ent1",
98 | entity_expression=
99 | "expression", # Both `entity_dcids` and `entity_expression` set
100 | )
101 |
102 |
103 | def test_observation_payload_to_dict():
104 | """Tests ObservationRequestPayload conversion to dictionary."""
105 | payload = ObservationRequestPayload(
106 | date="LATEST",
107 | variable_dcids="var1",
108 | select=["variable", "entity"],
109 | entity_dcids="ent1",
110 | filter_facet_ids="facets1",
111 | )
112 | assert payload.to_dict() == {
113 | "date": ObservationDate.LATEST,
114 | "variable": {
115 | "dcids": ["var1"]
116 | },
117 | "entity": {
118 | "dcids": ["ent1"]
119 | },
120 | "select": ["variable", "entity"],
121 | "filter": {
122 | "facet_ids": ["facets1"]
123 | }
124 | }
125 |
126 |
127 | def test_resolve_payload_normalize():
128 | """Tests that ResolveRequestPayload normalizes single and multiple node_dcids."""
129 | payload = ResolveRequestPayload(node_dcids="node1", expression="expr1")
130 | assert payload.node_dcids == ["node1"]
131 |
132 | payload = ResolveRequestPayload(node_dcids=["node1", "node2"],
133 | expression="expr1")
134 | assert payload.node_dcids == ["node1", "node2"]
135 |
136 |
137 | def test_resolve_payload_validate():
138 | """Tests that ResolveRequestPayload validates its inputs correctly."""
139 | with pytest.raises(ValueError):
140 | ResolveRequestPayload(node_dcids="node1",
141 | expression=123) # `expression` must be a string
142 |
143 |
144 | def test_resolve_payload_to_dict():
145 | """Tests ResolveRequestPayload conversion to dictionary."""
146 | payload = ResolveRequestPayload(node_dcids="node1", expression="expr1")
147 | assert payload.to_dict() == {"nodes": ["node1"], "property": "expr1"}
148 |
--------------------------------------------------------------------------------
/datacommons_client/tests/models/test_observation_models.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from datacommons_client.models.observation import Facet
4 | from datacommons_client.models.observation import Observation
5 | from datacommons_client.models.observation import ObservationSelectList
6 | from datacommons_client.models.observation import OrderedFacet
7 | from datacommons_client.models.observation import Variable
8 | from datacommons_client.utils.error_handling import InvalidObservationSelectError
9 |
10 |
11 | def test_observation_model_validation():
12 | """Test that Observation.model_validate parses data correctly."""
13 | json_data = {"date": "2024-01-01", "value": 123.45}
14 | observation = Observation.model_validate(json_data)
15 | assert observation.date == "2024-01-01"
16 | assert observation.value == 123.45
17 | assert isinstance(observation.value, float)
18 |
19 |
20 | def test_observation_model_validation_partial():
21 | """Test Observation.model_validate with missing data."""
22 | json_data = {"date": "2024-01-01"}
23 | observation = Observation.model_validate(json_data)
24 | assert observation.date == "2024-01-01"
25 | assert observation.value is None
26 |
27 |
28 | def test_ordered_facets_model_validation():
29 | """Test that OrderedFacet.model_validate parses data correctly."""
30 | json_data = {
31 | "earliestDate":
32 | "2023-01-01",
33 | "facetId":
34 | "facet123",
35 | "latestDate":
36 | "2024-01-01",
37 | "obsCount":
38 | 2,
39 | "observations": [
40 | {
41 | "date": "2023-01-01",
42 | "value": 100.0
43 | },
44 | {
45 | "date": "2024-01-01",
46 | "value": 200.0
47 | },
48 | ],
49 | }
50 | ordered_facets = OrderedFacet.model_validate(json_data)
51 | assert ordered_facets.earliestDate == "2023-01-01"
52 | assert ordered_facets.facetId == "facet123"
53 | assert ordered_facets.latestDate == "2024-01-01"
54 | assert ordered_facets.obsCount == 2
55 | assert len(ordered_facets.observations) == 2
56 | assert ordered_facets.observations[0].value == 100.0
57 |
58 |
59 | def test_ordered_facets_model_validation_empty_observations():
60 | """Test OrderedFacet.model_validate with empty observations."""
61 | json_data = {
62 | "earliestDate": "2023-01-01",
63 | "facetId": "facet123",
64 | "latestDate": "2024-01-01",
65 | "obsCount": 0,
66 | "observations": [],
67 | }
68 | ordered_facets = OrderedFacet.model_validate(json_data)
69 | assert len(ordered_facets.observations) == 0
70 |
71 |
72 | def test_variable_model_validation():
73 | """Test that Variable.model_validate parses data correctly."""
74 | json_data = {
75 | "byEntity": {
76 | "entity1": {
77 | "orderedFacets": [{
78 | "earliestDate":
79 | "2023-01-01",
80 | "facetId":
81 | "facet1",
82 | "latestDate":
83 | "2023-12-31",
84 | "obsCount":
85 | 2,
86 | "observations": [
87 | {
88 | "date": "2023-01-01",
89 | "value": 50.0
90 | },
91 | {
92 | "date": "2023-12-31",
93 | "value": 75.0
94 | },
95 | ],
96 | }]
97 | }
98 | }
99 | }
100 | variable = Variable.model_validate(json_data)
101 | assert "entity1" in variable.byEntity
102 | facets = variable.byEntity["entity1"].orderedFacets
103 | assert len(facets) == 1
104 | assert facets[0].facetId == "facet1"
105 | assert facets[0].observations[0].value == 50.0
106 |
107 |
108 | def test_variable_model_validation_empty():
109 | """Test Variable.model_validate with empty byEntity."""
110 | json_data = {"byEntity": {}}
111 | variable = Variable.model_validate(json_data)
112 | assert len(variable.byEntity) == 0
113 |
114 |
115 | def test_facet_model_validation():
116 | """Test that Facet.model_validate parses data correctly."""
117 | json_data = {
118 | "importName": "Import 1",
119 | "measurementMethod": "Method A",
120 | "observationPeriod": "2023",
121 | "provenanceUrl": "http://example.com",
122 | "unit": "usd",
123 | }
124 | facet = Facet.model_validate(json_data)
125 | assert facet.importName == "Import 1"
126 | assert facet.measurementMethod == "Method A"
127 | assert facet.observationPeriod == "2023"
128 | assert facet.provenanceUrl == "http://example.com"
129 | assert facet.unit == "usd"
130 |
131 |
132 | def test_facet_model_validation_partial():
133 | """Test Facet.model_validate with missing data."""
134 | json_data = {"importName": "Import 1", "unit": "GTQ"}
135 | facet = Facet.model_validate(json_data)
136 | assert facet.importName == "Import 1"
137 | assert facet.measurementMethod is None
138 | assert facet.unit == "GTQ"
139 | assert facet.provenanceUrl is None
140 |
141 |
142 | def test_observation_select_list_defaults():
143 | """ObservationSelectList returns default selects when none provided."""
144 | osl = ObservationSelectList.model_validate(None)
145 | assert osl.select == ["date", "variable", "entity", "value"]
146 |
147 |
148 | def test_observation_select_list_custom():
149 | """ObservationSelectList accepts custom select lists."""
150 | osl = ObservationSelectList.model_validate(["variable", "entity", "facet"])
151 | assert osl.select == ["variable", "entity", "facet"]
152 |
153 |
154 | def test_observation_select_list_missing_required():
155 | """Missing required select entries raises InvalidObservationSelectError."""
156 | with pytest.raises(InvalidObservationSelectError):
157 | ObservationSelectList.model_validate(["date", "value"])
158 |
--------------------------------------------------------------------------------
/datacommons_client/endpoints/resolve.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from datacommons_client.endpoints.base import API
4 | from datacommons_client.endpoints.base import Endpoint
5 | from datacommons_client.endpoints.payloads import ResolveRequestPayload
6 | from datacommons_client.endpoints.response import ResolveResponse
7 |
8 |
9 | def _resolve_correspondence_expression(from_type: str,
10 | to_type: str,
11 | entity_type: str | None = None) -> str:
12 | """
13 | Constructs a relation expression for fetching correspondence between entities of two types.
14 |
15 | Args:
16 | from_type (str): The source entity type.
17 | to_type (str): The target entity type.
18 | entity_type (Optional[str]): Optional type of the entities.
19 |
20 | Returns:
21 | str: The relation expression to fetch correspondence between entities of the given types.
22 | """
23 | return (f"<-{from_type}{{typeOf:{entity_type}}}->{to_type}"
24 | if entity_type else f"<-{from_type}->{to_type}")
25 |
26 |
27 | class ResolveEndpoint(Endpoint):
28 | """
29 | A class to interact with the resolve API endpoint.
30 |
31 | Args:
32 | api (API): The API instance providing the environment configuration
33 | (base URL, headers, authentication) to be used for requests.
34 | """
35 |
36 | def __init__(self, api: API):
37 | """Initializes the ResolveEndpoint instance."""
38 | super().__init__(endpoint="resolve", api=api)
39 |
40 | def fetch(self, node_ids: str | list[str],
41 | expression: str | list[str]) -> ResolveResponse:
42 | """
43 | Fetches resolved data for the given nodes and expressions, identified by name,
44 | coordinates, or wiki ID.
45 |
46 | Args:
47 | node_ids (str | list[str]): One or more node IDs to resolve.
48 |       expression (str | list[str]): The relation expression(s) to query.
49 |
50 | Returns:
51 | ResolveResponse: The response object containing the resolved data.
52 | """
53 | # Check if the node_ids is a single string. If so, convert it to a list.
54 | if isinstance(node_ids, str):
55 | node_ids = [node_ids]
56 |
57 | # Construct the payload
58 | payload = ResolveRequestPayload(node_dcids=node_ids,
59 | expression=expression).to_dict()
60 |
61 | # Send the request and return the response
62 | return ResolveResponse.model_validate(self.post(payload))
63 |
64 | def fetch_dcids_by_name(self,
65 | names: str | list[str],
66 | entity_type: Optional[str] = None) -> ResolveResponse:
67 | """
68 | Fetches DCIDs for entities by their names.
69 |
70 | Args:
71 | names (str | list[str]): One or more entity names to resolve.
72 | entity_type (Optional[str]): Optional type of the entities.
73 |
74 | Returns:
75 | ResolveResponse: The response object containing the resolved DCIDs.
76 | """
77 |
78 | expression = _resolve_correspondence_expression(from_type="description",
79 | to_type="dcid",
80 | entity_type=entity_type)
81 |
82 | return self.fetch(node_ids=names, expression=expression)
83 |
84 | def fetch_dcids_by_wikidata_id(
85 | self,
86 | wikidata_ids: str | list[str],
87 | entity_type: Optional[str] = None) -> ResolveResponse:
88 | """
89 | Fetches DCIDs for entities by their Wikidata IDs.
90 |
91 | Args:
92 | wikidata_ids (str | list[str]): One or more Wikidata IDs to resolve.
93 | entity_type (Optional[str]): Optional type of the entities.
94 |
95 | Returns:
96 | ResolveResponse: The response object containing the resolved DCIDs.
97 | """
98 | expression = _resolve_correspondence_expression(from_type="wikidataId",
99 | to_type="dcid",
100 | entity_type=entity_type)
101 |
102 | return self.fetch(node_ids=wikidata_ids, expression=expression)
103 |
104 | def fetch_dcid_by_coordinates(
105 | self,
106 | latitude: str,
107 | longitude: str,
108 | entity_type: Optional[str] = None) -> ResolveResponse:
109 | """
110 | Fetches DCIDs for entities by their geographic coordinates.
111 |
112 | Args:
113 | latitude (str): Latitude of the entity.
114 | longitude (str): Longitude of the entity.
115 | entity_type (Optional[str]): Optional type of the entities to refine results
116 | (e.g., "City", "State", "Country").
117 |
118 | Returns:
119 | ResolveResponse: The response object containing the resolved DCIDs.
120 |
121 | Example:
122 | To find the DCID for "Mountain View" using its latitude and longitude:
123 | ```python
124 | latitude = "37.42"
125 | longitude = "-122.08"
126 | response = client.fetch_dcid_by_coordinates(latitude=latitude, longitude=longitude)
127 | print(response.entities)
128 | ```
129 | Note:
130 | - For ambiguous results, providing an entity type (e.g., "City") can help disambiguate.
131 | - The coordinates should be passed as strings in decimal format (e.g., "37.42", "-122.08").
132 |     """
135 | expression = _resolve_correspondence_expression(from_type="geoCoordinate",
136 | to_type="dcid",
137 | entity_type=entity_type)
138 | coordinates = f"{latitude}#{longitude}"
139 | return self.fetch(node_ids=coordinates, expression=expression)
140 |
--------------------------------------------------------------------------------
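The `_resolve_correspondence_expression` helper above is the basis for all the convenience fetchers; a short sketch of the expressions it builds, consistent with the endpoint tests below:

```python
from datacommons_client.endpoints.resolve import _resolve_correspondence_expression

# With an entity type, the type constraint is embedded in the expression.
expr = _resolve_correspondence_expression(from_type="description",
                                          to_type="dcid",
                                          entity_type="Place")
assert expr == "<-description{typeOf:Place}->dcid"

# Without an entity type, the constraint is omitted.
expr = _resolve_correspondence_expression(from_type="wikidataId", to_type="dcid")
assert expr == "<-wikidataId->dcid"
```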
/datacommons_client/tests/endpoints/test_resolve_endpoint.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import MagicMock
2 |
3 | from datacommons_client.endpoints.base import API
4 | from datacommons_client.endpoints.resolve import _resolve_correspondence_expression
5 | from datacommons_client.endpoints.resolve import ResolveEndpoint
6 | from datacommons_client.endpoints.response import ResolveResponse
7 | from datacommons_client.models.resolve import Candidate
8 | from datacommons_client.models.resolve import Entity
9 |
10 |
11 | def test_fetch():
12 | """Tests the fetch method of ResolveEndpoint."""
13 | api_mock = MagicMock(spec=API)
14 | api_mock.post = MagicMock(return_value={})
15 | endpoint = ResolveEndpoint(api=api_mock)
16 |
17 | response = endpoint.fetch(node_ids="Node1", expression="some_expression")
18 |
19 | # Check the response
20 | assert isinstance(response, ResolveResponse)
21 |
22 | # Check the post request
23 | api_mock.post.assert_called_once_with(payload={
24 | "nodes": ["Node1"],
25 | "property": "some_expression",
26 | },
27 | endpoint="resolve",
28 | all_pages=True,
29 | next_token=None)
30 |
31 |
32 | def test_fetch_dcid_by_name():
33 | """Tests the fetch_dcid_by_name method."""
34 | api_mock = MagicMock(spec=API)
35 | api_mock.post = MagicMock(return_value={})
36 | endpoint = ResolveEndpoint(api=api_mock)
37 |
38 | response = endpoint.fetch_dcids_by_name(names=["Entity1"],
39 | entity_type="Place")
40 |
41 | # Check the response
42 | assert isinstance(response, ResolveResponse)
43 |
44 | # Check the post request
45 | api_mock.post.assert_called_once_with(payload={
46 | "nodes": ["Entity1"],
47 | "property": "<-description{typeOf:Place}->dcid"
48 | },
49 | endpoint="resolve",
50 | all_pages=True,
51 | next_token=None)
52 |
53 |
54 | def test_fetch_dcid_by_wikidata_id():
55 | """Tests the fetch_dcid_by_wikidata_id method."""
56 | api_mock = MagicMock(spec=API)
57 | api_mock.post = MagicMock(return_value={})
58 | endpoint = ResolveEndpoint(api=api_mock)
59 |
60 | response = endpoint.fetch_dcids_by_wikidata_id(wikidata_ids="Q12345",
61 | entity_type="Country")
62 |
63 | # Check the response
64 | assert isinstance(response, ResolveResponse)
65 |
66 | # Check the post request
67 | api_mock.post.assert_called_once_with(payload={
68 | "nodes": ["Q12345"],
69 | "property": "<-wikidataId{typeOf:Country}->dcid",
70 | },
71 | endpoint="resolve",
72 | all_pages=True,
73 | next_token=None)
74 |
75 |
76 | def test_fetch_dcids_list_by_wikidata_id():
77 | """Tests the fetch_dcid_by_wikidata_id method."""
78 | api_mock = MagicMock(spec=API)
79 | api_mock.post = MagicMock(return_value={})
80 | endpoint = ResolveEndpoint(api=api_mock)
81 |
82 | response = endpoint.fetch_dcids_by_wikidata_id(
83 | wikidata_ids=["Q12345", "Q695660"])
84 |
85 | # Check the response
86 | assert isinstance(response, ResolveResponse)
87 |
88 | # Check the post request
89 | api_mock.post.assert_called_once_with(payload={
90 | "nodes": ["Q12345", "Q695660"],
91 | "property": "<-wikidataId->dcid",
92 | },
93 | endpoint="resolve",
94 | all_pages=True,
95 | next_token=None)
96 |
97 |
98 | def test_fetch_dcid_by_coordinates():
99 | """Tests the fetch_dcid_by_coordinates method."""
100 | api_mock = MagicMock(spec=API)
101 | api_mock.post = MagicMock(return_value={})
102 | endpoint = ResolveEndpoint(api=api_mock)
103 |
104 | response = endpoint.fetch_dcid_by_coordinates(latitude="37.7749",
105 | longitude="-122.4194",
106 | entity_type="City")
107 |
108 | # Check the response
109 | assert isinstance(response, ResolveResponse)
110 |
111 | # Check the post request
112 | api_mock.post.assert_called_once_with(payload={
113 | "nodes": ["37.7749#-122.4194"],
114 | "property": "<-geoCoordinate{typeOf:City}->dcid",
115 | },
116 | endpoint="resolve",
117 | all_pages=True,
118 | next_token=None)
119 |
120 |
121 | def test_resolve_correspondence_expression():
122 | """Tests the resolve_correspondence_expression function."""
123 | expression = _resolve_correspondence_expression(from_type="description",
124 | to_type="dcid",
125 | entity_type="Place")
126 | assert expression == "<-description{typeOf:Place}->dcid"
127 |
128 | expression_no_entity_type = _resolve_correspondence_expression(
129 | from_type="description", to_type="dcid")
130 | assert expression_no_entity_type == "<-description->dcid"
131 |
132 |
133 | def test_flatten_resolve_response():
134 | """Tests the flatten_resolve_response function."""
135 | # Mock ResolveResponse with multiple entities
136 | mock_data = ResolveResponse(entities=[
137 | Entity(node="Node1", candidates=[Candidate(dcid="Candidate1")]),
138 | Entity(node="Node2",
139 | candidates=[
140 | Candidate(dcid="Candidate2"),
141 | Candidate(dcid="Candidate3")
142 | ]),
143 | Entity(node="Node3", candidates=[]) # No candidates
144 | ])
145 |
146 | # Call the function
147 | result = mock_data.to_flat_dict()
148 |
149 | # Expected output
150 | expected = {
151 | "Node1": "Candidate1", # Single candidate
152 | "Node2": ["Candidate2", "Candidate3"], # Multiple candidates
153 | "Node3": [], # No candidates
154 | }
155 |
156 | # Assertions
157 | assert result == expected
158 |
--------------------------------------------------------------------------------
/datacommons_client/models/observation.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from typing import Optional
3 |
4 | from pydantic import Field
5 | from pydantic import field_validator
6 | from pydantic import model_serializer
7 | from pydantic import RootModel
8 |
9 | from datacommons_client.models.base import BaseDCModel
10 | from datacommons_client.models.base import DictLikeRootModel
11 | from datacommons_client.models.base import entityDCID
12 | from datacommons_client.models.base import facetID
13 | from datacommons_client.models.base import ListLikeRootModel
14 | from datacommons_client.models.base import variableDCID
15 | from datacommons_client.utils.error_handling import InvalidObservationSelectError
16 |
17 |
18 | class ObservationDate(str, Enum):
19 | LATEST = "LATEST"
20 | ALL = ""
21 |
22 | @classmethod
23 | def _missing_(cls, value):
24 | if isinstance(value, str):
25 | u = value.strip().upper()
26 | if u == "LATEST":
27 | return cls.LATEST
28 | if u in ("ALL", ""):
29 | return cls.ALL
30 |     raise ValueError(f"Invalid date value: '{value}'. Only 'LATEST', 'ALL',"
31 |                      f" or '' (empty string) are allowed.")
32 |
33 |
34 | class ObservationSelect(str, Enum):
35 | DATE = "date"
36 | VARIABLE = "variable"
37 | ENTITY = "entity"
38 | VALUE = "value"
39 | FACET = "facet"
40 |
41 | @classmethod
42 | def valid_values(cls):
43 | """Returns a list of valid enum values."""
44 | return sorted(cls._value2member_map_.keys())
45 |
46 | @classmethod
47 | def _missing_(cls, value):
48 | """Handle missing enum values by raising a custom error."""
49 | message = f"Invalid `select` Field: '{value}'. Only {', '.join(cls.valid_values())} are allowed."
50 | raise InvalidObservationSelectError(message=message)
51 |
52 |
53 | class ObservationSelectList(RootModel[list[ObservationSelect]]):
54 | """A model to represent a list of ObservationSelect values.
55 |
56 |   Attributes:
57 |     root (Optional[list[ObservationSelect | str]]): The validated list of
58 |       select values, also exposed through the `select` property.
58 | """
59 |
60 | root: Optional[list[ObservationSelect | str]] = None
61 |
62 | @field_validator("root", mode="before")
63 | def _validate_select(cls, v):
64 | if v is None:
65 | select = [
66 | ObservationSelect.DATE,
67 | ObservationSelect.VARIABLE,
68 | ObservationSelect.ENTITY,
69 | ObservationSelect.VALUE,
70 | ]
71 | else:
72 | select = v
73 |
74 | select = [ObservationSelect(s).value for s in select]
75 |
76 | required_select = {"variable", "entity"}
77 |
78 | missing_fields = required_select - set(select)
79 | if missing_fields:
80 | raise InvalidObservationSelectError(message=(
81 | f"The 'select' field must include at least the following: {', '.join(required_select)} "
82 | f"(missing: {', '.join(missing_fields)})"))
83 |
84 | return select
85 |
86 | @property
87 | def select(self) -> list[str]:
88 | """Return select values directly as list"""
89 | return self.root or []
90 |
91 |
92 | class Observation(BaseDCModel):
93 | """Represents an observation with a date and value.
94 |
95 | Attributes:
96 | date (str): The date of the observation.
97 | value (float): Optional. The value of the observation.
98 | """
99 |
100 | date: Optional[str] = None
101 | value: Optional[float] = None
102 |
103 |
104 | class OrderedFacet(BaseDCModel):
105 | """Represents ordered facets of observations.
106 |
107 | Attributes:
108 | earliestDate (str): The earliest date in the observations.
109 | facetId (str): The identifier for the facet.
110 | latestDate (str): The latest date in the observations.
111 | obsCount (int): The total number of observations.
112 | observations (List[Observation]): A list of observations associated with the facet.
113 | """
114 |
115 | earliestDate: Optional[str] = None
116 | facetId: Optional[str] = None
117 | latestDate: Optional[str] = None
118 | obsCount: Optional[int] = None
119 | observations: list[Observation] = Field(default_factory=list)
120 |
121 |
122 | class OrderedFacets(BaseDCModel):
123 | """Represents a list of ordered facets.
124 | """
125 | orderedFacets: list[OrderedFacet] = Field(default_factory=list)
126 |
127 |
128 | class Variable(BaseDCModel):
129 | """Represents a variable with data grouped by entity.
130 |
131 | Attributes:
132 | byEntity (dict[entityDCID, OrderedFacets]): A dictionary mapping
133 | entities to their ordered facets.
134 | """
135 |
136 | byEntity: dict[entityDCID, OrderedFacets] = Field(default_factory=dict)
137 |
138 |
139 | class Facet(BaseDCModel):
140 | """Represents metadata for a facet.
141 |
142 | Attributes:
143 | importName (str): The name of the data import.
144 | measurementMethod (str): The method used to measure the data.
145 | observationPeriod (str): The period over which the observations were made.
146 | provenanceUrl (str): The URL of the data's provenance.
147 | unit (str): The unit of the observations.
148 | """
149 |
150 | importName: Optional[str] = None
151 | measurementMethod: Optional[str] = None
152 | observationPeriod: Optional[str] = None
153 | provenanceUrl: Optional[str] = None
154 | unit: Optional[str] = None
155 |
156 |
157 | class ByVariable(BaseDCModel, DictLikeRootModel[dict[variableDCID, Variable]]):
158 | """A root model whose value is a dict mapping variableDCID to Variable."""
159 |
160 |
161 | class VariableByEntity(BaseDCModel,
162 | DictLikeRootModel[dict[variableDCID,
163 | dict[entityDCID,
164 | OrderedFacets]]]):
165 | """A root model whose value is a dict mapping entityDCID to Variable."""
166 |
167 |
168 | class ObservationRecord(Observation, Facet):
169 | """Represents a record of observations for a specific variable and entity.
170 |
171 | Attributes:
172 | date (str): The date of the observation.
173 | value (float): The value of the observation.
174 | """
175 |
176 | entity: Optional[entityDCID] = None
177 | variable: Optional[variableDCID] = None
178 | facetId: Optional[facetID] = None
179 |
180 | _order = [
181 | "date", "entity", "variable", "facetId", "importName",
182 | "measurementMethod", "observationPeriod", "provenanceUrl", "unit", "value"
183 | ]
184 |
185 | @model_serializer(mode="wrap")
186 | def _reorder(self, helper):
187 | """Reorders the fields for serialization."""
188 | data = helper(self)
189 | ordered = {}
190 |
191 | # Ensure the order of fields matches the specified order
192 | for key in self._order:
193 | if key in data:
194 | ordered[key] = data.pop(key)
195 |
196 | # Add any remaining fields that were not in the order list
197 | ordered.update(data)
198 |
199 | # Ensure the 'value' field is always at the end
200 | if "value" in ordered:
201 | ordered["value"] = ordered.pop("value")
202 |
203 | return ordered
204 |
205 |
206 | class ObservationRecords(BaseDCModel,
207 | ListLikeRootModel[list[ObservationRecord]]):
208 | """A root model whose value is a list of ObservationRecord."""
209 |
--------------------------------------------------------------------------------
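As a quick illustration of the `ObservationSelectList` validation above (defaults applied when no select list is given, and `variable`/`entity` required otherwise), consistent with the model tests earlier in this repo:

```python
from datacommons_client.models.observation import ObservationSelectList
from datacommons_client.utils.error_handling import InvalidObservationSelectError

# None falls back to the default select fields.
assert ObservationSelectList.model_validate(None).select == [
    "date", "variable", "entity", "value"
]

# "variable" and "entity" are mandatory; omitting them raises.
try:
  ObservationSelectList.model_validate(["date", "value"])
except InvalidObservationSelectError:
  print("missing required select fields")
```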
/datacommons_client/endpoints/base.py:
--------------------------------------------------------------------------------
1 | import re
2 | from typing import Any, Dict, Optional
3 |
4 | from datacommons_client.utils.request_handling import check_instance_is_valid
5 | from datacommons_client.utils.request_handling import post_request
6 | from datacommons_client.utils.request_handling import resolve_instance_url
7 |
8 |
9 | class API:
10 | """Represents a configured API interface to the Data Commons API.
11 |
12 | This class handles environment setup, resolving the base URL, building headers,
13 | or optionally using a fully qualified URL directly. It can be used standalone
14 | to interact with the API or in combination with Endpoint classes.
15 | """
16 |
17 | def __init__(
18 | self,
19 | api_key: Optional[str] = None,
20 | dc_instance: Optional[str] = None,
21 | url: Optional[str] = None,
22 | surface_header_value: Optional[str] = None,
23 | ):
24 | """
25 | Initializes the API instance.
26 |
27 | Args:
28 | api_key: The API key for authentication. Defaults to None.
29 | dc_instance: The Data Commons instance domain. Ignored if `url` is provided.
30 | Defaults to 'datacommons.org' if both `url` and `dc_instance` are None.
31 | url: A fully qualified URL for the base API. This may be useful if more granular control
32 |         of the API is required (for local development, for example). If provided, `dc_instance`
33 | should not be provided.
34 |       surface_header_value: Indicates which DC surface (an MCP server, for example) is calling
35 |         the Python library. If not provided, the default surface header "clientlib-python" is used.
36 |
37 | Raises:
38 | ValueError: If both `dc_instance` and `url` are provided.
39 | """
40 | if dc_instance and url:
41 | raise ValueError("Cannot provide both `dc_instance` and `url`.")
42 |
43 | if not dc_instance and not url:
44 | dc_instance = "datacommons.org"
45 |
46 | if url is not None:
47 | # Use the given URL directly (strip trailing slash)
48 | self.base_url = check_instance_is_valid(url.rstrip("/"), api_key=api_key)
49 | else:
50 | # Resolve from dc_instance
51 | self.base_url = resolve_instance_url(dc_instance)
52 |
53 | self.headers = self.build_headers(surface_header_value=surface_header_value,
54 | api_key=api_key)
55 |
56 | def __repr__(self) -> str:
57 | """Returns a readable representation of the API object.
58 |
59 | Indicates the base URL and if it's authenticated.
60 |
61 | Returns:
62 | str: A string representation of the API object.
63 | """
64 | has_auth = " (Authenticated)" if "X-API-Key" in self.headers else ""
65 | return f""
66 |
67 | def post(self,
68 | payload: dict[str, Any],
69 | endpoint: Optional[str] = None,
70 | *,
71 | all_pages: bool = True,
72 | next_token: Optional[str] = None) -> Dict[str, Any]:
73 | """Makes a POST request using the configured API environment.
74 |
75 | If `endpoint` is provided, it will be appended to the base_url. Otherwise,
76 | it will just POST to the base URL.
77 |
78 | Args:
79 | payload: The JSON payload for the POST request.
80 | endpoint: An optional endpoint path to append to the base URL.
81 | all_pages: If True, fetch all pages of the response. If False, fetch only the first page.
82 | Defaults to True. Set to False to only fetch the first page. In that case, a
83 | `next_token` key in the response will indicate if more pages are available.
84 |         That token can be used to fetch the next page.
85 |       next_token: Optionally, the token to fetch the next page of results. Defaults to None.
85 |
86 | Returns:
87 | A dictionary containing the merged response data.
88 |
89 | Raises:
90 | ValueError: If the payload is not a valid dictionary.
91 | """
92 | if not isinstance(payload, dict):
93 | raise ValueError("Payload must be a dictionary.")
94 |
95 | url = (self.base_url if endpoint is None else f"{self.base_url}/{endpoint}")
96 |
97 | return post_request(url=url,
98 | payload=payload,
99 | headers=self.headers,
100 | all_pages=all_pages,
101 | next_token=next_token)
102 |
103 | def build_headers(self,
104 | surface_header_value: Optional[str],
105 | api_key: Optional[str] = None) -> dict[str, str]:
106 | """Build request headers for API requests.
107 |
108 | Includes JSON content type. If an API key is provided, add it as `X-API-Key`.
109 |
110 |     Args:
111 |       surface_header_value: The surface header value identifying the caller, if provided.
112 |       api_key: The API key to add as `X-API-Key`, if provided.
112 |
113 | Returns:
114 | A dictionary of headers for the request.
115 | """
116 | headers = {
117 | "Content-Type": "application/json",
118 | "x-surface": "clientlib-python"
119 | }
120 | if api_key:
121 | headers["X-API-Key"] = api_key
122 |
123 | if surface_header_value:
124 | headers["x-surface"] = surface_header_value
125 |
126 | return headers
127 |
128 |
129 | class Endpoint:
130 | """Represents a specific endpoint within the Data Commons API.
131 |
132 | This class leverages an API instance to make requests. It does not
133 | handle instance resolution or headers directly; that is delegated to the API instance.
134 |
135 | Attributes:
136 | endpoint (str): The endpoint path (e.g., 'node').
137 | api (API): The API instance providing configuration and the `post` method.
138 | """
139 |
140 | def __init__(self, endpoint: str, api: API):
141 | """
142 | Initializes the Endpoint instance.
143 |
144 | Args:
145 | endpoint: The endpoint path (e.g., 'node').
146 | api: An API instance that provides the environment configuration.
147 | """
148 | self.endpoint = endpoint
149 | self.api = api
150 |
151 | def __repr__(self) -> str:
152 | """Returns a readable representation of the Endpoint object.
153 |
154 | Shows the endpoint and underlying API configuration.
155 |
156 | Returns:
157 | str: A string representation of the Endpoint object.
158 | """
159 | return f"<{self.endpoint.title()} Endpoint using {repr(self.api)}>"
160 |
161 | def post(self,
162 | payload: dict[str, Any],
163 | all_pages: bool = True,
164 | next_token: Optional[str] = None) -> Dict[str, Any]:
165 | """Makes a POST request to the specified endpoint using the API instance.
166 |
167 | Args:
168 | payload: The JSON payload for the POST request.
169 | all_pages: If True, fetch all pages of the response. If False, fetch only the first page.
170 | Defaults to True. Set to False to only fetch the first page. In that case, a
171 | `next_token` key in the response will indicate if more pages are available.
172 | That token can be used to fetch the next page.
173 | next_token: Optionally, the token to fetch the next page of results. Defaults to None.
174 |
175 | Returns:
176 | A dictionary with the merged API response data.
177 |
178 | Raises:
179 | ValueError: If the payload is not a valid dictionary.
180 | """
181 | return self.api.post(payload=payload,
182 | endpoint=self.endpoint,
183 | all_pages=all_pages,
184 | next_token=next_token)
185 |
--------------------------------------------------------------------------------
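To make the division of labour above concrete, here is a small sketch of how `Endpoint.post` delegates to `API.post`, using a mock in the same style as the endpoint tests so no network access is needed:

```python
from unittest.mock import MagicMock

from datacommons_client.endpoints.base import API
from datacommons_client.endpoints.base import Endpoint

api_mock = MagicMock(spec=API)
api_mock.post.return_value = {}

# The endpoint path is forwarded along with the pagination defaults.
endpoint = Endpoint(endpoint="node", api=api_mock)
endpoint.post(payload={"nodes": ["dcid1"]})

api_mock.post.assert_called_once_with(payload={"nodes": ["dcid1"]},
                                      endpoint="node",
                                      all_pages=True,
                                      next_token=None)
```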
/datacommons_client/utils/data_processing.py:
--------------------------------------------------------------------------------
1 | from dataclasses import asdict
2 | import json
3 | from typing import Any, Dict
4 |
5 | from datacommons_client.models.base import ArcLabel
6 | from datacommons_client.models.base import facetID
7 | from datacommons_client.models.base import NodeDCID
8 | from datacommons_client.models.base import Property
9 | from datacommons_client.models.node import Arcs
10 | from datacommons_client.models.node import FlattenedArcsMapping
11 | from datacommons_client.models.node import FlattenedPropertiesMapping
12 | from datacommons_client.models.node import Name
13 | from datacommons_client.models.node import Node
14 | from datacommons_client.models.node import NodeGroup
15 | from datacommons_client.models.node import Properties
16 | from datacommons_client.models.observation import Facet
17 | from datacommons_client.models.observation import ObservationRecord
18 | from datacommons_client.models.observation import ObservationRecords
19 | from datacommons_client.models.observation import OrderedFacets
20 | from datacommons_client.models.observation import VariableByEntity
21 |
22 |
23 | def unpack_arcs(arcs: Dict[ArcLabel, NodeGroup]) -> dict[Property, list[Node]]:
24 | """Simplify the 'arcs' structure."""
25 | # Return dictionary of property nodes
26 | return {
27 | prop: getattr(arc_data, "nodes", []) for prop, arc_data in arcs.items()
28 | }
29 |
30 |
31 | def flatten_properties(
32 | data: Dict[NodeDCID, Arcs | Properties]
33 | ) -> FlattenedPropertiesMapping | FlattenedArcsMapping:
34 | """
35 | Flatten the properties of a node response.
36 |
37 | Processes a dictionary of node responses, extracting and
38 | simplifying their properties and arcs into a flattened dictionary.
39 |
40 | Args:
41 | data (Dict[NodeDCID, Arcs | Properties]):
42 | The input dictionary containing node responses. Each node maps to
43 | a dictionary with potential "arcs" and "properties" keys.
44 |
45 | Returns:
46 | FlattenedPropertiesMapping | FlattenedArcsMapping:
47 | A flattened dictionary where keys are node identifiers, and values
48 | are the simplified properties or nodes.
49 | """
50 | if not data:
51 | return FlattenedPropertiesMapping.model_validate({})
52 |
53 | first_node = next(iter(data.values()))
54 | is_properties = isinstance(first_node, Properties)
55 | mapping_cls = FlattenedPropertiesMapping if is_properties else FlattenedArcsMapping
56 |
57 | # Store simplified properties
58 | items = {}
59 | for node_id, node_data in data.items():
60 | if is_properties:
61 | props = getattr(node_data, "properties", None)
62 | if props:
63 | items[node_id] = props
64 | else:
65 | arcs = getattr(node_data, "arcs", None)
66 | if arcs:
67 | items[node_id] = unpack_arcs(arcs)
68 |
69 | return mapping_cls.model_validate(items)
70 |
71 |
72 | def extract_observations(
73 | variable: str, entity: str, entity_data: OrderedFacets,
74 | facet_metadata: dict[facetID, Facet]) -> list[ObservationRecord]:
75 | """
76 | Extracts observations for a given variable, entity, and its data.
77 |
78 | Args:
79 | variable (str): The variable name.
80 | entity (str): The entity name.
81 | entity_data (OrderedFacets): Data for the entity, including ordered facets.
82 | facet_metadata (dict[facetID, Facet]): Metadata for facets.
83 |
84 | Returns:
85 |     list[ObservationRecord]: A list of observation records.
86 | """
87 | observations = []
88 | for facet in entity_data.orderedFacets:
89 | for observation in facet.observations:
90 | observations.append(
91 | ObservationRecord.model_validate({
92 | "date": observation.date,
93 | "entity": entity,
94 | "variable": variable,
95 | "value": observation.value,
96 | "facetId": facet.facetId,
97 | **facet_metadata.get(facet.facetId, Facet()).to_dict(),
98 | }))
99 |
100 | return observations
101 |
102 |
103 | def observations_as_records(data: VariableByEntity,
104 | facets: dict[facetID, Facet]) -> ObservationRecords:
105 | """
106 | Converts observation data into a list of records.
107 |
108 | Args:
109 | data (VariableByEntity): A mapping of variables to entities and their data.
110 | facets (dict): Facet metadata for the observations.
111 |
112 | Returns:
113 | ObservationRecords: A flattened list of observation records.
114 | """
115 |
116 | records = []
117 | for variable, entities in data.items():
118 | for entity, entity_data in entities.items():
119 | for record in extract_observations(
120 | variable=variable,
121 | entity=entity,
122 | entity_data=entity_data,
123 | facet_metadata=facets,
124 | ):
125 | records.append(record)
126 |
127 | return ObservationRecords.model_validate(records)
128 |
129 |
130 | def group_variables_by_entity(
131 | data: dict[str, list[str]]) -> dict[str, list[str]]:
132 | """Groups variables by the entities they are associated with.
133 | Takes a dictionary mapping statistical variable DCIDs to a list of entity DCIDs,
134 | and returns a new dictionary mapping each entity DCID to a list of statistical
135 | variables available for that entity.
136 | Args:
137 | data: A dictionary where each key is a variable DCID and the value is a list
138 | of entity DCIDs that have observations for that variable.
139 | Returns:
140 | A dictionary where each key is an entity DCID and the value is a list of
141 | variable DCIDs available for that entity.
142 | """
143 | result: dict[str, list[str]] = {}
144 | for variable, entities in data.items():
145 | for entity in entities:
146 | result.setdefault(entity, []).append(variable)
147 | return result
148 |
149 |
150 | class SerializableMixin:
151 | """Provides serialization methods for the Response dataclasses."""
152 |
153 | def to_dict(self, exclude_none: bool = True) -> Dict[str, Any]:
154 | """Converts the instance to a dictionary.
155 |
156 | Args:
157 |       exclude_none: If True, omit None values from the result.
158 |
159 | Returns:
160 | Dict[str, Any]: The dictionary representation of the instance.
161 | """
162 |
163 | def _remove_none(data: Any) -> Any:
164 | """Recursively removes None or empty values from a dictionary or list."""
165 | if isinstance(data, dict):
166 | return {k: _remove_none(v) for k, v in data.items() if v is not None}
167 | elif isinstance(data, list):
168 | return [_remove_none(item) for item in data]
169 | return data
170 |
171 | result = asdict(self)
172 | return _remove_none(result) if exclude_none else result
173 |
174 | def to_json(self, exclude_none: bool = True) -> str:
175 | """Converts the instance to a JSON string.
176 |
177 | Args:
178 |       exclude_none: If True, omit None values from the result.
179 |
180 | Returns:
181 | str: The JSON string representation of the instance.
182 | """
183 | return json.dumps(self.to_dict(exclude_none=exclude_none), indent=2)
184 |
185 |
186 | def flatten_names_dictionary(names_dict: dict[str, Name]) -> dict[str, str]:
187 | """
188 |   Flattens a dictionary of Name objects into a dictionary with DCIDs as keys
189 |   and names as values.
190 |
191 | Args:
192 | names_dict (dict[str, Name]): The input dictionary to flatten.
193 |
194 | Returns:
195 | dict[str, str]: A flattened dictionary with DCIDs as keys and names as values.
196 | """
197 |
198 | return {dcid: name.to_dict()['value'] for dcid, name in names_dict.items()}
199 |
--------------------------------------------------------------------------------
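A minimal sketch of `group_variables_by_entity` above, which inverts the variable-to-entities mapping returned by the observation endpoint:

```python
from datacommons_client.utils.data_processing import group_variables_by_entity

data = {"var1": ["ent1", "ent2"], "var2": ["ent2"]}

# Each entity ends up keyed to the variables that have observations for it.
assert group_variables_by_entity(data) == {
    "ent1": ["var1"],
    "ent2": ["var1", "var2"],
}
```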
/datacommons_client/endpoints/observation.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from datacommons_client.endpoints.base import API
4 | from datacommons_client.endpoints.base import Endpoint
5 | from datacommons_client.endpoints.payloads import ObservationRequestPayload
6 | from datacommons_client.endpoints.response import ObservationResponse
7 | from datacommons_client.models.observation import ObservationDate
8 | from datacommons_client.models.observation import ObservationSelect
9 | from datacommons_client.utils.data_processing import group_variables_by_entity
10 |
11 |
12 | class ObservationEndpoint(Endpoint):
13 | """
14 | A class to interact with the observation API endpoint.
15 |
16 | Args:
17 | api (API): The API instance providing the environment configuration
18 | (base URL, headers, authentication) to be used for requests.
19 | """
20 |
21 | def __init__(self, api: API):
22 | """Initializes the ObservationEndpoint instance."""
23 | super().__init__(endpoint="observation", api=api)
24 |
25 | def fetch(
26 | self,
27 | variable_dcids: str | list[str],
28 | date: ObservationDate | str = ObservationDate.LATEST,
29 | select: Optional[list[ObservationSelect | str]] = None,
30 | entity_dcids: Optional[str | list[str]] = None,
31 | entity_expression: Optional[str] = None,
32 | filter_facet_domains: Optional[str | list[str]] = None,
33 | filter_facet_ids: Optional[str | list[str]] = None
34 | ) -> ObservationResponse:
35 | """
36 | Fetches data from the observation endpoint.
37 |
38 | Args:
39 | variable_dcids (str | list[str]): One or more variable IDs for the data.
40 | date (str | ObservationDate): The date for which data is being requested.
41 | Defaults to the latest observation.
42 | select (list[ObservationSelect]): Fields to include in the response.
43 | Defaults to ["date", "variable", "entity", "value"].
44 | entity_dcids (Optional[str | list[str]]): One or more entity IDs to filter the data.
45 | entity_expression (Optional[str]): A string expression to filter entities.
46 | filter_facet_domains (Optional[str | list[str]]): One or more domain names to filter the data.
47 | filter_facet_ids (Optional[str | list[str]]): One or more facet IDs to filter the data.
48 |
49 | Returns:
50 | ObservationResponse: The response object containing observations for the specified query.
51 | """
52 | # Construct the payload
53 | payload = ObservationRequestPayload(
54 | date=date,
55 | variable_dcids=variable_dcids,
56 | select=select,
57 | entity_dcids=entity_dcids,
58 | entity_expression=entity_expression,
59 | filter_facet_domains=filter_facet_domains,
60 | filter_facet_ids=filter_facet_ids,
61 | ).to_dict()
62 |
63 |     # Send the request
64 |     response = self.post(payload=payload)
65 | 
66 |     return ObservationResponse.model_validate(response)
67 |
68 | def fetch_observations_by_entity_type(
69 | self,
70 | date: ObservationDate | str,
71 | parent_entity: str,
72 | entity_type: str,
73 | variable_dcids: str | list[str],
74 | *,
75 | select: Optional[list[ObservationSelect | str]] = None,
76 | filter_facet_domains: Optional[str | list[str]] = None,
77 | filter_facet_ids: Optional[str | list[str]] = None
78 | ) -> ObservationResponse:
79 | """
80 | Fetches all observations for a given entity type.
81 |
82 | Args:
83 | date (ObservationDate | str): The date option for the observations.
84 | Use 'all' for all dates, 'latest' for the most recent data,
85 | or provide a date as a string (e.g., "2024").
86 | parent_entity (str): The parent entity under which the target entities fall.
87 | For example, "africa" for African countries, or "Earth" for all countries.
88 | entity_type (str): The type of entities for which to fetch observations.
89 | For example, "Country" or "Region".
90 | variable_dcids (str | list[str]): The variable(s) to fetch observations for.
91 | This can be a single variable ID or a list of IDs.
92 | select (Optional[list[ObservationSelect | str]]): Fields to include in the response.
93 | If not provided, defaults to ["date", "variable", "entity", "value"].
94 |       filter_facet_domains (Optional[str | list[str]]): One or more domain names to filter the data.
95 |       filter_facet_ids (Optional[str | list[str]]): One or more facet IDs to filter the data.
96 |
97 | Returns:
98 | ObservationResponse: The response object containing observations for the specified entity type.
99 |
100 | Example:
101 | To fetch all observations for African countries for a specific variable:
102 |
103 | ```python
104 | api = API()
105 | ObservationEndpoint(api).fetch_observations_by_entity_type(
106 | date="all",
107 | parent_entity="africa",
108 | entity_type="Country",
109 | variable_dcids="sdg/SI_POV_DAY1"
110 | )
111 | ```
112 | """
113 |
114 | return self.fetch(
115 | variable_dcids=variable_dcids,
116 | date=date,
117 | select=[s for s in ObservationSelect] if not select else select,
118 | entity_expression=
119 | f"{parent_entity}<-containedInPlace+{{typeOf:{entity_type}}}",
120 | filter_facet_domains=filter_facet_domains,
121 | filter_facet_ids=filter_facet_ids)
122 |
123 | def fetch_observations_by_entity_dcid(
124 | self,
125 | date: ObservationDate | str,
126 | entity_dcids: str | list[str],
127 | variable_dcids: str | list[str],
128 | *,
129 | select: Optional[list[ObservationSelect | str]] = None,
130 | filter_facet_domains: Optional[str | list[str]] = None,
131 | filter_facet_ids: Optional[str | list[str]] = None
132 | ) -> ObservationResponse:
133 | """
134 |         Fetches all observations for one or more entities specified by DCID.
135 |
136 | Args:
137 | date (ObservationDate | str): The date option for the observations.
138 | Use 'all' for all dates, 'latest' for the most recent data,
139 | or provide a date as a string (e.g., "2024").
140 | entity_dcids (str | list[str]): One or more entity IDs to filter the data.
141 | variable_dcids (str | list[str]): The variable(s) to fetch observations for.
142 | This can be a single variable ID or a list of IDs.
143 | select (Optional[list[ObservationSelect | str]]): Fields to include in the response.
144 | If not provided, defaults to ["date", "variable", "entity", "value"].
145 |             filter_facet_domains (Optional[str | list[str]]): One or more domain names to filter the data.
146 |             filter_facet_ids (Optional[str | list[str]]): One or more facet IDs to filter the data.
147 |
148 | Returns:
149 |             ObservationResponse: The response object containing observations for the specified entities.
150 |
151 | Example:
152 | To fetch all observations for Nigeria for a specific variable:
153 |
154 | ```python
155 | api = API()
156 | ObservationEndpoint(api).fetch_observations_by_entity_dcid(
157 | date="all",
158 | entity_dcids="country/NGA",
159 | variable_dcids="sdg/SI_POV_DAY1"
160 | )
161 | ```
162 | """
163 |
164 | return self.fetch(
165 | variable_dcids=variable_dcids,
166 | date=date,
167 | select=[s for s in ObservationSelect] if not select else select,
168 | entity_dcids=entity_dcids,
169 | filter_facet_domains=filter_facet_domains,
170 | filter_facet_ids=filter_facet_ids)
171 |
172 | def fetch_available_statistical_variables(
173 | self,
174 | entity_dcids: str | list[str],
175 | ) -> dict[str, list[str]]:
176 | """
177 | Fetches available statistical variables (which have observations) for given entities.
178 | Args:
179 |         entity_dcids (str | list[str]): One or more entity DCIDs to fetch variables for.
180 | Returns:
181 | dict[str, list[str]]: A dictionary mapping entity DCIDs to their available statistical variables.
182 | """
183 |
184 |     # Fetch observations for the given entity DCIDs. If the variable list is
185 |     # empty, all available variables are retrieved.
186 | data = self.fetch(
187 | entity_dcids=entity_dcids,
188 | select=[ObservationSelect.VARIABLE, ObservationSelect.ENTITY],
189 | variable_dcids=[]).get_data_by_entity()
190 |
191 | return group_variables_by_entity(data=data)
192 |
--------------------------------------------------------------------------------
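Taken together, `fetch` builds an `ObservationRequestPayload`, POSTs it, and validates the response, while the `fetch_observations_by_*` helpers and `fetch_available_statistical_variables` are thin wrappers over it. A minimal usage sketch, assuming the default public instance and the `API()` construction shown in the docstring examples above (the entity DCID is illustrative):

```python
from datacommons_client.endpoints.base import API
from datacommons_client.endpoints.observation import ObservationEndpoint

# Assumes the default public Data Commons instance; pass an API key if required.
endpoint = ObservationEndpoint(API())

# Maps each entity DCID to the statistical variables that have observations.
available = endpoint.fetch_available_statistical_variables(
    entity_dcids="country/NGA")
print(available.get("country/NGA", [])[:5])
```

Passing an empty `variable_dcids` list is what lets the endpoint enumerate every variable with data, as the inline comment above notes.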
/datacommons/test/node_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import unittest
16 | from unittest.mock import patch
17 |
18 | import datacommons
19 |
20 |
21 | class TestProperties(unittest.TestCase):
22 |
23 | @patch("datacommons.node._post")
24 | def test_with_data(self, _post):
25 |
26 | def side_effect(path, data):
27 | if path == "/v2/node" and data == {
28 | "nodes": ["City", "Count_Person", "foo"],
29 | "property": "->"
30 | }:
31 | return {
32 | "data": {
33 | "City": {
34 | "properties": [
35 | "name", "provenance", "subClassOf", "typeOf"
36 | ]
37 | },
38 | "Count_Person": {
39 | "properties": [
40 | "description", "measuredProperty", "memberOf", "name",
41 | "populationType", "provenance", "statType", "typeOf"
42 | ]
43 | },
44 | "foo": {}
45 | }
46 | }
47 |
48 | _post.side_effect = side_effect
49 | response = datacommons.properties(["City", "Count_Person", "foo"])
50 | assert response == {
51 | "City": ["name", "provenance", "subClassOf", "typeOf"],
52 | "Count_Person": [
53 | "description", "measuredProperty", "memberOf", "name",
54 | "populationType", "provenance", "statType", "typeOf"
55 | ],
56 | "foo": []
57 | }
58 |
59 | @patch("datacommons.node._post")
60 | def test_with_direction(self, _post):
61 |
62 | def side_effect(path, data):
63 | if path == "/v2/node" and data == {
64 | "nodes": ["City", "Count_Person", "foo"],
65 | "property": "<-"
66 | }:
67 | return {
68 | "data": {
69 | "City": {
70 | "properties": [
71 | "placeType", "rangeIncludes", "schoolLocationType",
72 | "typeOf"
73 | ]
74 | },
75 | "Count_Person": {
76 | "properties": [
77 | "measurementDenominator", "outputProperty",
78 | "relevantVariable"
79 | ]
80 | },
81 | "foo": {}
82 | }
83 | }
84 |
85 | _post.side_effect = side_effect
86 | response = datacommons.properties(["City", "Count_Person", "foo"],
87 | is_out=False)
88 | assert response == {
89 | "City": ["placeType", "rangeIncludes", "schoolLocationType", "typeOf"],
90 | "Count_Person": [
91 | "measurementDenominator", "outputProperty", "relevantVariable"
92 | ],
93 | "foo": []
94 | }
95 |
96 |
97 | class TestPropertyValues(unittest.TestCase):
98 |
99 | @patch("datacommons.node._post")
100 | def test_with_data(self, _post):
101 |
102 | def side_effect(path, data):
104 | if path == "/v1/bulk/property/values/out" and data == {
105 | "nodes": ["geoId/06"],
106 | "property": "name",
107 | }:
108 | return {
109 | "data": [{
110 | "node":
111 | "geoId/06",
112 | "values": [{
113 | "provenanceId": "dc/5n63hr1",
114 | "value": "California"
115 | }]
116 | }]
117 | }
118 |
119 | _post.side_effect = side_effect
120 | response = datacommons.property_values(["geoId/06"], "name")
121 | assert response == {"geoId/06": ["California"]}
122 |
123 | @patch("datacommons.node._post")
124 | def test_multiple_values(self, _post):
125 |
126 | def side_effect(path, data):
128 | if path == "/v1/bulk/property/values/out" and data == {
129 | "nodes": ["geoId/06"],
130 | "property": "geoOverlaps",
131 | }:
132 | return {
133 | "data": [{
134 | "node":
135 | "geoId/06",
136 | "values": [{
137 | "provenanceId": "dc/5n63hr1",
138 | "value": "geoId/05"
139 | }, {
140 | "provenanceId": "dc/5n63hr1",
141 | "value": "geoId/07"
142 | }]
143 | }]
144 | }
145 |
146 | _post.side_effect = side_effect
147 | response = datacommons.property_values(["geoId/06"], "geoOverlaps")
148 | assert response == {"geoId/06": ["geoId/05", "geoId/07"]}
149 |
150 |
151 | class TestTriples(unittest.TestCase):
152 |
153 | @patch("datacommons.node._post")
154 | def test_with_data(self, _post):
155 |
156 | def side_effect(path, data):
158 | if path == "/v1/bulk/triples/out" and data == {
159 | "nodes": ["Class"],
160 | }:
161 | return {
162 | "data": [{
163 | "node": "Class",
164 | "triples": {
165 | "typeOf": {
166 | "nodes": [{
167 | "name": "Class",
168 | "types": ["Class"],
169 | "dcid": "Class",
170 | "provenanceId": "dc/5l5zxr1"
171 | }, {
172 | "name": "Class",
173 | "types": ["Class"],
174 | "dcid": "Class",
175 | "provenanceId": "dc/5l5zxr1"
176 | }]
177 | },
178 | "isPartOf": {
179 | "nodes": [{
180 | "provenanceId": "dc/5l5zxr1",
181 | "value": "http://meta.schema.org"
182 | }]
183 | },
184 | "name": {
185 | "nodes": [{
186 | "provenanceId": "dc/5l5zxr1",
187 | "value": "Class"
188 | }]
189 | },
190 | "provenance": {
191 | "nodes": [{
192 | "name": "BaseSchema",
193 | "types": ["Provenance"],
194 | "dcid": "dc/5l5zxr1",
195 | "provenanceId": "dc/5l5zxr1"
196 | }]
197 | },
198 | "sameAs": {
199 | "nodes": [{
200 | "provenanceId": "dc/5l5zxr1",
201 | "value": "http://www.w3.org/2000/01/rdf-schema"
202 | }]
203 | },
204 | "subClassOf": {
205 | "nodes": [{
206 | "name": "Intangible",
207 | "types": ["Class"],
208 | "dcid": "Intangible",
209 | "provenanceId": "dc/5l5zxr1"
210 | }]
211 | }
212 | }
213 | }]
214 | }
215 |
216 | _post.side_effect = side_effect
217 | response = datacommons.triples(["Class"])
218 | assert response == {
219 | "Class": {
220 | 'isPartOf': [{
221 | 'provenanceId': 'dc/5l5zxr1',
222 | 'value': 'http://meta.schema.org'
223 | }],
224 | 'name': [{
225 | 'provenanceId': 'dc/5l5zxr1',
226 | 'value': 'Class'
227 | }],
228 | 'provenance': [{
229 | 'dcid': 'dc/5l5zxr1',
230 | 'name': 'BaseSchema',
231 | 'provenanceId': 'dc/5l5zxr1',
232 | 'types': ['Provenance']
233 | }],
234 | 'sameAs': [{
235 | 'provenanceId': 'dc/5l5zxr1',
236 | 'value': 'http://www.w3.org/2000/01/rdf-schema'
237 | }],
238 | 'subClassOf': [{
239 | 'dcid': 'Intangible',
240 | 'name': 'Intangible',
241 | 'provenanceId': 'dc/5l5zxr1',
242 | 'types': ['Class']
243 | }],
244 | 'typeOf': [{
245 | 'dcid': 'Class',
246 | 'name': 'Class',
247 | 'provenanceId': 'dc/5l5zxr1',
248 | 'types': ['Class']
249 | }, {
250 | 'dcid': 'Class',
251 | 'name': 'Class',
252 | 'provenanceId': 'dc/5l5zxr1',
253 | 'types': ['Class']
254 | }]
255 | },
256 | }
257 |
--------------------------------------------------------------------------------
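These tests pin the exact request path and body inside each `side_effect`, so a mismatched request silently returns `None` and fails the assertion. A hypothetical additional case in the same style (the test class, method name, and node DCID are invented for illustration), using `return_value` where the request shape itself is not under test:

```python
import unittest
from unittest.mock import patch

import datacommons


class TestPropertiesFallback(unittest.TestCase):

  @patch("datacommons.node._post")
  def test_missing_properties_key(self, _post):
    # A node entry without a "properties" key should fall back to an
    # empty list, mirroring the "foo": {} case exercised above.
    _post.return_value = {"data": {"bar": {}}}
    self.assertEqual(datacommons.properties(["bar"]), {"bar": []})


if __name__ == "__main__":
  unittest.main()
```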
/datacommons/core.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API Core.
15 |
16 | Provides primitive operations for working with collections of nodes. For a
17 | collection of nodes identified by their dcids, this submodule implements the
18 | following:
19 |
20 | - Getting all property labels
21 | - Getting all property values
22 | - Getting all triples
23 | """
24 |
25 | from __future__ import absolute_import
26 | from __future__ import division
27 | from __future__ import print_function
28 |
29 | from collections import defaultdict
30 |
31 | import datacommons.utils as utils
32 |
33 | # ----------------------------- WRAPPER FUNCTIONS -----------------------------
34 |
35 |
36 | def get_property_labels(dcids, out=True):
37 | """ Returns the labels of properties defined for the given :code:`dcids`.
38 |
39 | Args:
40 | dcids (:obj:`iterable` of :obj:`str`): A list of nodes identified by their
41 | dcids.
42 | out (:obj:`bool`, optional): Whether or not the property points away from
43 | the given list of nodes.
44 |
45 | Returns:
46 | A :obj:`dict` mapping dcids to lists of property labels. If `out` is `True`,
47 | then property labels correspond to edges directed away from given nodes.
48 | Otherwise, they correspond to edges directed towards the given nodes.
49 |
50 | Raises:
51 | ValueError: If the payload returned by the Data Commons REST API is
52 | malformed.
53 |
54 | Examples:
55 |     To get all outgoing property labels for
56 |     California (geoId/06) and
57 |     Colorado (geoId/08), we can write
58 |     the following.
59 |
60 | >>> get_property_labels(['geoId/06', 'geoId/08'])
61 | {
62 | "geoId/06": [
63 | "containedInPlace",
64 | "geoId",
65 | "kmlCoordinates",
66 | "name",
67 | "provenance",
68 | "typeOf"
69 | ],
70 |         "geoId/08": [
71 | "containedInPlace",
72 | "geoId",
73 | "kmlCoordinates",
74 | "name",
75 | "provenance",
76 | "typeOf"
77 | ]
78 | }
79 |
80 | We can also get incoming property labels by setting `out=False`.
81 |
82 | >>> get_property_labels(['geoId/06', 'geoId/08'], out=False)
83 | {
84 | "geoId/06": [
85 | "addressRegion",
86 | "containedInPlace",
87 | "location",
88 | "overlapsWith"
89 | ],
90 |         "geoId/08": [
91 | "addressRegion",
92 | "containedInPlace",
93 | "location",
94 | "overlapsWith"
95 | ]
96 | }
97 | """
98 | # Generate the GetProperty query and send the request
99 | dcids = filter(lambda v: v == v, dcids) # Filter out NaN values
100 | dcids = list(dcids)
101 | url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels']
102 | payload = utils._send_request(url, req_json={'dcids': dcids})
103 |
104 | # Return the results based on the orientation
105 | results = {}
106 | for dcid in dcids:
107 | if out:
108 | results[dcid] = payload[dcid]['outLabels']
109 | else:
110 | results[dcid] = payload[dcid]['inLabels']
111 | return results
112 |
113 |
114 | def get_property_values(dcids,
115 | prop,
116 | out=True,
117 | value_type=None,
118 | limit=utils._MAX_LIMIT):
119 | """ Returns property values of given :code:`dcids` along the given property.
120 |
121 | Args:
122 | dcids (:obj:`iterable` of :obj:`str`): dcids to get property values for.
123 | prop (:obj:`str`): The property to get property values for.
124 | out (:obj:`bool`, optional): A flag that indicates the property is directed
125 | away from the given nodes when set to true.
126 | value_type (:obj:`str`, optional): A type to filter returned property values
127 | by.
128 | limit (:obj:`int`, optional): The maximum number of property values returned
129 | aggregated over all given nodes.
130 |
131 | Returns:
132 |     A :obj:`dict` mapping each given dcid
133 |     to a list of its property values.
134 |
135 | Raises:
136 | ValueError: If the payload returned by the Data Commons REST API is
137 | malformed.
138 |
139 | Examples:
140 |     We would like to get the `name` of a list of states specified by their dcid:
141 |     geoId/06 (California),
142 |     geoId/21 (Kentucky), and
143 |     geoId/24 (Maryland).
144 |
145 | First, let's try specifying the :code:`dcids` as a :obj:`list` of
146 | :obj:`str`.
147 |
148 | >>> get_property_values(["geoId/06", "geoId/21", "geoId/24"], "name")
149 | {
150 | "geoId/06": ["California"],
151 | "geoId/21": ["Kentucky"],
152 | "geoId/24": ["Maryland"],
153 | }
154 | """
155 | # Convert the dcids field and format the request to GetPropertyValue
156 | dcids = filter(lambda v: v == v, dcids) # Filter out NaN values
157 | dcids = list(dcids)
158 | if out:
159 | direction = 'out'
160 | else:
161 | direction = 'in'
162 |
163 | req_json = {
164 | 'dcids': dcids,
165 | 'property': prop,
166 | 'limit': limit,
167 | 'direction': direction
168 | }
169 | if value_type:
170 | req_json['value_type'] = value_type
171 |
172 | # Send the request
173 | url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_values']
174 | payload = utils._send_request(url, req_json=req_json)
175 |
176 | # Create the result format for when dcids is provided as a list.
177 | unique_results = defaultdict(set)
178 | for dcid in dcids:
179 | # Get the list of nodes based on the direction given.
180 | nodes = []
181 | if out:
182 | if dcid in payload and 'out' in payload[dcid]:
183 | nodes = payload[dcid]['out']
184 | else:
185 | if dcid in payload and 'in' in payload[dcid]:
186 | nodes = payload[dcid]['in']
187 |
188 |     # Record each returned node's dcid or value under this dcid.
189 | for node in nodes:
190 | if 'dcid' in node:
191 | unique_results[dcid].add(node['dcid'])
192 | elif 'value' in node:
193 | unique_results[dcid].add(node['value'])
194 |
195 | # Make sure each dcid is in the results dict, and convert all sets to lists.
196 | results = {dcid: sorted(list(unique_results[dcid])) for dcid in dcids}
197 |
198 | return results
199 |
200 |
201 | def get_triples(dcids, limit=utils._MAX_LIMIT):
202 | """ Returns all triples associated with the given :code:`dcids`.
203 |
204 | A knowledge graph can be described as a collection of `triples` which are
205 | 3-tuples that take the form `(s, p, o)`. Here `s` and `o` are nodes in the
206 | graph called the *subject* and *object* respectively while `p` is the property
207 | label of a directed edge from `s` to `o` (sometimes also called the
208 | *predicate*).
209 |
210 | Args:
211 | dcids (:obj:`iterable` of :obj:`str`): A list of dcids to get triples for.
212 | limit (:obj:`int`, optional): The maximum total number of triples to get.
213 |
214 | Returns:
215 | A :obj:`dict` mapping dcids to a :obj:`list` of triples `(s, p, o)` where
216 | `s`, `p`, and `o` are instances of :obj:`str` and either the subject
217 | or object is the mapped dcid.
218 |
219 | Raises:
220 | ValueError: If the payload returned by the Data Commons REST API is
221 | malformed.
222 |
223 | Examples:
224 |     We would like to get five triples associated with
225 |     California (geoId/06).
226 |
227 | >>> get_triples(["geoId/06"], limit=5)
228 | {
229 | "geoId/06": [
230 | ("geoId/06", "name", "California"),
231 | ("geoId/06", "typeOf", "State"),
232 | ("geoId/06", "geoId", "06"),
233 | ("geoId/0687056", "containedInPlace", "geoId/06"),
234 | ("geoId/0686440", "containedInPlace", "geoId/06")
235 | ]
236 | }
237 | """
238 | # Generate the GetTriple query and send the request.
239 | dcids = filter(lambda v: v == v, dcids) # Filter out NaN values
240 | dcids = list(dcids)
241 | url = utils._API_ROOT + utils._API_ENDPOINTS['get_triples']
242 | payload = utils._send_request(url, req_json={'dcids': dcids, 'limit': limit})
243 |
244 | # Create a map from dcid to list of triples.
245 | results = defaultdict(list)
246 | for dcid in dcids:
247 | # Make sure each dcid is mapped to an empty list.
248 | results[dcid]
249 |
250 | # Add triples as appropriate
251 | for t in payload[dcid]:
252 | if 'objectId' in t:
253 | results[dcid].append((t['subjectId'], t['predicate'], t['objectId']))
254 | elif 'objectValue' in t:
255 | results[dcid].append((t['subjectId'], t['predicate'], t['objectValue']))
256 | return dict(results)
257 |
--------------------------------------------------------------------------------
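All three wrappers follow the same pattern: drop NaN dcids, send one request, then normalize the payload per dcid. A short usage sketch, assuming the functions are re-exported at package level as the docstring examples suggest and that any required API key has already been configured:

```python
import datacommons as dc

# Outgoing property labels, as in the docstring example.
labels = dc.get_property_labels(['geoId/06', 'geoId/08'])

# Property values: each dcid maps to a sorted, de-duplicated list.
names = dc.get_property_values(['geoId/06', 'geoId/21', 'geoId/24'], 'name')

# Triples: each dcid maps to (subject, predicate, object) tuples.
triples = dc.get_triples(['geoId/06'], limit=5)
```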
/datacommons_client/client.py:
--------------------------------------------------------------------------------
1 | from typing import Literal, Optional
2 |
3 | from datacommons_client.endpoints.base import API
4 | from datacommons_client.endpoints.node import NodeEndpoint
5 | from datacommons_client.endpoints.observation import ObservationEndpoint
6 | from datacommons_client.endpoints.resolve import ResolveEndpoint
7 | from datacommons_client.models.observation import ObservationDate
8 | from datacommons_client.utils.dataframes import add_entity_names_to_observations_dataframe
9 | from datacommons_client.utils.dataframes import add_property_constraints_to_observations_dataframe
10 | from datacommons_client.utils.decorators import requires_pandas
11 | from datacommons_client.utils.error_handling import NoDataForPropertyError
12 |
13 | try:
14 | import pandas as pd
15 | except ImportError:
16 | pd = None
17 |
18 |
19 | class DataCommonsClient:
20 | """
21 | A client for interacting with the Data Commons API.
22 |
23 | This class provides convenient access to the V2 Data Commons API endpoints.
24 |
25 | Attributes:
26 | api (API): An instance of the API class that handles requests.
27 | node (NodeEndpoint): Provides access to node-related queries, such as fetching property labels
28 | and values for individual or multiple nodes in the Data Commons knowledge graph.
29 | observation (ObservationEndpoint): Handles observation-related queries, allowing retrieval of
30 | statistical observations associated with entities, variables, and dates (e.g., GDP of California in 2010).
31 |         resolve (ResolveEndpoint): Manages resolution queries, mapping entity descriptions or external identifiers to DCIDs.
32 |
33 | """
34 |
35 | def __init__(self,
36 | api_key: Optional[str] = None,
37 | *,
38 | dc_instance: Optional[str] = "datacommons.org",
39 | url: Optional[str] = None,
40 | surface_header_value: Optional[str] = None):
41 | """
42 | Initializes the DataCommonsClient.
43 |
44 | Args:
45 | api_key (Optional[str]): The API key for authentication. Defaults to None. Note that
46 | custom DC instances do not currently require an API key.
47 | dc_instance (Optional[str]): The Data Commons instance to use. Defaults to "datacommons.org".
48 |             url (Optional[str]): A custom, fully resolved URL for the Data Commons API. Defaults to None.
49 |             surface_header_value (Optional[str]): Optional identifier for the calling surface, forwarded to the underlying API client. Defaults to None.
49 | """
50 | # If a fully resolved URL is provided, and the default dc_instance is used,
51 | # ignore that default value
52 | if dc_instance == "datacommons.org" and url:
53 | dc_instance = None
54 |
55 | # Create an instance of the API class which will be injected to the endpoints
56 | self.api = API(api_key=api_key,
57 | dc_instance=dc_instance,
58 | url=url,
59 | surface_header_value=surface_header_value)
60 |
61 | # Create instances of the endpoints
62 | self.node = NodeEndpoint(api=self.api)
63 | self.observation = ObservationEndpoint(api=self.api)
64 | self.resolve = ResolveEndpoint(api=self.api)
65 |
66 | def _find_filter_facet_ids(
67 | self,
68 | fetch_by: Literal["entity", "entity_type"],
69 | date: ObservationDate | str,
70 | variable_dcids: str | list[str],
71 | entity_dcids: Literal["all"] | list[str] = "all",
72 | entity_type: Optional[str] = None,
73 | parent_entity: Optional[str] = None,
74 | property_filters: Optional[dict[str, str | list[str]]] = None,
75 | ) -> list[str] | None:
76 | """Finds matching facet IDs for property filters.
77 |
78 | Args:
79 |       fetch_by (Literal["entity", "entity_type"]): Determines whether to fetch by entity or entity type.
80 |       date (ObservationDate | str): The date for which observations are fetched when searching for matching facets.
80 | variable_dcids (str | list[str]): The variable DCIDs for which to retrieve facet IDs.
81 | entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs, or "all" if filtering by entity type.
82 | entity_type (Optional[str]): The entity type, required if fetching by entity type.
83 | parent_entity (Optional[str]): The parent entity, used when fetching by entity type.
84 |       property_filters (Optional[dict[str, str | list[str]]]): A dictionary of properties to match facets against.
85 |
86 | Returns:
87 | list[str] | None: A list of matching facet IDs, or None if no filters are applied.
88 | """
89 |
90 | if not property_filters:
91 | return None
92 |
93 | if fetch_by == "entity":
94 | observations = self.observation.fetch_observations_by_entity_dcid(
95 | date=date,
96 | entity_dcids=entity_dcids,
97 | variable_dcids=variable_dcids,
98 | select=["variable", "entity", "facet"],
99 | )
100 | else:
101 | observations = self.observation.fetch_observations_by_entity_type(
102 | date=date,
103 | entity_type=entity_type,
104 | parent_entity=parent_entity,
105 | variable_dcids=variable_dcids,
106 | select=["variable", "entity", "facet"],
107 | )
108 |
109 | facet_sets = [
110 | observations.find_matching_facet_id(property_name=p, value=v)
111 | for p, v in property_filters.items()
112 | ]
113 |
114 | facet_ids = list({facet for facets in facet_sets for facet in facets})
115 |
116 | return facet_ids
117 |
118 | @requires_pandas
119 | def observations_dataframe(
120 | self,
121 | variable_dcids: str | list[str],
122 | date: ObservationDate | str,
123 | entity_dcids: Literal["all"] | list[str] = "all",
124 | entity_type: Optional[str] = None,
125 | parent_entity: Optional[str] = None,
126 | property_filters: Optional[dict[str, str | list[str]]] = None,
127 | include_constraints_metadata: bool = False,
128 | ):
129 | """
130 | Fetches statistical observations and returns them as a Pandas DataFrame.
131 |
132 | The Observation API fetches statistical observations linked to entities and variables
133 | at a particular date (e.g., "population of USA in 2020", "GDP of California in 2010").
134 |
135 | Args:
136 | variable_dcids (str | list[str]): One or more variable DCIDs for the observation.
137 | date (ObservationDate | str): The date for which observations are requested. It can be
138 | a specific date, "all" to retrieve all observations, or "latest" to get the most recent observations.
139 | entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs for which to retrieve data.
140 | Defaults to "all".
141 | entity_type (Optional[str]): The type of entities to filter by when `entity_dcids="all"`.
142 | Required if `entity_dcids="all"`. Defaults to None.
143 | parent_entity (Optional[str]): The parent entity under which the target entities fall.
144 | Required if `entity_dcids="all"`. Defaults to None.
145 |         property_filters (Optional[dict[str, str | list[str]]]): An optional dictionary used to filter
146 | the data by using observation properties like `measurementMethod`, `unit`, or `observationPeriod`.
147 | include_constraints_metadata (bool): If True, includes the dcid and name of any constraint
148 | properties associated with the variable DCIDs (based on the `constraintProperties` property)
149 | in the returned DataFrame. Defaults to False.
150 |
151 | Returns:
152 | pd.DataFrame: A DataFrame containing the requested observations.
153 | """
154 |
155 | if entity_dcids == "all" and not (entity_type and parent_entity):
156 | raise ValueError(
157 | "When 'entity_dcids' is 'all', both 'parent_entity' and 'entity_type' must be specified."
158 | )
159 |
160 | if entity_dcids != "all" and (entity_type or parent_entity):
161 | raise ValueError(
162 | "Specify 'entity_type' and 'parent_entity' only when 'entity_dcids' is 'all'."
163 | )
164 |
165 | # If property filters are provided, fetch the required facet IDs. Otherwise, set to None.
166 | facets = self._find_filter_facet_ids(
167 | fetch_by="entity" if entity_dcids != "all" else "entity_type",
168 | date=date,
169 | variable_dcids=variable_dcids,
170 | entity_dcids=entity_dcids,
171 | entity_type=entity_type,
172 | parent_entity=parent_entity,
173 | property_filters=property_filters,
174 | )
175 |
176 | if not facets and property_filters:
177 | raise NoDataForPropertyError
178 |
179 | if entity_dcids == "all":
180 | observations = self.observation.fetch_observations_by_entity_type(
181 | date=date,
182 | parent_entity=parent_entity,
183 | entity_type=entity_type,
184 | variable_dcids=variable_dcids,
185 | filter_facet_ids=facets,
186 | )
187 | else:
188 | observations = self.observation.fetch_observations_by_entity_dcid(
189 | date=date,
190 | entity_dcids=entity_dcids,
191 | variable_dcids=variable_dcids,
192 | filter_facet_ids=facets,
193 | )
194 |
195 | # Convert the observations to a DataFrame
196 | df = pd.DataFrame(observations.to_observation_records().model_dump())
197 |
198 | # Add entity names to the DataFrame
199 | df = add_entity_names_to_observations_dataframe(
200 | endpoint=self.node,
201 | observations_df=df,
202 | entity_columns=["entity", "variable"],
203 | )
204 |
205 | if include_constraints_metadata:
206 | df = add_property_constraints_to_observations_dataframe(
207 | endpoint=self.node,
208 | observations_df=df,
209 | )
210 |
211 | return df
212 |
--------------------------------------------------------------------------------
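`observations_dataframe` accepts exactly two call shapes: `entity_dcids="all"` together with both `parent_entity` and `entity_type`, or explicit DCIDs with neither. A sketch of both shapes, assuming a valid API key (the key and DCIDs are placeholders):

```python
from datacommons_client.client import DataCommonsClient

client = DataCommonsClient(api_key="YOUR_API_KEY")  # placeholder key

# Shape 1: all entities of a type under a parent entity.
df_all = client.observations_dataframe(
    variable_dcids="sdg/SI_POV_DAY1",
    date="latest",
    entity_dcids="all",
    entity_type="Country",
    parent_entity="africa",
)

# Shape 2: explicit entity DCIDs; entity_type/parent_entity must be omitted.
df_some = client.observations_dataframe(
    variable_dcids="sdg/SI_POV_DAY1",
    date="all",
    entity_dcids=["country/NGA", "country/KEN"],
)
```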
/datacommons/stat_vars.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Data Commons Python API Stat Module.
15 |
16 | Provides functions for getting data on StatisticalVariables from Data Commons Graph.
17 | """
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 |
23 | import collections
24 |
25 | import six
26 |
27 | import datacommons.utils as utils
28 |
29 | # stat_var specific batch size.
30 | _STAT_BATCH_SIZE = 2000
31 |
32 |
33 | def get_stat_value(place,
34 | stat_var,
35 | date=None,
36 | measurement_method=None,
37 | observation_period=None,
38 | unit=None,
39 | scaling_factor=None):
40 | """Returns a value for `place` based on the `stat_var`.
41 |
42 | Args:
43 | place (`str`): The dcid of Place to query for.
44 | stat_var (`str`): The dcid of the StatisticalVariable.
45 | date (`str`): Optional, the preferred date of observation
46 | in ISO 8601 format. If not specified, returns the latest observation.
47 | measurement_method (`str`): Optional, the dcid of the preferred
48 | `measurementMethod` value.
49 | observation_period (`str`): Optional, the preferred
50 | `observationPeriod` value.
51 | unit (`str`): Optional, the dcid of the preferred `unit` value.
52 | scaling_factor (`int`): Optional, the preferred `scalingFactor` value.
53 | Returns:
54 |       A `float`: the value of `stat_var` for `place`, filtered
55 | by optional args. If no data, returns nan.
56 |
57 | Raises:
58 | ValueError: If the payload returned by the Data Commons REST API is
59 | malformed.
60 |
61 | Examples:
62 | >>> get_stat_value("geoId/05", "Count_Person")
63 | 366331
64 | """
65 | url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_value']
66 | url += '?place={}&stat_var={}'.format(place, stat_var)
67 | if date:
68 | url += '&date={}'.format(date)
69 | if measurement_method:
70 | url += '&measurement_method={}'.format(measurement_method)
71 | if observation_period:
72 | url += '&observation_period={}'.format(observation_period)
73 | if unit:
74 | url += '&unit={}'.format(unit)
75 | if scaling_factor:
76 | url += '&scaling_factor={}'.format(scaling_factor)
77 |
78 | try:
79 | res_json = utils._send_request(url, post=False, use_payload=False)
80 | except ValueError:
81 | return float('nan')
82 | if 'value' not in res_json:
83 | return float('nan')
84 | return res_json['value']
85 |
86 |
87 | def get_stat_series(place,
88 | stat_var,
89 | measurement_method=None,
90 | observation_period=None,
91 | unit=None,
92 | scaling_factor=None):
93 | """Returns a `dict` mapping dates to value of `stat_var` for `place`.
94 |
95 | Args:
96 | place (`str`): The dcid of Place to query for.
97 | stat_var (`str`): The dcid of the StatisticalVariable.
98 | measurement_method (`str`): Optional, the dcid of the preferred
99 | `measurementMethod` value.
100 | observation_period (`str`): Optional, the preferred
101 | `observationPeriod` value.
102 | unit (`str`): Optional, the dcid of the preferred `unit` value.
103 | scaling_factor (`int`): Optional, the preferred `scalingFactor` value.
104 | Returns:
105 | A `dict` mapping dates to value of `stat_var` for `place`,
106 | representing a time series that satisfies all input parameters.
107 |
108 | Raises:
109 | ValueError: If the payload returned by the Data Commons REST API is
110 | malformed.
111 |
112 | Examples:
113 | >>> get_stat_series("geoId/05", "Count_Person")
114 | {"1962":17072000,"2009":36887615,"1929":5531000,"1930":5711000}
115 | """
116 | url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_series']
117 | url += '?place={}&stat_var={}'.format(place, stat_var)
118 | if measurement_method:
119 | url += '&measurement_method={}'.format(measurement_method)
120 | if observation_period:
121 | url += '&observation_period={}'.format(observation_period)
122 | if unit:
123 | url += '&unit={}'.format(unit)
124 | if scaling_factor:
125 | url += '&scaling_factor={}'.format(scaling_factor)
126 |
127 | try:
128 | res_json = utils._send_request(url, post=False, use_payload=False)
129 | except ValueError:
130 | return {}
131 |
132 | if 'series' not in res_json:
133 | return {}
134 | return res_json['series']
135 |
136 |
137 | def get_stat_all(places, stat_vars):
138 | """Returns a nested `dict` of all time series for `places` and `stat_vars`.
139 |
140 | Args:
141 | places (`Iterable` of `str`): The dcids of Places to query for.
142 | stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
143 | Returns:
144 | A nested `dict` mapping Places to StatisticalVariables and all available
145 | time series for each Place and StatisticalVariable pair.
146 |
147 | Raises:
148 | ValueError: If the payload returned by the Data Commons REST API is
149 | malformed.
150 |
151 | Examples:
152 | >>> get_stat_all(["geoId/05", "geoId/06"], ["Count_Person", "Count_Person_Male"])
153 | {
154 | "geoId/05": {
155 | "Count_Person": {
156 | "sourceSeries": [
157 | {
158 | "val": {
159 | "2010": 1633,
160 | "2011": 1509,
161 | "2012": 1581,
162 | },
163 | "observationPeriod": "P1Y",
164 | "importName": "Wikidata",
165 | "provenanceDomain": "wikidata.org"
166 | },
167 | {
168 | "val": {
169 | "2010": 1333,
170 | "2011": 1309,
171 | "2012": 131,
172 | },
173 | "observationPeriod": "P1Y",
174 | "importName": "CensusPEPSurvey",
175 | "provenanceDomain": "census.gov"
176 | }
177 | ],
178 |             },
179 |             "Count_Person_Male": {
181 | "sourceSeries": [
182 | {
183 | "val": {
184 | "2010": 1633,
185 | "2011": 1509,
186 | "2012": 1581,
187 | },
188 | "observationPeriod": "P1Y",
189 | "importName": "CensusPEPSurvey",
190 | "provenanceDomain": "census.gov"
191 | }
192 | ],
193 | }
194 | },
195 |         "geoId/06": {
196 | "Count_Person": {},
197 | "Count_Person_Male": {
198 | "sourceSeries": [
199 | {
200 | "val": {
201 | "2010": 13,
202 | "2011": 13,
203 | "2012": 322,
204 | },
205 | "observationPeriod": "P1Y",
206 | "importName": "CensusPEPSurvey",
207 | "provenanceDomain": "census.gov"
208 | }
209 | ]
210 | }
211 | }
212 | }
213 | """
214 | url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all']
215 | # Cast iterable-like to list.
216 | places = list(places)
217 | stat_vars = list(stat_vars)
218 |
219 | # Aiming for _STAT_BATCH_SIZE entries total.
220 | # _STAT_BATCH_SIZE = num places x num stat_vars, so aim for
221 | # _STAT_BATCH_SIZE/len(stat_vars) places per batch.
222 | places_per_batch = _STAT_BATCH_SIZE // len(stat_vars)
223 | # Get number of batches via an arithmetic ceiling trick:
224 | # 11//10 rounds down to 1.
225 | # -11//10 rounds down to -2.
226 |   # So we negate, floor-divide, and negate again to get the ceiling.
227 | batches = -(-len(places) // places_per_batch)
228 | res = {}
229 | for i in range(batches):
230 | req_json = {
231 | 'stat_vars': stat_vars,
232 | 'places': places[i * places_per_batch:(i + 1) * places_per_batch]
233 | }
234 | # Send the request
235 | res_json = utils._send_request(url, req_json=req_json, use_payload=False)
236 | if 'placeData' not in res_json:
237 | # The REST API spec will always return a dictionary under
238 | # placeData, even if no places exist or have no
239 | # data. If no Places are provided, REST will return an
240 | # error, which will have been caught and passed on in
241 | # _send_request.
242 | raise ValueError("Unexpected response from REST stat/all API.")
243 |
244 | # Unnest the REST response for keys that have single-element values.
245 | place_statvar_series = collections.defaultdict(dict)
246 | for place_dcid, place in res_json['placeData'].items():
247 | stat_var_data = place.get('statVarData')
248 | if not stat_var_data:
249 | # The REST API spec will always return a dictionary under
250 | # statVarData, even if no StatVars exist or have no
251 | # data. If no StatVars are provided, REST will return an
252 | # error, which will have been caught and passed on in
253 | # _send_request.
254 | raise ValueError("Unexpected response from REST stat/all API.")
255 | for stat_var_dcid, stat_var in stat_var_data.items():
256 | place_statvar_series[place_dcid][stat_var_dcid] = stat_var
257 | res.update(dict(place_statvar_series))
258 |
259 | return res
260 |
--------------------------------------------------------------------------------
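The batching logic sizes each request so that the product of places and stat_vars stays near `_STAT_BATCH_SIZE`, computing the batch count with negative floor division as a ceiling. The idiom in isolation (the helper name is illustrative):

```python
_STAT_BATCH_SIZE = 2000


def num_batches(num_places: int, num_stat_vars: int) -> int:
  # ceil(a / b) == -(-a // b) for positive ints: floor division of the
  # negated numerator rounds away from zero; negating restores the sign.
  places_per_batch = _STAT_BATCH_SIZE // num_stat_vars
  return -(-num_places // places_per_batch)


assert num_batches(11, 200) == 2  # 10 places per batch, so ceil(11 / 10)
assert num_batches(10, 200) == 1
```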
/datacommons_client/tests/utils/test_graph.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | from unittest.mock import MagicMock
3 |
4 | from datacommons_client.models.node import Node
5 | from datacommons_client.utils.graph import _assemble_tree
6 | from datacommons_client.utils.graph import _fetch_relationship_uncached
7 | from datacommons_client.utils.graph import _postorder_nodes
8 | from datacommons_client.utils.graph import build_graph_map
9 | from datacommons_client.utils.graph import build_relationship_tree
10 | from datacommons_client.utils.graph import fetch_relationship_lru
11 | from datacommons_client.utils.graph import flatten_relationship
12 |
13 |
14 | def test_fetch_parents_uncached_returns_data():
15 |   """Test _fetch_relationship_uncached delegates to the endpoint correctly."""
16 | endpoint = MagicMock()
17 | endpoint.fetch_place_parents.return_value.get.return_value = [
18 | Node(dcid="parent1", name="Parent 1", types=["Country"])
19 | ]
20 |
21 | result = _fetch_relationship_uncached(endpoint,
22 | "test_dcid",
23 | contained_type=None,
24 | relationship="parents")
25 | assert isinstance(result, list)
26 | assert result[0].dcid == "parent1"
27 | endpoint.fetch_place_parents.assert_called_once_with(
28 | "test_dcid",
29 | as_dict=False,
30 | )
31 |
32 |
33 | def test_fetch_relationship_lru_caches_results():
34 | """Test fetch_relationship_lru uses LRU cache and returns list."""
35 | endpoint = MagicMock()
36 | endpoint.fetch_place_parents.return_value.get.return_value = [
37 | Node(dcid="parentX", name="Parent X", types=["Region"])
38 | ]
39 |
40 | result1 = fetch_relationship_lru(endpoint,
41 | "nodeA",
42 | contained_type=None,
43 | relationship="parents")
44 | result2 = fetch_relationship_lru(endpoint,
45 | "nodeA",
46 | contained_type=None,
47 | relationship="parents")
48 | fetch_relationship_lru(endpoint,
49 | "nodeA",
50 | contained_type=None,
51 | relationship="parents")
52 |
53 | assert isinstance(result1, list)
54 | assert result1[0].dcid == "parentX"
55 | assert result1 == result2
56 | assert endpoint.fetch_place_parents.call_count == 1
57 |
58 |
59 | def test_build_ancestry_map_linear_tree():
60 | """A -> B -> C"""
61 |
62 | def fetch_mock(dcid):
63 | return {
64 | "C": [Node(dcid="B", name="Node B", types=["Type"])],
65 | "B": [Node(dcid="A", name="Node A", types=["Type"])],
66 | "A": [],
67 | }.get(dcid, [])
68 |
69 | root, graph = build_graph_map("C", fetch_mock, max_workers=2)
70 |
71 | assert root == "C"
72 | assert set(graph.keys()) == {"C", "B", "A"}
73 | assert graph["C"][0].dcid == "B"
74 | assert graph["B"][0].dcid == "A"
75 | assert graph["A"] == []
76 |
77 |
78 | def test_build_ancestry_map_branching_graph():
79 | r"""
80 | Graph:
81 | F
82 | / \
83 | D E
84 | / \ /
85 | B C
86 | \/
87 | A
88 | """
89 |
90 | def fetch_mock(dcid):
91 | return {
92 | "A": (Node(dcid="B", name="Node B",
93 | types=["Type"]), Node(dcid="C",
94 | name="Node C",
95 | types=["Type"])),
96 | "B": (Node(dcid="D", name="Node D", types=["Type"]),),
97 | "C": (Node(dcid="D", name="Node D",
98 | types=["Type"]), Node(dcid="E",
99 | name="Node E",
100 | types=["Type"])),
101 | "D": (Node(dcid="F", name="Node F", types=["Type"]),),
102 | "E": (Node(dcid="F", name="Node F", types=["Type"]),),
103 | "F": tuple(),
104 | }.get(dcid, tuple())
105 |
106 | root, ancestry = build_graph_map("A", fetch_mock, max_workers=4)
107 |
108 | assert root == "A"
109 | assert set(ancestry.keys()) == {"A", "B", "C", "D", "E", "F"}
110 | assert [p.dcid for p in ancestry["A"]] == ["B", "C"] # A has two parents
111 | assert [p.dcid for p in ancestry["B"]] == ["D"] # B has one parent
112 | assert [p.dcid for p in ancestry["C"]] == ["D", "E"] # C has two parents
113 | assert [p.dcid for p in ancestry["D"]] == ["F"] # D has one parent
114 | assert [p.dcid for p in ancestry["E"]] == ["F"] # E has one parent
115 | assert ancestry["F"] == [] # F has no parents
116 |
117 |
118 | def test_build_ancestry_map_cycle_detection():
119 | """
120 | Graph with a cycle:
121 | A -> B -> C -> A
122 | (Should not loop infinitely)
123 | """
124 |
125 | call_count = defaultdict(int)
126 |
127 | def fetch_mock(dcid):
128 | call_count[dcid] += 1
129 | return {
130 | "A": (Node(dcid="B", name="B", types=["Type"]),),
131 | "B": (Node(dcid="C", name="C", types=["Type"]),),
132 | "C": (Node(dcid="A", name="A", types=["Type"]),), # Cycle back to A
133 | }.get(dcid, tuple())
134 |
135 | root, ancestry = build_graph_map("A", fetch_mock, max_workers=2)
136 |
137 | assert root == "A" # Since we start from A
138 | assert set(ancestry.keys()) == {"A", "B", "C"}
139 | assert [p.dcid for p in ancestry["A"]] == ["B"] # A points to B
140 | assert [p.dcid for p in ancestry["B"]] == ["C"] # B points to C
141 |   # C points back to A, but the cycle is handled without looping.
142 |   assert [p.dcid for p in ancestry["C"]] == ["A"]
143 |
144 | # Check that each node was fetched only once (particularly for A to avoid infinite loop)
145 | assert call_count["A"] == 1
146 | assert call_count["B"] == 1
147 | assert call_count["C"] == 1
148 |
149 |
150 | def test_postorder_nodes_simple_graph():
151 | """Test postorder traversal on a simple graph."""
152 | ancestry = {
153 | "C": [Node(dcid="B", name="B", types=["Type"])],
154 | "B": [Node(dcid="A", name="A", types=["Type"])],
155 | "A": [],
156 | }
157 |
158 | order = _postorder_nodes("C", ancestry)
159 | assert order == ["A", "B", "C"]
160 |
161 | new_order = _postorder_nodes("B", ancestry)
162 | assert new_order == ["A", "B"]
163 |
164 |
165 | def test_postorder_nodes_ignores_disconnected():
166 | """
167 | Graph:
168 | A <- B <- C
169 | D (disconnected)
170 | """
171 | graph = {
172 | "A": [Node(dcid="B", name="B", types=["Type"])],
173 | "B": [Node(dcid="C", name="C", types=["Type"])],
174 | "C": [],
175 | "D": [Node(dcid="Z", name="Z", types=["Type"])],
176 | }
177 | order = _postorder_nodes("A", graph)
178 | assert order == ["C", "B", "A"]
179 | assert "D" not in order
180 |
181 |
182 | def test_assemble_tree_creates_nested_structure():
183 | """Test _assemble_tree creates a nested structure."""
184 | ancestry = {
185 | "C": [Node(dcid="B", name="Node B", types=["Type"])],
186 | "B": [Node(dcid="A", name="Node A", types=["Type"])],
187 | "A": [],
188 | }
189 | postorder = ["A", "B", "C"]
190 | tree = _assemble_tree(postorder, ancestry, relationship_key="parents")
191 |
192 | assert tree["dcid"] == "C"
193 | assert tree["parents"][0]["dcid"] == "B"
194 | assert tree["parents"][0]["parents"][0]["dcid"] == "A"
195 |
196 |
197 | def test_postorder_nodes_ignores_unreachable_nodes():
198 | """
199 | Graph:
200 | A → B → C
201 | Ancestry map also includes D (unconnected)
202 | """
203 | ancestry = {
204 | "A": [Node(dcid="B", name="B", types=["Type"])],
205 | "B": [Node(dcid="C", name="C", types=["Type"])],
206 | "C": [],
207 | "D": [Node(dcid="X", name="X", types=["Type"])],
208 | }
209 |
210 | postorder = _postorder_nodes("A", ancestry)
211 |
212 | # Only nodes reachable from A should be included
213 | assert postorder == ["C", "B", "A"]
214 | assert "D" not in postorder
215 |
216 |
217 | def test_assemble_tree_shared_parent_not_duplicated():
218 | """
219 | Structure:
220 | A → C
221 | B → C
222 | Both A and B have same parent C
223 | """
224 |
225 | ancestry = {
226 | "A": [Node(dcid="C", name="C name", types=["City"])],
227 | "B": [Node(dcid="C", name="C name", types=["City"])],
228 | "C": [],
229 | }
230 |
231 | postorder = ["C", "A", "B"] # C first to allow bottom-up build
232 | tree = _assemble_tree(postorder, ancestry, relationship_key="parents")
233 |
234 | assert tree["dcid"] == "B"
235 | assert len(tree["parents"]) == 1
236 | assert tree["parents"][0]["dcid"] == "C"
237 |
238 |   # Confirm the shared parent node keeps its fields
239 | assert tree["parents"][0] is not None
240 | assert tree["parents"][0]["name"] == "C name"
241 |
242 |
243 | def test_build_ancestry_tree_nested_output():
244 |   """Test build_relationship_tree creates a nested structure."""
245 | ancestry = {
246 | "C": [Node(dcid="B", name="B", types=["Type"])],
247 | "B": [Node(dcid="A", name="A", types=["Type"])],
248 | "A": [],
249 | }
250 |
251 | tree = build_relationship_tree("C", ancestry, relationship_key="parents")
252 |
253 | assert tree["dcid"] == "C"
254 | assert tree["parents"][0]["dcid"] == "B"
255 | assert tree["parents"][0]["parents"][0]["dcid"] == "A"
256 |
257 |
258 | def test_flatten_ancestry_deduplicates():
259 |   """Test flatten_relationship deduplicates parents."""
260 |
261 | ancestry = {
262 | "X": [Node(dcid="A", name="A", types=["Country"])],
263 | "Y": [
264 | Node(dcid="A", name="A", types=["Country"]),
265 | Node(dcid="B", name="B", types=["City"])
266 | ],
267 | }
268 |
269 | flat = flatten_relationship(ancestry)
270 |
271 | assert {"dcid": "A", "name": "A", "types": ["Country"]} in flat
272 | assert {"dcid": "B", "name": "B", "types": ["City"]} in flat
273 | assert len(flat) == 2
274 |
--------------------------------------------------------------------------------
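Read together, these tests document a small pipeline: `build_graph_map` drives a fetcher to exhaustion (with cycle protection), and the resulting map feeds either `build_relationship_tree` for a nested view or `flatten_relationship` for a de-duplicated flat list. A sketch wiring the pieces with a stub fetcher, mirroring the signatures exercised above:

```python
from datacommons_client.models.node import Node
from datacommons_client.utils.graph import build_graph_map
from datacommons_client.utils.graph import build_relationship_tree
from datacommons_client.utils.graph import flatten_relationship


def fetch_parents(dcid):
  # Stub standing in for a real endpoint-backed fetcher.
  return {
      "C": [Node(dcid="B", name="B", types=["Type"])],
      "B": [Node(dcid="A", name="A", types=["Type"])],
  }.get(dcid, [])


root, graph = build_graph_map("C", fetch_parents, max_workers=2)
tree = build_relationship_tree(root, graph, relationship_key="parents")
flat = flatten_relationship(graph)  # de-duplicated list of node dicts
```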