├── datacommons_client
│   ├── models
│   │   ├── __init__.py
│   │   ├── resolve.py
│   │   ├── node.py
│   │   ├── base.py
│   │   └── observation.py
│   ├── tests
│   │   ├── README.MD
│   │   ├── test_utils.py
│   │   ├── test_decorators.py
│   │   ├── models
│   │   │   ├── test_resolve_models.py
│   │   │   ├── test_node_models.py
│   │   │   └── test_observation_models.py
│   │   ├── test_names.py
│   │   ├── endpoints
│   │   │   ├── test_error_handling.py
│   │   │   ├── test_observation_endpoint.py
│   │   │   ├── test_payloads.py
│   │   │   └── test_resolve_endpoint.py
│   │   ├── test_dataframes.py
│   │   └── utils
│   │       └── test_graph.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── decorators.py
│   │   ├── names.py
│   │   ├── error_handling.py
│   │   ├── dataframes.py
│   │   └── data_processing.py
│   ├── endpoints
│   │   ├── __init__.py
│   │   ├── payloads.py
│   │   ├── resolve.py
│   │   ├── base.py
│   │   └── observation.py
│   ├── __init__.py
│   ├── README.md
│   └── client.py
├── datacommons_pandas
│   ├── core.py
│   ├── key.py
│   ├── node.py
│   ├── places.py
│   ├── sparql.py
│   ├── utils.py
│   ├── requests.py
│   ├── stat_vars.py
│   ├── test
│   │   └── __init__.py
│   ├── examples
│   │   ├── __init__.py
│   │   └── df_builder.py
│   ├── README.md
│   ├── __init__.py
│   ├── setup.py
│   └── CHANGELOG.md
├── requirements.txt
├── notebooks
│   ├── intro_data_science
│   │   └── README.md
│   └── README.md
├── cloudbuild.yaml
├── .github
│   └── ISSUE_TEMPLATE
│       ├── default-template.md
│       └── bug_report.md
├── datacommons
│   ├── examples
│   │   ├── __init__.py
│   │   ├── query.py
│   │   ├── core.py
│   │   └── places.py
│   ├── test
│   │   ├── __init__.py
│   │   ├── set_api_key_test.py
│   │   ├── sparql_test.py
│   │   └── node_test.py
│   ├── key.py
│   ├── README.md
│   ├── requests.py
│   ├── __init__.py
│   ├── setup.py
│   ├── node.py
│   ├── sparql.py
│   ├── utils.py
│   ├── core.py
│   └── stat_vars.py
├── CONTRIBUTING.md
├── docs
│   ├── development.md
│   └── release.md
├── .gitignore
├── pyproject.toml
└── run_test.sh

--------------------------------------------------------------------------------
/datacommons_client/models/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/datacommons_client/tests/README.MD:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/datacommons_client/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/datacommons_client/endpoints/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/datacommons_pandas/core.py:
--------------------------------------------------------------------------------
../datacommons/core.py

--------------------------------------------------------------------------------
/datacommons_pandas/key.py:
--------------------------------------------------------------------------------
../datacommons/key.py

--------------------------------------------------------------------------------
/datacommons_pandas/node.py:
--------------------------------------------------------------------------------
../datacommons/node.py

--------------------------------------------------------------------------------
/datacommons_pandas/places.py:
--------------------------------------------------------------------------------
../datacommons/places.py

--------------------------------------------------------------------------------
/datacommons_pandas/sparql.py:
--------------------------------------------------------------------------------
../datacommons/sparql.py
--------------------------------------------------------------------------------
/datacommons_pandas/utils.py:
--------------------------------------------------------------------------------
../datacommons/utils.py

--------------------------------------------------------------------------------
/datacommons_pandas/requests.py:
--------------------------------------------------------------------------------
../datacommons/requests.py

--------------------------------------------------------------------------------
/datacommons_pandas/stat_vars.py:
--------------------------------------------------------------------------------
../datacommons/stat_vars.py

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
isort==5.13.2
mock
pandas
pytest
requests==2.32.0
typing_extensions==4.12.2
yapf==0.40.2
pydantic>=2.11

--------------------------------------------------------------------------------
/notebooks/intro_data_science/README.md:
--------------------------------------------------------------------------------
All notebooks have been updated to use the V2 Python APIs and are found in the `v2/intro_data_science` directory.

--------------------------------------------------------------------------------
/cloudbuild.yaml:
--------------------------------------------------------------------------------
steps:
- id: api_python
  name: python:3.10-slim
  entrypoint: /bin/bash
  args:
  - -c
  - "./run_test.sh -s && hatch run test:all"

--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/default-template.md:
--------------------------------------------------------------------------------
---
name: Default template
about: 'Create an issue for all other questions about the API'
title: ''
labels: ''
assignees: ''

---

--------------------------------------------------------------------------------
/datacommons_client/utils/decorators.py:
--------------------------------------------------------------------------------
from functools import wraps

try:
  import pandas as pd
except ImportError:
  pd = None


def requires_pandas(func):
  """Decorator to check if Pandas is available before executing a method."""

  @wraps(func)
  def wrapper(*args, **kwargs):
    if pd is None:
      raise ImportError("Pandas is required for this method")
    return func(*args, **kwargs)

  return wrapper

--------------------------------------------------------------------------------
/datacommons/examples/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--------------------------------------------------------------------------------
/datacommons/test/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--------------------------------------------------------------------------------
/datacommons_pandas/test/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--------------------------------------------------------------------------------
/datacommons_pandas/examples/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

--------------------------------------------------------------------------------
/datacommons_client/__init__.py:
--------------------------------------------------------------------------------
__version__ = "2.1.4"
"""
Data Commons Client Package

This package provides a Python client for interacting with the Data Commons API.
6 | """ 7 | 8 | from datacommons_client.client import DataCommonsClient 9 | from datacommons_client.endpoints.base import API 10 | from datacommons_client.endpoints.node import NodeEndpoint 11 | from datacommons_client.endpoints.observation import ObservationEndpoint 12 | from datacommons_client.endpoints.resolve import ResolveEndpoint 13 | 14 | __all__ = [ 15 | "DataCommonsClient", 16 | "API", 17 | "NodeEndpoint", 18 | "ObservationEndpoint", 19 | "ResolveEndpoint", 20 | ] 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve the API 4 | title: "[BUG] Description of bug" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /datacommons/key.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ API key related functions. 15 | """ 16 | 17 | import os 18 | 19 | # Environment variable for API key. 20 | _KEY_ENV = 'DC_API_KEY' 21 | 22 | 23 | def set_api_key(api_key): 24 | os.environ[_KEY_ENV] = api_key 25 | 26 | 27 | def get_api_key(): 28 | return os.environ.get(_KEY_ENV, '') 29 | -------------------------------------------------------------------------------- /datacommons/test/set_api_key_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ Data Commons Python API unit tests. 15 | 16 | Unit tests setting the API Key. 
17 | """ 18 | import unittest 19 | 20 | import datacommons.key as key 21 | 22 | _KEY = "test-api-key" 23 | 24 | 25 | class TestApiKey(unittest.TestCase): 26 | """Unit test for setting or not setting the API Key.""" 27 | 28 | def test_set_api_key(self): 29 | key.set_api_key(_KEY) 30 | self.assertEqual(key.get_api_key(), _KEY) 31 | 32 | 33 | if __name__ == '__main__': 34 | unittest.main() 35 | -------------------------------------------------------------------------------- /datacommons_client/README.md: -------------------------------------------------------------------------------- 1 | # Data Commons Python API 2 | 3 | This is a Python library for accessing data in the Data Commons Graph. 4 | 5 | To get started, install this package from pip. 6 | 7 | ```bash 8 | pip install datacommons-client 9 | ``` 10 | 11 | To get additional functionality to work with Pandas DataFrames, install the package 12 | with the optional Pandas dependency. 13 | 14 | ```bash 15 | pip install "datacommons-client[Pandas]" 16 | ``` 17 | 18 | Once the package is installed, import `datacommons_client`. 19 | 20 | ```python 21 | import datacommons_client as dc 22 | ``` 23 | 24 | For more detail on getting started with the API, please visit . 25 | 26 | ## About Data Commons 27 | 28 | [Data Commons](https://datacommons.org/) is an open knowledge repository that 29 | provides a unified view across multiple public data sets and statistics. You can 30 | view what [datasets](https://datacommons.org/datasets) are currently ingested 31 | and browse the graph using our [browser](https://datacommons.org/browser). 32 | 33 | ## License 34 | 35 | Apache 2.0 36 | 37 | ## Support 38 | 39 | For questions, please send an email to `support@datacommons.org`. 40 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google.com/conduct/). 29 | -------------------------------------------------------------------------------- /docs/development.md: -------------------------------------------------------------------------------- 1 | # Python API Development 2 | 3 | This client library supports `python>=3.10`. 4 | 5 | ## Set up 6 | If you haven't already, clone this repository. 

```bash
git clone https://github.com/datacommonsorg/api-python.git
cd api-python
```

To set up the Python environment for development, run:

```bash
./run_test.sh -s
```

This will install `hatch`, which is the main tool used to manage the
environment, dependencies, and development tools. You can also manually install
`hatch` and create a virtual environment.

```bash
pip install hatch
hatch env create
```

## Code style and linting
We use `isort` and `yapf` for code formatting. Check formatting with:

```bash
hatch run lint:check
```

To automatically fix formatting, run:

```bash
hatch run lint:format
```

## Running tests

To run the tests:

```bash
hatch run test:all
```

To debug the continuous integration tests, run:

```bash
gcloud builds submit . --project=datcom-ci --config=cloudbuild.yaml
```

Both commands will run the same set of tests.

--------------------------------------------------------------------------------
/datacommons_client/tests/test_utils.py:
--------------------------------------------------------------------------------
from datacommons_client.utils.data_processing import group_variables_by_entity


def test_group_variables_by_entity_basic():
  """Test grouping with simple variable-entity mapping."""
  input_data = {
      "var1": ["ent1", "ent2"],
      "var2": ["ent2", "ent3"],
      "var3": ["ent1"],
  }
  expected_output = {
      "ent1": ["var1", "var3"],
      "ent2": ["var1", "var2"],
      "ent3": ["var2"],
  }

  result = group_variables_by_entity(input_data)
  assert result == expected_output


def test_group_variables_by_entity_duplicate_entities():
  """Test grouping when a variable has duplicate entities."""
  input_data = {
      "var1": ["ent1", "ent1", "ent2"],
  }
  result = group_variables_by_entity(input_data)
  assert result["ent1"].count("var1") == 2  # duplicates are preserved
  assert "ent2" in result
  assert result["ent2"] == ["var1"]


def test_group_variables_by_entity_preserves_order():
  """Test if the order of variables is preserved in the resulting entity lists."""
  input_data = {
      "var1": ["ent1"],
      "var2": ["ent1"],
      "var3": ["ent1"],
  }
  result = group_variables_by_entity(input_data)
  assert result["ent1"] == ["var1", "var2", "var3"]

--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
# Python API Notebooks

This directory contains Colab notebooks that use the V1 Python API. For current notebooks, see the `v2` directory.

Notebook | Description
-------- | -----------
[`Place Similarity with Data Commons.ipynb`](https://colab.research.google.com/drive/1t7dFDSpCT16QDkNuD933QgLUL9BOdCAS) | A notebook that identifies similar places given a place and one or more statistical variables from Data Commons.
[`Missing Data Imputation Tutorial.ipynb`](https://colab.research.google.com/drive/1S_rMCyRsgygd8sV-r8aLRPcKwZPFcEGb) | A notebook that analyzes the different types of time series holes and different methods of imputing those holes.
[`analyzing_genomic_data.ipynb`](https://colab.research.google.com/drive/1Io7EDr4LjfPLl_l2JYY8__WbfitfNlOf) | A notebook that analyzes genetic variants within RUNX1 (provided by multiple datasets from UCSC Genome Browser, NCBI/gene, and ClinVar).
[`Drug_Discovery_With_Data_Commons.ipynb`](https://colab.research.google.com/drive/1dSKYiRMn3mbDsInorQzYM0yk7sqv6fIV) | A notebook performing drug discovery by identifying novel applications of previously approved drugs using Biomedical Data Commons.
[`protein-charts.ipynb`](https://colab.research.google.com/drive/1Kh-ufqobdChZ2qQgEY0rdPA2_DBmOiSG) | A notebook summarizing various protein properties and interactions using graphical visualizations.

--------------------------------------------------------------------------------
/datacommons_pandas/README.md:
--------------------------------------------------------------------------------
# Data Commons Pandas API

This is a Python library for creating pandas objects with data in the
Data Commons Graph.

To get started, install this package from pip.

```bash
pip install datacommons_pandas
```

Once the package is installed, import `datacommons_pandas`.

```python
import datacommons_pandas as dcpd
```

For more detail on getting started with the API, please visit our
[API Overview](https://docs.datacommons.org/api/pandas/).

When you are ready to use the API, you can refer to `examples` for
examples on how to use this package to perform various tasks. More tutorials and
documentation can be found on our [tutorials page](https://docs.datacommons.org/tutorials/)!

## About Data Commons

[Data Commons](https://datacommons.org/) is an open knowledge repository that
provides a unified view across multiple public data sets and statistics. You can
view what [datasets](https://datacommons.org/datasets) are currently ingested
and browse the graph using our [browser](https://datacommons.org/browser).

## License

Apache 2.0

## Support

For general questions or issues about the API, please open an issue on our
[issues](https://github.com/datacommonsorg/api-python/issues) page. For all other
questions, please send an email to `support@datacommons.org`.

--------------------------------------------------------------------------------
/datacommons/README.md:
--------------------------------------------------------------------------------
# Data Commons Python API

This is a Python library for accessing data in the Data Commons Graph.

> See also: [Data Commons Pandas API](../datacommons_pandas/README.md).

To get started, install this package from pip.

```bash
pip install datacommons
```

Once the package is installed, import `datacommons`.

```python
import datacommons as dc
```

For more detail on getting started with the API, please visit our
[API Overview](https://docs.datacommons.org/api/).

When you are ready to use the API, you can refer to `examples` for
examples on how to use this package to perform various tasks. More tutorials and
documentation can be found on our [tutorials page](https://docs.datacommons.org/tutorials/)!
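
For example, fetching the names of a couple of places takes one call (a
minimal sketch; the place DCIDs are the same illustrative counties used in
`examples/`, and the printed output is indicative only):

```python
import datacommons as dc

# Map each place DCID to the values of its `name` property.
names = dc.get_property_values(['geoId/06085', 'geoId/24031'], 'name')
print(names)
# e.g. {'geoId/06085': ['Santa Clara County'], 'geoId/24031': ['Montgomery County']}
```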

## About Data Commons

[Data Commons](https://datacommons.org/) is an open knowledge repository that
provides a unified view across multiple public data sets and statistics. You can
view what [datasets](https://datacommons.org/datasets) are currently ingested
and browse the graph using our [browser](https://datacommons.org/browser).

## License

Apache 2.0

## Support

For general questions or issues about the API, please open an issue on our
[issues](https://github.com/datacommonsorg/api-python/issues) page. For all other
questions, please send an email to `support@datacommons.org`.

--------------------------------------------------------------------------------
/datacommons/examples/query.py:
--------------------------------------------------------------------------------
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Data Commons Python API examples.

Example on how to use the Client API SPARQL query wrapper.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datacommons as dc


def main():
  # Create a SPARQL query querying for the name of some states
  query = ('''
SELECT ?name ?dcid
WHERE {
  ?a typeOf Place .
  ?a name ?name .
  ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
  ?a dcid ?dcid
}
''')
  print('> Issuing query.\n{}'.format(query))

  # Iterate through all the rows in the results.
  print('> Printing results.\n')
  for row in dc.query(query_string=query):
    print('  {}'.format(row))


if __name__ == '__main__':
  main()

--------------------------------------------------------------------------------
/datacommons_client/models/resolve.py:
--------------------------------------------------------------------------------
from typing import List, Optional

from pydantic import Field

from datacommons_client.models.base import BaseDCModel
from datacommons_client.models.base import DictLikeRootModel
from datacommons_client.models.base import DominantType
from datacommons_client.models.base import NodeDCID
from datacommons_client.models.base import Query


class Candidate(BaseDCModel):
  """Represents a candidate in the resolution response.

  Attributes:
      dcid (NodeDCID): The Data Commons ID for the candidate.
      dominantType (Optional[DominantType]): The dominant type of the candidate,
          if available. This represents the primary type associated with the DCID.
  """

  dcid: NodeDCID = Field(default_factory=str)
  dominantType: Optional[DominantType] = None


class Entity(BaseDCModel):
  """Represents an entity with its resolution candidates.

  Attributes:
      node (Query): The query string or node being resolved.
      candidates (List[Candidate]): A list of candidates that match the query.
  """

  node: Query
  candidates: list[Candidate] = Field(default_factory=list)


class FlatCandidateMapping(BaseDCModel,
                           DictLikeRootModel[dict[Query,
                                                  list[NodeDCID] | NodeDCID]]):
  """A model to represent a mapping of queries to candidates."""

--------------------------------------------------------------------------------
/datacommons/requests.py:
--------------------------------------------------------------------------------
# Copyright 2022 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Send http requests to Data Commons REST API endpoints.
"""

from typing import Dict

import requests

import datacommons.key as key

# REST API endpoint root
_API_ROOT = "https://api.datacommons.org"


def _post(path: str, data={}) -> Dict:
  url = _API_ROOT + path
  headers = {'Content-Type': 'application/json'}
  api_key = key.get_api_key()
  if api_key:
    headers['x-api-key'] = api_key
  try:
    resp = requests.post(url, json=data, headers=headers)
    if resp.status_code != 200:
      raise Exception(
          f'{resp.status_code}: {resp.reason}\n{resp.json()["message"]}')
    return resp.json()
  except requests.exceptions.Timeout:
    raise Exception('Data request timed out, please try again.')
  except requests.exceptions.RequestException as e:
    raise e

--------------------------------------------------------------------------------
/datacommons_client/tests/test_decorators.py:
--------------------------------------------------------------------------------
from unittest import mock

import pytest

from datacommons_client.utils.decorators import requires_pandas

try:
  import pandas as pd

  PANDAS_AVAILABLE = True
except ImportError:
  PANDAS_AVAILABLE = False


@requires_pandas
def function_requiring_pandas():
  return "Pandas is available"


def test_requires_pandas_with_pandas():
  """Test that the function executes normally when Pandas is available."""
  if PANDAS_AVAILABLE:
    assert function_requiring_pandas() == "Pandas is available"


def test_requires_pandas_without_pandas(monkeypatch):
  """Test that the decorator raises ImportError when Pandas is not available."""
  # Simulate Pandas being unavailable
  monkeypatch.setattr("datacommons_client.utils.decorators.pd", None)
  with pytest.raises(ImportError, match="Pandas is required for this method"):
    function_requiring_pandas()


def test_importerror_handling(monkeypatch):
  """Test that the ImportError block is executed when Pandas is not installed."""

  # Simulate pandas not being available
  with mock.patch.dict("sys.modules", {"pandas": None}):
    import importlib

    # Reload the module so that a new check of Pandas is performed
    import datacommons_client.utils.decorators
    importlib.reload(datacommons_client.utils.decorators)

    # Ensure pd is set to None
    assert datacommons_client.utils.decorators.pd is None

--------------------------------------------------------------------------------
/datacommons/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# isort: skip_file

################################## IMPORTANT #################################
# All user-facing functions in this package must be symlinked to the        #
# datacommons_pandas pkg. This is so that users do not need to import both  #
# libraries for pd support. Please keep the below imports in sync with the  #
# __init__.py in the datacommons_pandas/ dir, and add a symlink when        #
# creating a new file.                                                      #
# TODO: https://github.com/datacommonsorg/api-python/issues/149             #
##############################################################################

# Data Commons SPARQL query support
from datacommons.sparql import query

# Data Commons Python API
from datacommons.core import get_property_labels, get_property_values, get_triples
from datacommons.places import get_places_in, get_related_places, get_stats
from datacommons.stat_vars import get_stat_value, get_stat_series, get_stat_all

from datacommons.key import set_api_key
from datacommons.node import properties, property_values, triples

--------------------------------------------------------------------------------
/datacommons_pandas/__init__.py:
--------------------------------------------------------------------------------
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# isort: skip_file

from datacommons_pandas.df_builder import build_time_series, build_time_series_dataframe, build_multivariate_dataframe

################################ SYMLINK FILES ################################
# We include symlinks to all user-facing functions from the datacommons pkg. #
# This is so that users do not need to import both libraries for pd support. #
# Please keep the below in sync with the __init__.py in the datacommons/ dir #
# TODO: enforce this. https://github.com/datacommonsorg/api-python/issues/149 #
################################################################################
# Data Commons SPARQL query support
from datacommons_pandas.sparql import query

# Data Commons Python API
from datacommons_pandas.core import get_property_labels, get_property_values, get_triples
from datacommons_pandas.places import get_places_in, get_related_places, get_stats
from datacommons_pandas.stat_vars import get_stat_value, get_stat_series, get_stat_all

from datacommons_pandas.key import set_api_key
from datacommons_pandas.node import properties, property_values, triples

--------------------------------------------------------------------------------
/datacommons_client/tests/models/test_resolve_models.py:
--------------------------------------------------------------------------------
from datacommons_client.models.resolve import Candidate
from datacommons_client.models.resolve import Entity


def test_candidate_model_validation():
  """Test that Candidate.model_validate parses full data correctly."""
  json_data = {"dcid": "dcid123", "dominantType": "Place"}
  candidate = Candidate.model_validate(json_data)
  assert candidate.dcid == "dcid123"
  assert candidate.dominantType == "Place"


def test_candidate_model_validation_partial():
  """Test Candidate.model_validate with missing optional dominantType."""
  json_data = {"dcid": "dcid456"}
  candidate = Candidate.model_validate(json_data)
  assert candidate.dcid == "dcid456"
  assert candidate.dominantType is None


def test_entity_model_validation():
  """Test that Entity.model_validate handles multiple candidates."""
  json_data = {
      "node":
          "test_query",
      "candidates": [
          {
              "dcid": "dcid123",
              "dominantType": "Place"
          },
          {
              "dcid": "dcid456",
              "dominantType": "Event"
          },
      ],
  }
  entity = Entity.model_validate(json_data)
  assert entity.node == "test_query"
  assert len(entity.candidates) == 2
  assert entity.candidates[0].dcid == "dcid123"
  assert entity.candidates[0].dominantType == "Place"
  assert entity.candidates[1].dcid == "dcid456"
  assert entity.candidates[1].dominantType == "Event"


def test_entity_model_validation_empty_candidates():
  """Test Entity.model_validate with no candidates."""
  json_data = {"node": "test_query", "candidates": []}
  entity = Entity.model_validate(json_data)
  assert entity.node == "test_query"
  assert len(entity.candidates) == 0

--------------------------------------------------------------------------------
/datacommons/setup.py:
--------------------------------------------------------------------------------
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | """Build and distribute the datacommons package to PyPI.""" 15 | import os 16 | 17 | from setuptools import setup 18 | 19 | dir_path = os.path.dirname(os.path.realpath(__file__)) 20 | with open(os.path.join(dir_path, 'README.md'), 'r') as fh: 21 | long_description = fh.read() 22 | 23 | # Package metadata. 24 | NAME = 'datacommons' 25 | DESCRIPTION = 'A library to access Data Commons Python API.' 26 | URL = 'https://github.com/datacommonsorg/api-python' 27 | EMAIL = 'support@datacommons.org' 28 | AUTHOR = 'datacommons.org' 29 | REQUIRES_PYTHON = '>=3.7' 30 | VERSION = '1.4.3' 31 | REQUIRED = ['six', 'requests'] 32 | PACKAGES = ['datacommons'] 33 | 34 | setup( 35 | name=NAME, 36 | version=VERSION, 37 | description=DESCRIPTION, 38 | long_description=long_description, 39 | long_description_content_type='text/markdown', 40 | author=AUTHOR, 41 | author_email=EMAIL, 42 | maintainer=AUTHOR, 43 | maintainer_email=EMAIL, 44 | python_requires=REQUIRES_PYTHON, 45 | url=URL, 46 | packages=PACKAGES, 47 | install_requires=REQUIRED, 48 | include_package_data=True, 49 | license='Apache 2.0', 50 | classifiers=[ 51 | 'Intended Audience :: Developers', 52 | 'License :: OSI Approved :: Apache Software License', 53 | 'Programming Language :: Python', 54 | 'Programming Language :: Python :: 3.7', 55 | 'Programming Language :: Python :: Implementation :: CPython', 56 | 'Topic :: Software Development', 57 | ], 58 | ) 59 | -------------------------------------------------------------------------------- /datacommons_pandas/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Build and distribute the datacommons_pandas package to PyPI.""" 15 | import os 16 | 17 | from setuptools import setup 18 | 19 | dir_path = os.path.dirname(os.path.realpath(__file__)) 20 | with open(os.path.join(dir_path, 'README.md'), 'r') as fh: 21 | long_description = fh.read() 22 | 23 | # Package metadata. 24 | NAME = 'datacommons_pandas' 25 | DESCRIPTION = 'A library to create pandas objects using the Data Commons Python API.' 
URL = 'https://github.com/datacommonsorg/api-python'
EMAIL = 'support@datacommons.org'
AUTHOR = 'datacommons.org'
REQUIRES_PYTHON = '>=3.7'
VERSION = '0.0.3'
REQUIRED = ['pandas', 'six', 'requests']
PACKAGES = ['datacommons_pandas']

setup(
    name=NAME,
    version=VERSION,
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=AUTHOR,
    author_email=EMAIL,
    maintainer=AUTHOR,
    maintainer_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    packages=PACKAGES,
    install_requires=REQUIRED,
    include_package_data=True,
    license='Apache 2.0',
    classifiers=[
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: Implementation :: CPython',
        'Topic :: Software Development',
    ],
)

--------------------------------------------------------------------------------
/datacommons_pandas/CHANGELOG.md:
--------------------------------------------------------------------------------
# Changelog

## 0.0.3

**Date** - 11/10/2020

**Release Tag** - [pd.0.0.3](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.3)

**Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)

Update to use datacommons Python API 1.4.3, which returns empty data structures instead of erroring when no data is available.

## 0.0.2

**Date** - 09/16/2020

**Release Tag** - [pd.0.0.2](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.2)

**Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)

Update to use datacommons Python API 1.4.2, which adds batching to the get_stat_all function used by build_time_series_dataframe and build_multivariate_dataframe.

## 0.0.1

**Date** - 08/25/2020

**Release Tag** - [pd.0.0.1](https://github.com/datacommonsorg/api-python/releases/tag/pd0.0.1)

**Release Status** - Current head of branch [`master`](https://github.com/datacommonsorg/api-python/tree/master)

Added pandas wrapper functions.

- `build_time_series` constructs a pd.Series for a given StatisticalVariable and Place, where the time series are indexed by date.
- `build_time_series_dataframe` constructs a pd.DataFrame for a given StatisticalVariable and a set of Places. The DataFrame will have Places as the index and dates as the columns.
- `build_multivariate_dataframe` constructs a pd.DataFrame for a set of StatisticalVariables and a set of Places. The DataFrame will have Places as index and StatisticalVariables as the columns. The values are the most recent values for the chosen StatVarObservation options.

For multi-place functions, when a StatisticalVariable has multiple StatVarObservation options,
Data Commons chooses a set of StatVarObservation options that covers the most places. This
ensures that the data fetched for a StatisticalVariable is comparable across places.
When there is a tie, we select the StatVarObservation options set with the latest date
data is available for any place.
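
As an illustration of these helpers (a minimal sketch, not part of the release
itself; the place DCIDs and StatisticalVariables below are examples):

```python
import datacommons_pandas as dcpd

# pd.Series of observations for one variable and one place, indexed by date.
series = dcpd.build_time_series("geoId/06085", "Count_Person")

# pd.DataFrame with places as the index and dates as the columns.
df = dcpd.build_time_series_dataframe(["geoId/06085", "geoId/24031"],
                                      "Count_Person")

# pd.DataFrame with places as the index and variables as the columns,
# holding the most recent comparable values.
latest = dcpd.build_multivariate_dataframe(
    ["geoId/06085", "geoId/24031"], ["Count_Person", "Median_Age_Person"])
```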

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__/
.dat

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

### Ignore MAC OS System files ###
# General
.DS_Store
.AppleDouble
.LSOverride
.profraw

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

### Ignore BAZEL BUILD System files ###
/bazel-*

### R and RStudio ###
.Rproj.user
.Rhistory
.RData
.Ruserdata
datacommons.RCheck
*tar.gz

## VSCode
.vscode/

## JetBrains
.idea/

# Gemini
GEMINI.md
.gemini/

# Temp files
tmp/

--------------------------------------------------------------------------------
/datacommons_client/utils/names.py:
--------------------------------------------------------------------------------
from typing import Optional

from datacommons_client.models.node import Node

DEFAULT_NAME_PROPERTY: str = "name"
NAME_WITH_LANGUAGE_PROPERTY: str = "nameWithLanguage"
DEFAULT_NAME_LANGUAGE: str = "en"


def extract_name_from_english_name_property(properties: list | Node) -> str:
  """
  Extracts the name from a list of properties with English names.
  Args:
      properties (list): A list of properties with English names.
  Returns:
      str: The extracted name.
  """
  if not properties:
    return ''

  if isinstance(properties, Node):
    properties = [properties]

  return properties[0].value


def extract_name_from_property_with_language(
    properties: list,
    language: str,
    fallback_language: Optional[str] = None) -> tuple[str | None, str | None]:
  """
  Extracts the name from a list of properties with language tags.
  Args:
      properties (list): A list of properties with language tags.
      language (str): The desired language code.
      fallback_language: If provided, this language will be used as a fallback if the requested
          language is not available. If not provided, no fallback will be used.

  Returns:
      tuple[str, str]: A tuple containing the extracted name and its language.
  """
  # Track a fallback name in case the requested language is not found.
  fallback_name = None

  # Iterate through the properties to find the name in the specified language
  for candidate in properties:
    # If no language is specified, skip the candidate
    if "@" not in candidate.value:
      continue

    # Split the candidate value into name and language
    name, lang = candidate.value.rsplit("@", 1)

    # If the language matches, return the name and its language.
    if lang == language:
      return name, lang
    # If the language matches the fallback language, store the name as a fallback
    if fallback_language and (lang == fallback_language):
      fallback_name = name

  # If no name was found in the specified language, use the fallback name (if available)
  return fallback_name, fallback_language if fallback_language else None

--------------------------------------------------------------------------------
/datacommons/examples/core.py:
--------------------------------------------------------------------------------
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Data Commons Python API examples.

Basic demo for get_property_labels, get_property_values, and get_triples.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import datacommons as dc


def main():
  # Set the dcids to be those of Santa Clara County and a StatisticalPopulation.
  dcids = ['geoId/06085', 'dc/p/zsb968m3v1f97']

  # Print all incoming and outgoing properties from Santa Clara County.
  print('Property Labels for Santa Clara County')
  in_labels = dc.get_property_labels(dcids, out=False)
  out_labels = dc.get_property_labels(dcids)
  print('> Printing properties for {}'.format(dcids))
  print('> Incoming properties: {}'.format(in_labels))
  print('> Outgoing properties: {}'.format(out_labels))

  # Print all property values for "containedInPlace" of Santa Clara County.
  print('Property Values for "containedInPlace" of Santa Clara County')
  prop_vals = dc.get_property_values(dcids,
                                     'containedInPlace',
                                     out=False,
                                     value_type='City')
  print('> Cities contained in {}'.format(dcids))
  for dcid in dcids:
    for city_dcid in prop_vals[dcid]:
      print('  - {}'.format(city_dcid))

  # Print the first 5 triples associated with Santa Clara County
  print('Triples for Santa Clara County')
  triples = dc.get_triples(dcids)
  for dcid in dcids:
    print('> Triples for {}'.format(dcid))
    for s, p, o in triples[dcid][:5]:
      print('  - ("{}", {}, "{}")'.format(s, p, o))


if __name__ == '__main__':
  main()

--------------------------------------------------------------------------------
/datacommons_client/tests/test_names.py:
--------------------------------------------------------------------------------
from datacommons_client.models.node import Node
from datacommons_client.utils.names import extract_name_from_english_name_property
from datacommons_client.utils.names import extract_name_from_property_with_language


def test_extract_name_from_english_name_property_with_list():
  """Test extracting name from a list of Nodes."""
  properties = [Node(value="Test Name")]
  result = extract_name_from_english_name_property(properties)
  assert result == "Test Name"


def test_extract_name_from_english_empty_list():
  """Test extracting name from an empty list."""
  result = extract_name_from_english_name_property([])
  assert result == ""


def test_extract_name_from_english_not_list():
  """Test extracting name from a single Node (not in a list)."""
  property_node = Node(value="Single Node Name")
  result = extract_name_from_english_name_property(property_node)
  assert result == "Single Node Name"


def test_extract_name_from_property_with_language_match():
  """Test extracting name when desired language is present."""
  properties = [
      Node(value="Nombre@es"),
      Node(value="Name@en"),
  ]
  result = extract_name_from_property_with_language(properties,
                                                    language="es",
                                                    fallback_language="en")
  assert result[0] == "Nombre"
  assert result[1] == "es"


def test_extract_name_from_property_with_language_fallback():
  """Test fallback to the fallback language when the desired language is not found."""
  properties = [
      Node(value="Name@en"),
      Node(value="Nom@fr"),
      Node(value="Nome@it"),
  ]
  result = extract_name_from_property_with_language(properties,
                                                    language="de",
                                                    fallback_language="it")
  assert result[0] == "Nome"
  assert result[1] == "it"


def test_extract_name_from_property_with_language_no_fallback():
  """Test no result when language is not found and fallback is disabled."""
  properties = [
      Node(value="Name@en"),
      Node(value="Nom@fr"),
  ]
  result = extract_name_from_property_with_language(properties, language="de")
  assert result[0] is None
  assert result[1] is None


def test_extract_name_from_property_without_language_tags():
  """Test that properties without language tags are skipped."""
  properties = [
      Node(value="Plain str"),
      Node(value="Name@en"),
  ]
  result = extract_name_from_property_with_language(properties, language="en")
  assert result[0] == "Name"
  assert result[1] == "en"
--------------------------------------------------------------------------------
/docs/release.md:
--------------------------------------------------------------------------------
# Python API Release

## Releasing the `datacommons_client` package
Support for V2 of the Data Commons API is being released as a new client library
called `datacommons_client`.

To release:
1. Update [CHANGELOG.md](../CHANGELOG.md) with relevant changes.
2. Bump the version by running `hatch version` followed by `patch`, `minor`,
   `major`, an explicit version number, or a pre-release flag such as
   `--pre beta`.
3. Build the package:
```bash
hatch build
```
4. (Optionally) test the deployment process locally:
```bash
hatch run release:localtest
```
5. Test the deployment process on Test PyPI:
```bash
hatch run release:testpypi
```

6. Once verified, upload to PyPI:
```bash
hatch run release:pypi
```

7. Create a version tag on Git:
```bash
hatch run release:tag
```

---

## Releasing the legacy packages

Note: Always release `datacommons_pandas` when `datacommons` is released.

**If this is your first time releasing to PyPI**, please review the PyPI guide
starting from the
[setup section](https://packaging.python.org/tutorials/packaging-projects/#creating-setup-py).

## Prepare release tools

```bash
python3 -m venv .env
source .env/bin/activate
python3 -m pip install --upgrade setuptools wheel
python3 -m pip install --upgrade twine
```

## Release to Test PyPI

1. In [datacommons/setup.py](../datacommons/setup.py) and [datacommons_pandas/setup.py](../datacommons_pandas/setup.py):

   - Append "-USERNAME" to the package "NAME". For example,
     `NAME = 'foo_package-janedoe123'`.
   - Increment the "VERSION" codes to something that has not been used in your
     test project. This will not affect the production PyPI versioning.

1. In the repo root directory, build the dists and release to TestPyPI:

   ```bash
   rm dist/*
   python3 datacommons/setup.py sdist bdist_wheel
   python3 datacommons_pandas/setup.py sdist bdist_wheel
   python3 -m twine upload --repository testpypi dist/*
   ```

## Release to Production PyPI

1. In [datacommons/setup.py](../datacommons/setup.py) and
   [datacommons_pandas/setup.py](../datacommons_pandas/setup.py):

   - Revert the package name to `datacommons` and `datacommons_pandas`
   - Update and double check "VERSION"

1. Update [datacommons/CHANGELOG.md](../datacommons/CHANGELOG.md) and [datacommons_pandas/CHANGELOG.md](../datacommons_pandas/CHANGELOG.md)

1. Build the dists and release to PyPI:

   ```bash
   rm dist/*
   python3 datacommons/setup.py sdist bdist_wheel
   python3 datacommons_pandas/setup.py sdist bdist_wheel
   python3 -m twine upload dist/*
   ```

--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[project]
name = "datacommons-client"
dynamic = ["version"]
description = "A library to access Data Commons Python API."
5 | readme = "datacommons_client/README.md" 6 | authors = [ 7 | { name = "datacommons.org", email = "support@datacommons.org" }, 8 | { name = "one.org", email= "data@one.org"} 9 | ] 10 | maintainers = [ 11 | { name = "datacommons.org", email = "support@datacommons.org" } 12 | ] 13 | license = { file = "LICENSE" } 14 | dependencies = [ 15 | "requests>=2.32", 16 | "typing_extensions", 17 | "pydantic>=2.11" 18 | ] 19 | requires-python = ">=3.10" 20 | keywords = ["data commons", "api", "data", "development"] 21 | classifiers = [ 22 | "Intended Audience :: Developers", 23 | "License :: OSI Approved :: Apache Software License", 24 | "Programming Language :: Python", 25 | "Programming Language :: Python :: 3.10", 26 | "Programming Language :: Python :: 3.11", 27 | "Programming Language :: Python :: 3.12", 28 | "Programming Language :: Python :: 3.13", 29 | "Programming Language :: Python :: Implementation :: CPython", 30 | "Topic :: Software Development" 31 | ] 32 | urls = { "Homepage" = "https://github.com/datacommonsorg/api-python" } 33 | 34 | [project.optional-dependencies] 35 | pandas = ["pandas"] 36 | dev = [ 37 | "pytest", 38 | "isort", 39 | "yapf", 40 | "mock", 41 | "hatch" 42 | ] 43 | 44 | [tool.hatch.version] 45 | path = "datacommons_client/__init__.py" 46 | 47 | 48 | [tool.hatch.build.targets.sdist] 49 | include = [ 50 | "datacommons_client", 51 | "README.md", 52 | "LICENSE", 53 | "CHANGELOG.md" 54 | ] 55 | 56 | [tool.hatch.build.targets.wheel] 57 | include = [ 58 | "datacommons_client" 59 | ] 60 | 61 | [tool.hatch.envs.default] 62 | dependencies = [ 63 | "pytest", 64 | "isort", 65 | "yapf", 66 | "hatch", 67 | ] 68 | 69 | [tool.hatch.envs.test] 70 | dependencies = [ 71 | "pytest", 72 | "mock", 73 | "pandas", 74 | "isort", 75 | "yapf" 76 | ] 77 | 78 | 79 | [tool.hatch.envs.test.scripts] 80 | setup = "./run_test.sh -s" 81 | all = "./run_test.sh -a" 82 | python = "./run_test.sh -p" 83 | lint = "./run_test.sh -l" 84 | 85 | [tool.hatch.envs.lint] 86 | dependencies = [ 87 | "isort", 88 | "yapf" 89 | ] 90 | 91 | [tool.hatch.envs.lint.scripts] 92 | check = "./run_test.sh -l" 93 | format = "./run_test.sh -f" 94 | 95 | [tool.hatch.envs.release] 96 | dependencies = [ 97 | "twine" 98 | ] 99 | 100 | [tool.hatch.envs.release.scripts] 101 | localtest = "hatch build && twine check dist/*" 102 | testpypi = "hatch build && twine upload --repository testpypi dist/*" 103 | pypi = "hatch build && twine upload dist/*" 104 | tag = "git commit -am 'Bump version to {version}' && git tag v{version}" 105 | 106 | 107 | [build-system] 108 | requires = ["hatchling"] 109 | build-backend = "hatchling.build" 110 | -------------------------------------------------------------------------------- /datacommons_client/tests/endpoints/test_error_handling.py: -------------------------------------------------------------------------------- 1 | from requests import Request 2 | from requests import Response 3 | 4 | from datacommons_client.utils.error_handling import APIError 5 | from datacommons_client.utils.error_handling import DataCommonsError 6 | from datacommons_client.utils.error_handling import DCAuthenticationError 7 | from datacommons_client.utils.error_handling import DCConnectionError 8 | from datacommons_client.utils.error_handling import DCStatusError 9 | from datacommons_client.utils.error_handling import InvalidDCInstanceError 10 | from datacommons_client.utils.error_handling import NoDataForPropertyError 11 | 12 | 13 | def test_data_commons_error_default_message(): 14 | """Tests that DataCommonsError uses the 
default message.""" 15 | error = DataCommonsError() 16 | assert str(error) == DataCommonsError.default_message 17 | 18 | 19 | def test_data_commons_error_custom_message(): 20 | """Tests that DataCommonsError uses a custom message when provided.""" 21 | error = DataCommonsError("Custom message") 22 | assert str(error) == "Custom message" 23 | 24 | 25 | def test_api_error_without_response(): 26 | """Tests APIError initialization without a Response object.""" 27 | error = APIError() 28 | assert str(error) == f"\n{APIError.default_message}" 29 | 30 | 31 | def test_api_error_with_response(): 32 | """Tests APIError initialization with a mocked Response object. 33 | 34 | Verifies that the string representation includes status code, 35 | request URL, and response text. 36 | """ 37 | mock_request = Request("GET", "http://example.com").prepare() 38 | mock_response = Response() 39 | mock_response.request = mock_request 40 | mock_response.status_code = 404 41 | mock_response._content = b"Not Found" 42 | 43 | error = APIError(response=mock_response) 44 | assert "Status Code: 404" in str(error) 45 | assert "Request URL: http://example.com" in str(error) 46 | assert "Not Found" in str(error) 47 | 48 | 49 | def test_subclass_default_messages(): 50 | """Tests that subclasses use their default messages.""" 51 | connection_error = DCConnectionError() 52 | assert DCConnectionError.default_message in str(connection_error) 53 | 54 | status_error = DCStatusError() 55 | assert DCStatusError.default_message in str(status_error) 56 | 57 | auth_error = DCAuthenticationError() 58 | assert DCAuthenticationError.default_message in str(auth_error) 59 | 60 | instance_error = InvalidDCInstanceError() 61 | assert InvalidDCInstanceError.default_message in str(instance_error) 62 | 63 | filter_error = NoDataForPropertyError() 64 | assert NoDataForPropertyError.default_message in str(filter_error) 65 | 66 | 67 | def test_subclass_custom_message(): 68 | """Tests that subclasses use custom messages when provided.""" 69 | error = DCAuthenticationError(response=Response(), 70 | message="Custom auth error") 71 | assert str(error) == "\nCustom auth error" 72 | -------------------------------------------------------------------------------- /datacommons/examples/places.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ Data Commons Python API examples. 15 | 16 | Basic demo for get_places_in 17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | 23 | import datacommons as dc 24 | 25 | 26 | def main(): 27 | # Create a list of dcids for Santa Clara and Montgomery County. 28 | sc, mc = 'geoId/06085', 'geoId/24031' 29 | dcids = [sc, mc] 30 | 31 | # Get all CensusTracts in these two counties. 
32 | print('Get Census Tracts') 33 | tracts = dc.get_places_in(dcids, 'CensusTract') 34 | if sc in tracts: 35 | print('> 10 CensusTracts in Santa Clara County') 36 | for dcid in tracts[sc][:10]: 37 | print(' - {}'.format(dcid)) 38 | if mc in tracts: 39 | print('> 10 CensusTracts in Montgomery County') 40 | for dcid in tracts[mc][:10]: 41 | print(' - {}'.format(dcid)) 42 | 43 | # Get place stats. 44 | print('Get place stats -- all') 45 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 46 | 'dc/0hyp6tkn18vcb', 47 | obs_dates='all') 48 | print(stats) 49 | 50 | print('Get place stats -- latest') 51 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 52 | 'dc/0hyp6tkn18vcb') 53 | print(stats) 54 | 55 | print('Get place stats -- 2014') 56 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 57 | 'dc/0hyp6tkn18vcb', 58 | obs_dates=['2014']) 59 | print(stats) 60 | 61 | print('Get place stats -- 2014 badly formatted') 62 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 63 | 'dc/0hyp6tkn18vcb', 64 | obs_dates='2014') 65 | print(stats) 66 | 67 | print('Get place stats -- 2015-2016') 68 | stats = dc.get_stats(['geoId/05', 'geoId/06', 'dc/madDcid'], 69 | 'dc/0hyp6tkn18vcb', 70 | obs_dates=['2015', '2016']) 71 | print(stats) 72 | 73 | # Get related places. 74 | 75 | 76 | # TODO(*): Fix the related places example. 77 | # print('Get related places') 78 | # related_places = dc.get_related_places(['geoId/06085'], 'Person', 'count', 79 | # 'CensusACS5yrSurvey', "measuredValue", {"gender": "Female"}) 80 | # print(related_places) 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /datacommons_client/utils/error_handling.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from requests import Response 4 | 5 | 6 | class DataCommonsError(Exception): 7 | """Base exception for all Data Commons-related errors.""" 8 | 9 | default_message = "An error occurred getting data from Data Commons API." 10 | 11 | def __init__(self, message: Optional[str] = None): 12 | """Initializes a DataCommonsError with a default or custom message.""" 13 | super().__init__(message or self.default_message) 14 | 15 | 16 | class APIError(DataCommonsError): 17 | """Represents an error interacting with Data Commons API.""" 18 | 19 | default_message = "An API error occurred." 20 | 21 | def __init__( 22 | self, 23 | response: Optional[Response] = None, 24 | message: Optional[str] = None, 25 | ): 26 | """Initializes an APIError. 27 | 28 | Args: 29 | response (Optional[Response]): The response, if available. 30 | message (Optional[str]): A descriptive error message. 31 | """ 32 | super().__init__(message or self.default_message) 33 | self.response = response 34 | self.request = getattr(response, "request", None) 35 | self.status_code = getattr(response, "status_code", None) 36 | 37 | def __str__(self) -> str: 38 | """Returns a detailed string representation of the error. 39 | 40 | Returns: 41 | str: A string describing the error, including the request URL if available. 
42 | """ 43 | 44 | details = f"\n{self.args[0]}" 45 | if self.status_code: 46 | details += f"\nStatus Code: {self.status_code}" 47 | if getattr(self.request, "url", None): 48 | details += f"\nRequest URL: {self.request.url}" 49 | if getattr(self.response, "text", None): 50 | details += f"\nResponse: {self.response.text}" 51 | 52 | return details 53 | 54 | 55 | class DCConnectionError(APIError): 56 | """Raised for network-related errors in the Data Commons API.""" 57 | 58 | default_message = ( 59 | "A network error occurred while connecting to the Data Commons API.") 60 | 61 | 62 | class DCStatusError(APIError): 63 | """Raised for non-2xx HTTP status code errors in the Data Commons API.""" 64 | 65 | default_message = "The Data Commons API returned a non-2xx status code." 66 | 67 | 68 | class DCAuthenticationError(APIError): 69 | """Raised for 401 Unauthorized errors in the Data Commons API.""" 70 | 71 | default_message = "Authentication failed. Please check your API key." 72 | 73 | 74 | class InvalidDCInstanceError(DataCommonsError): 75 | """Raised when an invalid Data Commons instance is provided.""" 76 | 77 | default_message = "The specified Data Commons instance is invalid." 78 | 79 | 80 | class InvalidObservationSelectError(DataCommonsError): 81 | """Raised when an invalid ObservationSelect field is provided.""" 82 | 83 | default_message = "The ObservationSelect field is invalid." 84 | 85 | 86 | class NoDataForPropertyError(DataCommonsError): 87 | """Raised when there is no data that meets the specified property filters.""" 88 | 89 | default_message = "No available data for the specified property filters." 90 | -------------------------------------------------------------------------------- /run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2020 Google LLC 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e # Immediately exit with failure if any command fails. 17 | 18 | YAPF_STYLE='{based_on_style: google, indent_width: 2}' 19 | FORMAT_INCLUDE_PATHS="datacommons/ datacommons_client/ datacommons_pandas/" 20 | FORMAT_EXCLUDE_PATH="**/.env/**" 21 | 22 | function setup_python { 23 | python3 -m pip install --upgrade pip hatch 24 | # here temporarily while there is an incompatibility with hatch and the newest click version 25 | # see https://github.com/pypa/hatch/pull/2051 for status updates from Hatch 26 | python3 -m pip uninstall uninstall click -y 27 | python3 -m pip install click==8.2.1 28 | hatch env create 29 | } 30 | 31 | function run_py_test { 32 | pytest -vv 33 | } 34 | 35 | function run_yapf { 36 | EXTRA_ARGS=$@ 37 | yapf $EXTRA_ARGS --recursive --parallel --style="$YAPF_STYLE" \ 38 | --exclude="$FORMAT_EXCLUDE_PATH" $FORMAT_INCLUDE_PATHS 39 | } 40 | 41 | function run_isort { 42 | EXTRA_ARGS=$@ 43 | isort $EXTRA_ARGS --profile=google --skip-glob="$FORMAT_EXCLUDE_PATH" \ 44 | $FORMAT_INCLUDE_PATHS 45 | } 46 | 47 | function run_lint_test { 48 | if ! 
49 |     echo "Fix lint errors by running: ./run_test.sh -f"
50 |     exit 1
51 |   fi
52 |   if ! run_isort --check-only; then
53 |     echo "Fix Python import order by running: ./run_test.sh -f"
54 |     exit 1
55 |   fi
56 |   echo "Python style checks passed."
57 | }
58 | 
59 | function run_lint_fix {
60 |   run_yapf --in-place
61 |   run_isort
62 | }
63 | 
64 | function run_all_tests {
65 |   run_py_test
66 |   run_lint_test
67 | }
68 | 
69 | function help {
70 |   echo "Usage: $0 -asplf"
71 |   echo "-a Run all tests"
72 |   echo "-s Set up python environment"
73 |   echo "-p Run python tests"
74 |   echo "-l Run lint tests"
75 |   echo "-f Fix lint"
76 |   exit 1
77 | }
78 | 
79 | while getopts asplf OPTION; do
80 |   case $OPTION in
81 |     a)
82 |       echo -e "### Running all tests"
83 |       run_all_tests
84 |       ;;
85 |     s)
86 |       echo -e "### Setting up python environment"
87 |       setup_python
88 |       ;;
89 |     p)
90 |       echo -e "### Running python tests"
91 |       run_py_test
92 |       ;;
93 |     l)
94 |       echo -e "### Running lint tests"
95 |       run_lint_test
96 |       ;;
97 |     f)
98 |       echo -e "### Fix lint errors"
99 |       run_lint_fix
100 |       ;;
101 |     *)
102 |       help
103 |   esac
104 | done
105 | 
106 | if [ $OPTIND -eq 1 ]
107 | then
108 |   help
109 | fi
110 | 
--------------------------------------------------------------------------------
/datacommons/node.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #      http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ API to request node information.
15 | """
16 | 
17 | from typing import Dict, List
18 | 
19 | from datacommons.requests import _post
20 | from datacommons.utils import _get_arrow
21 | from datacommons.utils import _get_direction
22 | 
23 | 
24 | def properties(nodes: List[str], is_out: bool = True) -> Dict[str, List[str]]:
25 |   """Retrieves all the properties for a list of nodes.
26 | 
27 |   Note this only returns the property labels, not the values.
28 |   Args:
29 |     nodes: List of DCIDs.
30 |     is_out: Whether to return outgoing properties.
31 |   Returns:
32 |     A dict keyed by node DCID, with the values being a list of properties
33 |     for the queried node.
34 |   """
35 |   resp = _post('/v2/node', {'nodes': nodes, 'property': _get_arrow(is_out)})
36 |   result = {}
37 |   for node, item in resp.get('data', {}).items():
38 |     properties = item.get('properties', [])
39 |     result[node] = properties
40 |   return result
41 | 
42 | 
43 | def property_values(nodes: List[str],
44 |                     property: str,
45 |                     is_out: bool = True) -> Dict[str, List[str]]:
46 |   """Retrieves the property values for a list of nodes.
47 |   Args:
48 |     nodes: List of DCIDs.
49 |     property: The property label to query for.
50 |     is_out: Whether the property is outgoing.
51 |   Returns:
52 |     A dict keyed by node DCID, with the values being a list of values
53 |     for the queried property.
54 |   """
55 |   resp = _post(f'/v1/bulk/property/values/{_get_direction(is_out)}', {
56 |       'nodes': nodes,
57 |       'property': property,
58 |   })
59 |   result = {}
60 |   for item in resp.get('data', []):
61 |     node, values = item['node'], item.get('values', [])
62 |     result[node] = []
63 |     for v in values:
64 |       if 'dcid' in v:
65 |         result[node].append(v['dcid'])
66 |       else:
67 |         result[node].append(v['value'])
68 |   return result
69 | 
70 | 
71 | def triples(nodes: List[str],
72 |             is_out: bool = True) -> Dict[str, Dict[str, List[object]]]:
73 |   """Retrieves the triples for a list of nodes.
74 |   Args:
75 |     nodes: List of DCIDs.
76 |     is_out: Whether the returned property is outgoing for the queried
77 |       nodes.
78 |   Returns:
79 |     A two level dict keyed by node DCID, then by the arc property, with
80 |     a list of values or DCIDs.
81 |   """
82 |   resp = _post(f'/v1/bulk/triples/{_get_direction(is_out)}',
83 |                data={'nodes': nodes})
84 |   result = {}
85 |   for item in resp.get('data', []):
86 |     node, triples = item['node'], item.get('triples', {})
87 |     result[node] = {}
88 |     for property, other_nodes in triples.items():
89 |       result[node][property] = other_nodes.get('nodes', [])
90 |   return result
91 | 
--------------------------------------------------------------------------------
/datacommons_client/tests/test_dataframes.py:
--------------------------------------------------------------------------------
1 | from unittest.mock import MagicMock
2 | 
3 | import pandas as pd
4 | 
5 | from datacommons_client.endpoints.node import NodeEndpoint
6 | from datacommons_client.models.node import StatVarConstraint
7 | from datacommons_client.models.node import StatVarConstraints
8 | from datacommons_client.utils.dataframes import add_property_constraints_to_observations_dataframe
9 | 
10 | 
11 | def test_add_property_constraints_to_observations_dataframe_adds_columns():
12 |   """Adds constraint id and name columns based on statvar metadata."""
13 |   # Input observations
14 |   df = pd.DataFrame([
15 |       {
16 |           "date": "2020",
17 |           "entity": "geo/1",
18 |           "variable": "sv/A",
19 |           "value": 10,
20 |           "unit": "Count",
21 |       },
22 |       {
23 |           "date": "2020",
24 |           "entity": "geo/2",
25 |           "variable": "sv/B",
26 |           "value": 20,
27 |           "unit": "Count",
28 |       },
29 |   ])
30 | 
31 |   endpoint = MagicMock(spec=NodeEndpoint)
32 | 
33 |   endpoint.fetch_statvar_constraints.return_value = StatVarConstraints.model_validate(
34 |       {
35 |           "sv/A": [
36 |               StatVarConstraint(
37 |                   constraintId="DevelopmentFinanceScheme",
38 |                   constraintName="Development Finance Scheme",
39 |                   valueId="ODAGrants",
40 |                   valueName="Official Development Assistance Grants",
41 |               ),
42 |               StatVarConstraint(
43 |                   constraintId="DevelopmentFinanceRecipient",
44 |                   constraintName="Development Finance Recipient",
45 |                   valueId="country/GTM",
46 |                   valueName="Guatemala",
47 |               ),
48 |           ],
49 |           "sv/B": [
50 |               StatVarConstraint(
51 |                   constraintId="sex",
52 |                   constraintName="Sex",
53 |                   valueId="Female",
54 |                   valueName="Female",
55 |               )
56 |           ],
57 |       })
58 | 
59 |   out = add_property_constraints_to_observations_dataframe(endpoint=endpoint,
60 |                                                            observations_df=df)
61 | 
62 |   # Columns for constraints should be present and filled per variable
63 |   assert "DevelopmentFinanceScheme" in out.columns
64 |   assert "DevelopmentFinanceScheme_name" in out.columns
65 |   assert ("DevelopmentFinanceRecipient" in out.columns and
66 |           "DevelopmentFinanceRecipient_name" in out.columns)
67 |   assert "sex" in out.columns and "sex_name" in out.columns
68 | 
69 |   # Row-wise checks
70 |   row_a = out[out["variable"] == "sv/A"].iloc[0]
71 |   assert row_a["DevelopmentFinanceScheme"] == "ODAGrants"
row_a["DevelopmentFinanceScheme"] == "ODAGrants" 72 | assert row_a[ 73 | "DevelopmentFinanceScheme_name"] == "Official Development Assistance Grants" 74 | assert row_a["DevelopmentFinanceRecipient"] == "country/GTM" 75 | assert row_a["DevelopmentFinanceRecipient_name"] == "Guatemala" 76 | 77 | row_b = out[out["variable"] == "sv/B"].iloc[0] 78 | assert row_b["sex"] == "Female" 79 | assert row_b["sex_name"] == "Female" 80 | 81 | 82 | def test_add_property_constraints_to_observations_dataframe_empty(): 83 | """Empty DataFrame returns unchanged.""" 84 | endpoint = MagicMock(spec=NodeEndpoint) 85 | empty_df = pd.DataFrame([]) 86 | out = add_property_constraints_to_observations_dataframe( 87 | endpoint=endpoint, observations_df=empty_df) 88 | assert out.empty 89 | -------------------------------------------------------------------------------- /datacommons_client/utils/dataframes.py: -------------------------------------------------------------------------------- 1 | from datacommons_client.endpoints.node import NodeEndpoint 2 | from datacommons_client.utils.data_processing import flatten_names_dictionary 3 | 4 | try: 5 | import pandas as pd 6 | except ImportError: 7 | pd = None 8 | 9 | from datacommons_client.utils.decorators import requires_pandas 10 | 11 | 12 | @requires_pandas 13 | def add_entity_names_to_observations_dataframe( 14 | endpoint: NodeEndpoint, 15 | observations_df: "pd.DataFrame", # type: ignore[reportInvalidTypeForm] 16 | entity_columns: str | list[str], 17 | ) -> "pd.DataFrame": # type: ignore[reportInvalidTypeForm] 18 | """ 19 | Adds entity names to the observations DataFrame. 20 | 21 | Args: 22 | endpoint (NodeEndpoint): The NodeEndpoint instance for fetching entity names. 23 | observations_df (dict): The DataFrame containing observations. 24 | entity_columns (str | list[str]): The column(s) containing entity DCIDs. 25 | """ 26 | 27 | # Guard against empty DataFrame 28 | if observations_df.empty: 29 | return observations_df 30 | 31 | if not isinstance(entity_columns, list): 32 | entity_columns = [entity_columns] 33 | 34 | for entity_column in entity_columns: 35 | if entity_column not in observations_df.columns: 36 | raise ValueError( 37 | "The specified entity column does not exist in the DataFrame.") 38 | 39 | # Get unique entity DCIDs from the DataFrame 40 | unique_values = observations_df[entity_column].dropna().unique().tolist() 41 | 42 | # Guard against empty unique values 43 | if not unique_values: 44 | continue 45 | 46 | # Fetch entity names from the endpoint 47 | response = endpoint.fetch_entity_names(entity_dcids=unique_values) 48 | 49 | # Flatten the response to get a dictionary of names 50 | names = flatten_names_dictionary(response) 51 | 52 | # Insert the names into a column next to the entity column 53 | name_column = f"{entity_column}_name" 54 | if name_column not in observations_df.columns: 55 | observations_df.insert( 56 | loc=observations_df.columns.get_loc(entity_column) + 1, 57 | column=name_column, 58 | value=observations_df[entity_column].map(names), 59 | ) 60 | 61 | return observations_df 62 | 63 | 64 | @requires_pandas 65 | def add_property_constraints_to_observations_dataframe( 66 | endpoint: NodeEndpoint, 67 | observations_df: "pd.DataFrame", # type: ignore[reportInvalidTypeForm] 68 | ) -> "pd.DataFrame": # type: ignore[reportInvalidTypeForm] 69 | """ 70 | Adds property constraint dcids and names to the observations DataFrame. 71 | 72 | Args: 73 | endpoint (NodeEndpoint): The NodeEndpoint instance for fetching entity names. 
74 |     observations_df (pd.DataFrame): The DataFrame containing observations.
75 |   """
76 | 
77 |   # Guard against empty DataFrame
78 |   if observations_df.empty:
79 |     return observations_df
80 | 
81 |   # Get constraints
82 |   constraints_data = endpoint.fetch_statvar_constraints(
83 |       variable_dcids=observations_df.variable.unique().tolist())
84 | 
85 |   for statvar, constraints in constraints_data.items():
86 |     for constraint in constraints:
87 |       # Fill the columns with the corresponding values
88 |       observations_df.loc[observations_df.variable == statvar,
89 |                           constraint.constraintId] = constraint.valueId
90 | 
91 |       observations_df.loc[observations_df.variable == statvar,
92 |                           constraint.constraintId +
93 |                           "_name"] = constraint.valueName
94 | 
95 |   return observations_df
--------------------------------------------------------------------------------
/datacommons/test/sparql_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #      http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API unit tests.
15 | 
16 | Unit tests for the SPARQL query wrapper.
17 | """
18 | 
19 | import unittest
20 | from unittest.mock import patch
21 | 
22 | import datacommons
23 | 
24 | _QUERY1 = ('''
25 | SELECT ?name ?dcid
26 | WHERE {
27 |   ?a typeOf Place .
28 |   ?a name ?name .
29 |   ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
30 |   ?a dcid ?dcid
31 | }
32 | ''')
33 | 
34 | _QUERY2 = ('''
35 | SELECT ?name ?dcid
36 | WHERE {
37 |   ?a typeOf Place .
38 |   ?a name ?name .
39 |   ?a dcid ("geoId/DNE") .
40 |   ?a dcid ?dcid
41 | }
42 | ''')
43 | 
44 | 
45 | def _post_mock(path, data):
46 |   """ A mock function for _post. """
47 |   if path == "/query" and data['sparql'] == _QUERY1:
48 |     return {
49 |         'header': ['?name', '?dcid'],
50 |         'rows': [{
51 |             'cells': [{
52 |                 'value': 'California'
53 |             }, {
54 |                 'value': 'geoId/06'
55 |             }]
56 |         }, {
57 |             'cells': [{
58 |                 'value': 'Kentucky'
59 |             }, {
60 |                 'value': 'geoId/21'
61 |             }]
62 |         }, {
63 |             'cells': [{
64 |                 'value': 'Maryland'
65 |             }, {
66 |                 'value': 'geoId/24'
67 |             }]
68 |         }]
69 |     }
70 |   if path == "/query" and data['sparql'] == _QUERY2:
71 |     return {
72 |         'header': ['?name', '?dcid'],
73 |     }
74 | 
75 |   # Otherwise, return a mock exception object.
76 |   return Exception('mock exception')
77 | 
78 | 
79 | class TestQuery(unittest.TestCase):
80 |   """ Unit tests for the Query object. """
81 | 
82 |   @patch('datacommons.sparql._post')
83 |   def test_rows(self, _post):
84 |     """ Sending a valid query returns the correct response. """
85 |     _post.side_effect = _post_mock
86 |     # Create a selector that filters out California.
87 |     selector = lambda row: row['?name'] != 'California'
88 |     # Issue the queries.
89 |     results = datacommons.query(_QUERY1)
90 |     selected_results = datacommons.query(_QUERY1, select=selector)
91 |     # Execute the query and iterate through the results.
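    # query() returns a list of dicts, each mapping a query variable (e.g.
    # '?name') to its cell value, in the same order as the mocked 'rows'
    # payload above, so the enumerate() indices line up with that payload.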
92 | for idx, row in enumerate(results): 93 | if idx == 0: 94 | self.assertDictEqual(row, {'?name': 'California', '?dcid': 'geoId/06'}) 95 | if idx == 1: 96 | self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'}) 97 | if idx == 2: 98 | self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'}) 99 | 100 | # Verify that the select function works. 101 | for idx, row in enumerate(selected_results): 102 | if idx == 0: 103 | self.assertDictEqual(row, {'?name': 'Kentucky', '?dcid': 'geoId/21'}) 104 | if idx == 1: 105 | self.assertDictEqual(row, {'?name': 'Maryland', '?dcid': 'geoId/24'}) 106 | 107 | @patch('datacommons.sparql._post') 108 | def test_no_rows(self, _post): 109 | """ Handles row-less response. """ 110 | _post.side_effect = _post_mock 111 | # Issue the query 112 | self.assertEqual(datacommons.query(_QUERY2), []) 113 | 114 | 115 | if __name__ == '__main__': 116 | unittest.main() 117 | -------------------------------------------------------------------------------- /datacommons_client/models/node.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import Field 4 | 5 | from datacommons_client.models.base import ArcLabel 6 | from datacommons_client.models.base import BaseDCModel 7 | from datacommons_client.models.base import DictLikeRootModel 8 | from datacommons_client.models.base import ListLikeRootModel 9 | from datacommons_client.models.base import NodeDCID 10 | from datacommons_client.models.base import Property 11 | from datacommons_client.models.base import PropertyList 12 | 13 | 14 | class Node(BaseDCModel): 15 | """Represents an individual node in the Data Commons knowledge graph. 16 | 17 | Attributes: 18 | dcid: The unique identifier for the node. 19 | name: The name of the node. 20 | provenanceId: The provenance ID for the node. 21 | types: The types associated with the node. 22 | value: The value of the node. 23 | """ 24 | dcid: Optional[str] = None 25 | name: Optional[str] = None 26 | provenanceId: Optional[str | list[str]] = None 27 | types: Optional[list[str]] = None 28 | value: Optional[str] = None 29 | 30 | 31 | class Name(BaseDCModel): 32 | """Represents a name associated with an Entity (node). 33 | 34 | Attributes: 35 | value: The name of the Entity 36 | language: The language of the name 37 | property: The property used to get the name 38 | """ 39 | 40 | value: str 41 | language: str 42 | property: str 43 | 44 | 45 | class NodeGroup(BaseDCModel): 46 | """Represents a group of nodes in the Data Commons knowledge graph. 47 | 48 | Attributes: 49 | nodes: A list of Node objects in the group. 50 | """ 51 | 52 | nodes: list[Node] = Field(default_factory=list) 53 | 54 | 55 | class Arcs(BaseDCModel): 56 | """Represents arcs in the Data Commons knowledge graph. 57 | 58 | Attributes: 59 | arcs: A dictionary mapping arc labels to NodeGroup objects. 60 | """ 61 | 62 | arcs: dict[ArcLabel, NodeGroup] = Field(default_factory=dict) 63 | 64 | 65 | class Properties(BaseDCModel): 66 | """Represents a group of properties in the Data Commons knowledge graph. 67 | 68 | Attributes: 69 | properties: A list of property strings. 
70 | """ 71 | 72 | properties: Optional[PropertyList] = None 73 | 74 | 75 | class FlattenedPropertiesMapping(BaseDCModel, 76 | DictLikeRootModel[dict[NodeDCID, 77 | PropertyList]]): 78 | """A model to represent a mapping of node DCIDs to their properties.""" 79 | 80 | 81 | class FlattenedArcsMapping(BaseDCModel, 82 | DictLikeRootModel[dict[NodeDCID, dict[Property, 83 | list[Node]]]]): 84 | """A model to represent a mapping of node DCIDs to their arcs.""" 85 | 86 | 87 | class NodeList(BaseDCModel, ListLikeRootModel[list[Node]]): 88 | """A root model whose value is a list of Node objects.""" 89 | 90 | 91 | class NodeDCIDList(BaseDCModel, ListLikeRootModel[list[NodeDCID]]): 92 | """A root model whose value is a list of NodeDCID strings.""" 93 | 94 | 95 | class StatVarConstraint(BaseDCModel): 96 | """Represents a constraint for a statistical variable.""" 97 | 98 | constraintId: NodeDCID 99 | constraintName: Optional[str] = None 100 | valueId: NodeDCID 101 | valueName: Optional[str] = None 102 | 103 | 104 | class StatVarConstraints(BaseDCModel, 105 | DictLikeRootModel[dict[NodeDCID, 106 | list[StatVarConstraint]]]): 107 | """A root model whose value is a dictionary of statvar ids - a list of StatVarConstraint objects. 108 | This model is used to represent constraints associated with statistical variables. 109 | """ 110 | -------------------------------------------------------------------------------- /datacommons/sparql.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ Data Commons Python API Query Module. 15 | 16 | Implements functions for sending graph queries to the Data Commons Graph. 17 | """ 18 | 19 | from datacommons.requests import _post 20 | 21 | 22 | def query(query_string, select=None): 23 | """ Returns the results of executing a SPARQL query on the Data Commons graph. 24 | 25 | Args: 26 | query_string (:obj:`str`): The SPARQL query string. 27 | select (:obj:`func` accepting a row of the query result): A function that 28 | selects rows to be returned by :code:`query`. This function accepts a row 29 | on the results of executing :code:`query_string` and returns True if and 30 | only if the row is to be returned by :code:`query`. The row passed in as 31 | an argument is represented as a :obj:`dict` that maps a query variable in 32 | :code:`query_string` to its value in the given row. 33 | 34 | Returns: 35 | A table, represented as a :obj:`list` of rows, resulting from executing the 36 | given SPARQL query. Each row is a :obj:`dict` mapping query variable to its 37 | value in the row. If `select` is not `None`, then a row is included in the 38 | returned :obj:`list` if and only if `select` returns :obj:`True` for that 39 | row. 40 | 41 | Raises: 42 | ValueError: If the payload returned by the Data Commons REST API is 43 | malformed. 
44 | 
45 |   Examples:
46 |     We would like to query for the name associated with three states identified
47 |     by their dcids
48 |     `California `_,
49 |     `Kentucky `_, and
50 |     `Maryland `_.
51 | 
52 |     >>> query_str = '''
53 |     ... SELECT ?name ?dcid
54 |     ... WHERE {
55 |     ...   ?a typeOf Place .
56 |     ...   ?a name ?name .
57 |     ...   ?a dcid ("geoId/06" "geoId/21" "geoId/24") .
58 |     ...   ?a dcid ?dcid
59 |     ... }
60 |     ... '''
61 |     >>> result = query(query_str)
62 |     >>> for r in result:
63 |     ...   print(r)
64 |     {"?name": "Maryland", "?dcid": "geoId/24"}
65 |     {"?name": "Kentucky", "?dcid": "geoId/21"}
66 |     {"?name": "California", "?dcid": "geoId/06"}
67 | 
68 |     Optionally, we can specify which rows are returned by setting :code:`select`
69 |     like so. The following returns all rows where the name is "Maryland".
70 | 
71 |     >>> selector = lambda row: row['?name'] == 'Maryland'
72 |     >>> result = query(query_str, select=selector)
73 |     >>> for r in result:
74 |     ...   print(r)
75 |     {"?name": "Maryland", "?dcid": "geoId/24"}
76 |   """
77 |   resp = _post('/query', {'sparql': query_string})
78 |   # Iterate through the query results
79 |   header = resp.get('header')
80 |   if header is None:
81 |     raise ValueError('Ill-formatted response: does not contain a header.')
82 |   result_rows = []
83 |   for row in resp.get('rows', []):
84 |     # Construct the map from query variable to cell value.
85 |     row_map = {}
86 |     for idx, cell in enumerate(row.get('cells', [])):
87 |       if idx >= len(header):
88 |         raise ValueError('Query error: unexpected cell {}'.format(cell))
89 |       if 'value' not in cell:
90 |         raise ValueError('Query error: cell missing value {}'.format(cell))
91 |       cell_var = header[idx]
92 |       row_map[cell_var] = cell['value']
93 |     # Add the row to the result rows if it is selected
94 |     if select is None or select(row_map):
95 |       result_rows.append(row_map)
96 |   return result_rows
--------------------------------------------------------------------------------
/datacommons_client/models/base.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Mapping, MutableSequence
2 | from pprint import pformat
3 | from typing import Annotated, Any, Iterable, Optional, TypeAlias
4 | 
5 | from pydantic import BaseModel
6 | from pydantic import BeforeValidator
7 | from pydantic import ConfigDict
8 | from pydantic import RootModel
9 | 
10 | 
11 | def listify(v: Any) -> list[str]:
12 |   if isinstance(v, (str, bytes)):
13 |     return [v]
14 |   if not isinstance(v, Iterable):
15 |     return [v]
16 |   return list(v)
17 | 
18 | 
19 | variableDCID: TypeAlias = str
20 | entityDCID: TypeAlias = str
21 | facetID: TypeAlias = str
22 | ListOrStr = Annotated[list[str] | str, BeforeValidator(listify)]
23 | NextToken: TypeAlias = Optional[str]
24 | NodeDCID: TypeAlias = str
25 | ArcLabel: TypeAlias = str
26 | Property: TypeAlias = str
27 | PropertyList: TypeAlias = list[Property]
28 | Query: TypeAlias = str
29 | DominantType: TypeAlias = str
30 | 
31 | 
32 | class BaseDCModel(BaseModel):
33 |   """Provides serialization methods for the Pydantic models used by the client."""
34 | 
35 |   model_config = ConfigDict(validate_by_name=True,
36 |                             validate_default=True,
37 |                             validate_by_alias=True,
38 |                             use_enum_values=True,
39 |                             serialize_by_alias=True)
40 | 
41 |   def __str__(self) -> str:
42 |     """Returns a string representation of the instance."""
43 |     return self.to_json()
44 | 
45 |   def to_dict(self, exclude_none: bool = True) -> dict[str, Any]:
46 |     """Converts the instance to a dictionary.
47 | 48 | Args: 49 | exclude_none: If True, only include non-empty values in the response. 50 | 51 | Returns: 52 | Dict[str, Any]: The dictionary representation of the instance. 53 | """ 54 | 55 | return self.model_dump(mode="python", exclude_none=exclude_none) 56 | 57 | def to_json(self, exclude_none: bool = True) -> str: 58 | """Converts the instance to a JSON string. 59 | 60 | Args: 61 | exclude_none: If True, only include non-empty values in the response. 62 | 63 | Returns: 64 | str: The JSON string representation of the instance. 65 | """ 66 | return self.model_dump_json(exclude_none=exclude_none, indent=2) 67 | 68 | 69 | class DictLikeRootModel(RootModel, Mapping): 70 | """A base class for models that can be treated as dictionaries.""" 71 | 72 | def __repr__(self) -> str: 73 | return f"{self.__class__.__name__}({self.root})" 74 | 75 | def __str__(self) -> str: 76 | return pformat(self.root, compact=True, width=80) 77 | 78 | def __getitem__(self, key: str) -> Any: 79 | return self.root[key] 80 | 81 | def __iter__(self) -> Iterable[Any]: 82 | return iter(self.root) 83 | 84 | def __len__(self) -> int: 85 | return len(self.root) 86 | 87 | def __eq__(self, other: Any) -> bool: 88 | if isinstance(other, DictLikeRootModel): 89 | return self.root == other.root 90 | else: 91 | return self.root == other 92 | 93 | 94 | class ListLikeRootModel(MutableSequence, RootModel): 95 | """A base class for models that can be treated as lists.""" 96 | 97 | def __repr__(self) -> str: 98 | return f"{self.__class__.__name__}({self.root})" 99 | 100 | def __str__(self) -> str: 101 | return pformat(self.root, compact=True, width=80) 102 | 103 | def __getitem__(self, index: int) -> Any: 104 | return self.root[index] 105 | 106 | def __setitem__(self, index: int, value: Any) -> None: 107 | self.root[index] = value 108 | 109 | def __delitem__(self, index: int) -> None: 110 | del self.root[index] 111 | 112 | def __len__(self) -> int: 113 | return len(self.root) 114 | 115 | def __eq__(self, other: Any) -> bool: 116 | if isinstance(other, ListLikeRootModel): 117 | return self.root == other.root 118 | else: 119 | return self.root == other 120 | 121 | def insert(self, index: int, item: Any) -> None: 122 | """Inserts an item at a specified index in the root list.""" 123 | self.root.insert(index, item) 124 | -------------------------------------------------------------------------------- /datacommons/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ Data Commons Utilities Library. 15 | 16 | Various functions that can aid in the extension of the Data Commons API. 
17 | """ 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | 23 | import base64 24 | from collections import defaultdict 25 | import json 26 | import os 27 | import zlib 28 | 29 | import six.moves.urllib.error 30 | import six.moves.urllib.request 31 | 32 | # --------------------------------- CONSTANTS --------------------------------- 33 | 34 | # REST API endpoint root 35 | _API_ROOT = "https://api.datacommons.org" 36 | 37 | # REST API endpoint paths 38 | _API_ENDPOINTS = { 39 | 'query': '/query', 40 | 'get_property_labels': '/node/property-labels', 41 | 'get_property_values': '/node/property-values', 42 | 'get_triples': '/node/triples', 43 | 'get_places_in': '/node/places-in', 44 | 'get_related_places': '/node/related-places', 45 | 'get_stats': '/bulk/stats', 46 | 'get_stat_value': '/stat/value', 47 | 'get_stat_series': '/stat/series', 48 | 'get_stat_all': '/stat/all', 49 | } 50 | 51 | # The default value to limit to 52 | _MAX_LIMIT = 100 53 | 54 | # Batch size for heavyweight queries. 55 | _QUERY_BATCH_SIZE = 500 56 | 57 | # Environment variable names used by the package 58 | _ENV_VAR_API_KEY = 'DC_API_KEY' 59 | 60 | # ------------------------- INTERNAL HELPER FUNCTIONS ------------------------- 61 | 62 | 63 | def _send_request(req_url, 64 | req_json={}, 65 | compress=False, 66 | post=True, 67 | use_payload=True): 68 | """ Sends a POST/GET request to req_url with req_json, default to POST. 69 | 70 | Returns: 71 | The payload returned by sending the POST/GET request formatted as a dict. 72 | """ 73 | headers = {'Content-Type': 'application/json'} 74 | 75 | # Pass along API key if provided 76 | if os.environ.get(_ENV_VAR_API_KEY): 77 | headers['x-api-key'] = os.environ[_ENV_VAR_API_KEY] 78 | 79 | # Send the request and verify the request succeeded 80 | if post: 81 | req = six.moves.urllib.request.Request( 82 | req_url, data=json.dumps(req_json).encode('utf-8'), headers=headers) 83 | else: 84 | req = six.moves.urllib.request.Request(req_url, headers=headers) 85 | try: 86 | res = six.moves.urllib.request.urlopen(req) 87 | except six.moves.urllib.error.HTTPError as e: 88 | raise ValueError( 89 | 'Response error: An HTTP {} code was returned by the REST API. ' 90 | 'Printing response\n\n{}'.format(e.code, e.read())) 91 | if isinstance(res, six.moves.urllib.error.HTTPError): 92 | raise ValueError( 93 | 'Response error: An HTTP {} code was returned by the REST API. ' 94 | 'Printing response\n\n{}'.format(res.code, res.reason)) 95 | # Get the JSON 96 | res_json = json.loads(res.read()) 97 | if not use_payload: 98 | return res_json 99 | if 'payload' not in res_json: 100 | raise ValueError('Response error: Payload not found. Printing response\n\n' 101 | '{}'.format(res.text)) 102 | 103 | # If the payload is compressed, decompress and decode it 104 | payload = res_json['payload'] 105 | if compress: 106 | payload = zlib.decompress(base64.b64decode(payload), zlib.MAX_WBITS | 32) 107 | return json.loads(payload) 108 | 109 | 110 | def _format_expand_payload(payload, new_key, must_exist=[]): 111 | """ Formats expand type payloads into dicts from dcids to lists of values. """ 112 | # Create the results dictionary from payload 113 | results = defaultdict(set) 114 | for entry in payload: 115 | if 'dcid' in entry and new_key in entry: 116 | dcid = entry['dcid'] 117 | results[dcid].add(entry[new_key]) 118 | 119 | # Ensure all dcids in must_exist have some entry in results. 
120 | for dcid in must_exist: 121 | results[dcid] 122 | return {k: sorted(list(v)) for k, v in results.items()} 123 | 124 | 125 | def _get_direction(out: bool): 126 | return "out" if out else "in" 127 | 128 | 129 | def _get_arrow(out: bool): 130 | """Returns the arrow syntax for an arc direction. 131 | 132 | Args: 133 | out: Whether the arc direction is out. 134 | Returns: 135 | The corresponding arrow syntax. 136 | """ 137 | return "->" if out else "<-" 138 | -------------------------------------------------------------------------------- /datacommons_pandas/examples/df_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Basic examples for building pandas objects using the Data Commons Pandas API.""" 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | 20 | import datacommons_pandas as dcpd 21 | 22 | 23 | def build_time_series_example(): 24 | 25 | print(""" 26 | # Build a pd.Series of time series for one variable and one place. 27 | $ dcpd.build_time_series('country/CAN', 'Count_WildlandFireEvent') 28 | {}""".format(dcpd.build_time_series('country/CAN', 'Count_WildlandFireEvent'))) 29 | 30 | print(""" 31 | # Build a pd.Series of time series for one variable and one place and optional args. 
32 | $ dcpd.build_time_series('country/USA', 'Count_Person', 'CensusPEPSurvey') 33 | {}""".format( 34 | dcpd.build_time_series('country/USA', 'Count_Person', 'CensusPEPSurvey'))) 35 | 36 | 37 | def build_time_series_dataframe_example(): 38 | 39 | def demonstrate_build_time_series_dataframe(intro_str, 40 | places, 41 | stat_var, 42 | desc_col=False): 43 | arg_str = "{}, '{}'".format(places, stat_var) 44 | if desc_col: 45 | arg_str += ", desc_col=True" 46 | print(""" 47 | # {} 48 | $ dcpd.build_time_series_dataframe({}) 49 | {}""".format(intro_str, arg_str, 50 | dcpd.build_time_series_dataframe(places, stat_var, desc_col))) 51 | 52 | build_time_series_dataframe_params = [{ 53 | 'intro_str': 54 | 'Build a DataFrame of time series for one variable in multiple places.', 55 | 'places': ['geoId/33', 'geoId/29', 'country/USA'], 56 | 'stat_var': 57 | 'Median_Income_Person' 58 | }, { 59 | 'intro_str': 60 | 'Build a DataFrame of time series with columns sorted in descending order.', 61 | 'places': ['country/USA'], 62 | 'stat_var': 63 | 'Median_Income_Person', 64 | 'desc_col': 65 | True 66 | }] 67 | 68 | for param_set in build_time_series_dataframe_params: 69 | demonstrate_build_time_series_dataframe(**param_set) 70 | 71 | 72 | def build_multivariate_dataframe_example(): 73 | 74 | def demonstrate_build_multivariate_dataframe(intro_str, places, stat_vars): 75 | print(""" 76 | # {} 77 | $ dcpd.build_multivariate_dataframe({}, {}) 78 | {}""".format(intro_str, places, stat_vars, 79 | dcpd.build_multivariate_dataframe(places, stat_vars))) 80 | 81 | build_multivariate_dataframe_params = [{ 82 | 'intro_str': 83 | 'Build a DataFrame of latest observations for multiple variables in multiple places.', 84 | 'places': ['geoId/06', 'country/FRA'], 85 | 'stat_vars': ['Median_Age_Person', 'Count_Person', 'Count_Household'] 86 | }] 87 | 88 | for param_set in build_multivariate_dataframe_params: 89 | demonstrate_build_multivariate_dataframe(**param_set) 90 | 91 | 92 | def expect_err_examples(): 93 | 94 | print("\n\nExpect 6 errors, starting HERE:") 95 | try: 96 | dcpd.build_time_series_dataframe(['geoId/33'], 97 | ['Median_Income_Person', 'Count_Person']) 98 | except ValueError as e: 99 | print("Successfully errored on: ", e) 100 | try: 101 | dcpd.build_time_series_dataframe(24, ['Median_Income_Person']) 102 | except ValueError as e: 103 | print("Successfully errored on: ", e) 104 | try: 105 | dcpd.build_multivariate_dataframe([3], 106 | ['Median_Income_Person', 'Count_Person']) 107 | except ValueError as e: 108 | print("Successfully errored on: ", e) 109 | try: 110 | dcpd.build_multivariate_dataframe('country/USA', True) 111 | except ValueError as e: 112 | print("Successfully errored on: ", e) 113 | # If the following two do not error due to the addition of 114 | # Median_Income_Person statistics for NUTS geos, then please 115 | # replace either the places or the StatVar. 
116 | try: 117 | dcpd.build_time_series_dataframe(['nuts/HU2', 'nuts/HU22'], 118 | 'Median_Income_Person') 119 | except ValueError as e: 120 | print("Successfully errored on: ", e) 121 | try: 122 | dcpd.build_multivariate_dataframe(['nuts/HU2', 'nuts/HU22'], 123 | ['Median_Income_Person']) 124 | except ValueError as e: 125 | print("Successfully errored on: ", e) 126 | print("until HERE.") 127 | 128 | 129 | def main(): 130 | build_time_series_example() 131 | build_time_series_dataframe_example() 132 | build_multivariate_dataframe_example() 133 | expect_err_examples() 134 | 135 | 136 | if __name__ == '__main__': 137 | main() 138 | -------------------------------------------------------------------------------- /datacommons_client/endpoints/payloads.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from pydantic import Field 4 | from pydantic import field_serializer 5 | from pydantic import field_validator 6 | from pydantic import model_serializer 7 | from pydantic import model_validator 8 | 9 | from datacommons_client.models.base import BaseDCModel 10 | from datacommons_client.models.base import ListOrStr 11 | from datacommons_client.models.observation import ObservationDate 12 | from datacommons_client.models.observation import ObservationSelect 13 | from datacommons_client.models.observation import ObservationSelectList 14 | 15 | 16 | def normalize_list_to_string(value: str | list[str]) -> str: 17 | """Converts a list of properties to a string.""" 18 | 19 | if isinstance(value, list): 20 | return f"[{', '.join(value)}]" 21 | 22 | return value 23 | 24 | 25 | class NodeRequestPayload(BaseDCModel): 26 | """ 27 | A Pydantic model to structure, normalize, and validate the payload for a Node V2 API request. 28 | 29 | Attributes: 30 | node_dcids (str | list[str]): The DCID(s) of the nodes to query. 31 | expression (str): The property or relation expression(s) to query. 32 | """ 33 | 34 | node_dcids: ListOrStr = Field(..., serialization_alias="nodes") 35 | expression: list | str = Field(..., serialization_alias="property") 36 | 37 | 38 | class ObservationRequestPayload(BaseDCModel): 39 | """ 40 | A Pydantic model to structure, normalize, and validate the payload for an Observation V2 API request. 41 | 42 | Attributes: 43 | date (str): The date for which data is being requested. 44 | variable_dcids (str | list[str]): One or more variable IDs for the data. 45 | select (list[ObservationSelect]): Fields to include in the response. 46 | Defaults to ["date", "variable", "entity", "value"]. 47 | entity_dcids (Optional[str | list[str]]): One or more entity IDs to filter the data. 48 | entity_expression (Optional[str]): A string expression to filter entities. 49 | filter_facet_domains (Optional[str | list[str]]): One or more domain names to filter the data. 50 | filter_facet_ids (Optional[str | list[str]]): One or more facet IDs to filter the data. 
51 | """ 52 | 53 | date: ObservationDate | str = Field(default_factory=str, 54 | validate_default=True) 55 | variable_dcids: Optional[ListOrStr] = Field(default=None, 56 | serialization_alias="variable") 57 | select: Optional[list[str]] = None 58 | entity_dcids: Optional[ListOrStr] = None 59 | entity_expression: Optional[str | list[str]] = None 60 | filter_facet_domains: Optional[ListOrStr] = None 61 | filter_facet_ids: Optional[ListOrStr] = None 62 | 63 | @field_validator("date", mode="before") 64 | def _validate_date(cls, v): 65 | try: 66 | return ObservationDate(v) 67 | except ValueError: 68 | return v 69 | 70 | @field_validator("select", mode="before") 71 | def _coerce_select(cls, v): 72 | return ObservationSelectList.model_validate(v).select 73 | 74 | @field_validator("entity_expression", mode="before") 75 | def _coerce_expr(cls, v): 76 | if v is None: 77 | return v 78 | if isinstance(v, list): 79 | return normalize_list_to_string(v) 80 | if isinstance(v, str): 81 | return v 82 | raise TypeError("expression must be a string or list[str]") 83 | 84 | @field_serializer("variable_dcids", "entity_dcids", when_used="unless-none") 85 | def _serialise_dcids_fields(self, v): 86 | return {"dcids": v} 87 | 88 | @field_serializer("entity_expression", when_used="unless-none") 89 | def _serialise_expression_field(self, v): 90 | return {"expression": v} 91 | 92 | @model_validator(mode="after") 93 | def _check_one(self): 94 | if bool(self.entity_dcids) == bool(self.entity_expression): 95 | raise ValueError("Exactly one of dcids or expression must be set") 96 | return self 97 | 98 | @model_serializer(mode="wrap") 99 | def _wrap_filter(self, handler): 100 | # Normal dump 101 | data = handler(self) 102 | 103 | # pull out entity dcid or expression 104 | entity = data.pop("entity_dcids", None) or data.pop("entity_expression", 105 | None) 106 | 107 | # add entity to the data dictionary 108 | data["entity"] = entity 109 | 110 | # pull out the two filter keys if present 111 | domains = data.pop("filter_facet_domains", None) 112 | ids = data.pop("filter_facet_ids", None) 113 | 114 | # only add "filter" if at least one is set 115 | if domains or ids: 116 | filter_dict = {} 117 | if domains is not None: 118 | filter_dict["domains"] = domains 119 | if ids is not None: 120 | filter_dict["facet_ids"] = ids 121 | data["filter"] = filter_dict 122 | 123 | return data 124 | 125 | 126 | class ResolveRequestPayload(BaseDCModel): 127 | """ 128 | A Pydantic model to structure, normalize, and validate the payload for a Resolve V2 API request. 129 | 130 | Attributes: 131 | node_dcids (str | list[str]): The DCID(s) of the nodes to query. 132 | expression (str): The relation expression to query. 
133 | """ 134 | 135 | node_dcids: ListOrStr = Field(..., serialization_alias="nodes") 136 | expression: str | list[str] = Field(..., serialization_alias="property") 137 | -------------------------------------------------------------------------------- /datacommons_client/tests/models/test_node_models.py: -------------------------------------------------------------------------------- 1 | from datacommons_client.models.node import Arcs 2 | from datacommons_client.models.node import Node 3 | from datacommons_client.models.node import NodeGroup 4 | from datacommons_client.models.node import Properties 5 | from datacommons_client.models.node import StatVarConstraint 6 | from datacommons_client.models.node import StatVarConstraints 7 | 8 | 9 | def test_node_model_validation(): 10 | """Test that Node.model_validate parses data correctly.""" 11 | json_data = { 12 | "dcid": "node123", 13 | "name": "Test Node", 14 | "provenanceId": "prov123", 15 | "types": ["TypeA", "TypeB"], 16 | "value": "42", 17 | } 18 | node = Node.model_validate(json_data) 19 | assert node.dcid == "node123" 20 | assert node.name == "Test Node" 21 | assert node.provenanceId == "prov123" 22 | assert node.types == ["TypeA", "TypeB"] 23 | assert node.value == "42" 24 | 25 | 26 | def test_node_model_validation_partial(): 27 | """Test Node.model_validate with partial data.""" 28 | json_data = { 29 | "dcid": "node123", 30 | } 31 | node = Node.model_validate(json_data) 32 | assert node.dcid == "node123" 33 | assert node.name is None 34 | assert node.provenanceId is None 35 | assert node.types is None 36 | assert node.value is None 37 | 38 | 39 | def test_nodegroup_model_validation(): 40 | """Test that NodeGroup.model_validate parses data correctly.""" 41 | json_data = { 42 | "nodes": [ 43 | { 44 | "dcid": "node1", 45 | "name": "Node 1" 46 | }, 47 | { 48 | "dcid": "node2", 49 | "name": "Node 2" 50 | }, 51 | ] 52 | } 53 | node_group = NodeGroup.model_validate(json_data) 54 | assert len(node_group.nodes) == 2 55 | assert node_group.nodes[0].dcid == "node1" 56 | assert node_group.nodes[1].name == "Node 2" 57 | 58 | 59 | def test_nodegroup_model_validation_empty(): 60 | """Test NodeGroup.model_validate with empty data.""" 61 | json_data = {} 62 | node_group = NodeGroup.model_validate(json_data) 63 | assert len(node_group.nodes) == 0 64 | 65 | 66 | def test_arcs_model_validation(): 67 | """Test that Arcs.model_validate parses data correctly.""" 68 | json_data = { 69 | "arcs": { 70 | "label1": { 71 | "nodes": [{ 72 | "dcid": "node1" 73 | }, { 74 | "dcid": "node2" 75 | }] 76 | }, 77 | "label2": { 78 | "nodes": [{ 79 | "dcid": "node3" 80 | }] 81 | }, 82 | } 83 | } 84 | arcs = Arcs.model_validate(json_data) 85 | assert len(arcs.arcs) == 2 86 | assert "label1" in arcs.arcs 87 | assert len(arcs.arcs["label1"].nodes) == 2 88 | assert arcs.arcs["label1"].nodes[0].dcid == "node1" 89 | assert len(arcs.arcs["label2"].nodes) == 1 90 | assert arcs.arcs["label2"].nodes[0].dcid == "node3" 91 | 92 | 93 | def test_arcs_model_validation_empty(): 94 | """Test Arcs.model_validate with empty data.""" 95 | json_data = {} 96 | arcs = Arcs.model_validate(json_data) 97 | assert len(arcs.arcs) == 0 98 | 99 | 100 | def test_properties_model_validation(): 101 | """Test that Properties.model_validate parses data correctly.""" 102 | json_data = {"properties": ["prop1", "prop2", "prop3"]} 103 | properties = Properties.model_validate(json_data) 104 | assert len(properties.properties) == 3 105 | assert properties.properties == ["prop1", "prop2", "prop3"] 106 | 107 | 108 | 
def test_properties_model_validation_empty(): 109 | """Test Properties.model_validate with empty data.""" 110 | json_data = {} 111 | properties = Properties.model_validate(json_data) 112 | assert properties.properties is None 113 | 114 | 115 | def test_statvarconstraint_model_validation(): 116 | """Test StatVarConstraint.model_validate parses data correctly.""" 117 | data = { 118 | "constraintId": "DevelopmentFinanceScheme", 119 | "constraintName": "Development Finance Scheme", 120 | "valueId": "ODAGrants", 121 | "valueName": "Official Development Assistance Grants", 122 | } 123 | constraint = StatVarConstraint.model_validate(data) 124 | 125 | assert constraint.constraintId == "DevelopmentFinanceScheme" 126 | assert constraint.constraintName == "Development Finance Scheme" 127 | assert constraint.valueId == "ODAGrants" 128 | assert constraint.valueName == "Official Development Assistance Grants" 129 | 130 | 131 | def test_statvarconstraints_model_validation(): 132 | """Test StatVarConstraints root model validates mapping properly.""" 133 | constraints = StatVarConstraints.model_validate({ 134 | "sv/1": [ 135 | { 136 | "constraintId": "DevelopmentFinanceScheme", 137 | "constraintName": "Development Finance Scheme", 138 | "valueId": "ODAGrants", 139 | "valueName": "Official Development Assistance Grants", 140 | }, 141 | { 142 | "constraintId": "DevelopmentFinanceRecipient", 143 | "constraintName": "Development Finance Recipient", 144 | "valueId": "country/GTM", 145 | "valueName": "Guatemala", 146 | }, 147 | ], 148 | "sv/2": [], 149 | }) 150 | 151 | assert "sv/1" in constraints and "sv/2" in constraints 152 | assert len(constraints["sv/1"]) == 2 153 | assert constraints["sv/2"] == [] 154 | -------------------------------------------------------------------------------- /datacommons_client/tests/endpoints/test_observation_endpoint.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | from datacommons_client.endpoints.base import API 4 | from datacommons_client.endpoints.observation import ObservationEndpoint 5 | from datacommons_client.endpoints.response import ObservationResponse 6 | from datacommons_client.models.observation import ByVariable 7 | from datacommons_client.models.observation import ObservationDate 8 | from datacommons_client.models.observation import ObservationSelect 9 | 10 | 11 | def test_fetch(): 12 | """Tests the fetch method of ObservationEndpoint.""" 13 | api_mock = MagicMock(spec=API) 14 | api_mock.post.return_value = {"byVariable": {}} 15 | endpoint = ObservationEndpoint(api=api_mock) 16 | 17 | response = endpoint.fetch(variable_dcids="dcid/variableID", 18 | date=ObservationDate.LATEST, 19 | select=["date", "variable", "entity", "value"], 20 | entity_dcids="dc/EntityID", 21 | filter_facet_domains="domain1", 22 | filter_facet_ids="facet1") 23 | 24 | # Check the response 25 | assert isinstance(response, ObservationResponse) 26 | 27 | # Check the post request 28 | api_mock.post.assert_called_once_with(payload={ 29 | "date": ObservationDate.LATEST, 30 | "variable": { 31 | "dcids": ["dcid/variableID"] 32 | }, 33 | "entity": { 34 | "dcids": ["dc/EntityID"], 35 | }, 36 | "select": ["date", "variable", "entity", "value"], 37 | "filter": { 38 | "domains": ["domain1"], 39 | "facet_ids": ["facet1"] 40 | } 41 | }, 42 | endpoint="observation", 43 | all_pages=True, 44 | next_token=None) 45 | 46 | 47 | def test_fetch_observations_by_entity_type(): 48 | """Tests the fetch_observations_by_entity_type 
method.""" 49 | api_mock = MagicMock(spec=API) 50 | api_mock.post.return_value = {"byVariable": {}} 51 | endpoint = ObservationEndpoint(api=api_mock) 52 | 53 | response = endpoint.fetch_observations_by_entity_type( 54 | date="2023", 55 | parent_entity="Earth", 56 | entity_type="Country", 57 | select=["variable", "entity", "facet"], 58 | variable_dcids="dc/VariableID") 59 | 60 | # Check the response 61 | assert isinstance(response, ObservationResponse) 62 | 63 | # Check the post request 64 | api_mock.post.assert_called_once_with(payload={ 65 | "date": "2023", 66 | "variable": { 67 | "dcids": ["dc/VariableID"] 68 | }, 69 | "entity": { 70 | "expression": "Earth<-containedInPlace+{typeOf:Country}" 71 | }, 72 | "select": ["variable", "entity", "facet"], 73 | }, 74 | endpoint="observation", 75 | all_pages=True, 76 | next_token=None) 77 | 78 | 79 | def test_fetch_observations_facets_by_entity_type(): 80 | """Tests the fetch_observations_by_entity_type method.""" 81 | api_mock = MagicMock(spec=API) 82 | api_mock.post.return_value = {"byVariable": {}} 83 | endpoint = ObservationEndpoint(api=api_mock) 84 | 85 | response = endpoint.fetch_observations_by_entity_type( 86 | date="2023", 87 | parent_entity="Earth", 88 | entity_type="Country", 89 | variable_dcids="dc/VariableID", 90 | select=["variable", "entity", "facet"], 91 | ) 92 | 93 | # Check the response 94 | assert isinstance(response, ObservationResponse) 95 | 96 | # Check the post request 97 | api_mock.post.assert_called_once_with(payload={ 98 | "date": "2023", 99 | "variable": { 100 | "dcids": ["dc/VariableID"] 101 | }, 102 | "entity": { 103 | "expression": "Earth<-containedInPlace+{typeOf:Country}" 104 | }, 105 | "select": ["variable", "entity", "facet"], 106 | }, 107 | endpoint="observation", 108 | all_pages=True, 109 | next_token=None) 110 | 111 | 112 | def test_fetch_available_statistical_variables_single_entity(): 113 | """Test fetching variables for a single entity.""" 114 | mock_data = { 115 | "var1": ["ent1"], 116 | "var2": ["ent1"], 117 | } 118 | 119 | # Mock the fetch method on the ObservationEndpoint instance 120 | endpoint = ObservationEndpoint(api=MagicMock()) 121 | endpoint.fetch = MagicMock() 122 | endpoint.fetch.return_value.get_data_by_entity = MagicMock( 123 | return_value=mock_data) 124 | 125 | result = endpoint.fetch_available_statistical_variables("ent1") 126 | 127 | expected = { 128 | "ent1": ["var1", "var2"], 129 | } 130 | assert result == expected 131 | 132 | endpoint.fetch.assert_called_once_with( 133 | entity_dcids="ent1", 134 | select=[ObservationSelect.VARIABLE, ObservationSelect.ENTITY], 135 | variable_dcids=[]) 136 | 137 | 138 | def test_fetch_available_statistical_variables_multiple_entities(): 139 | """Test fetching variables for multiple entities.""" 140 | mock_data = { 141 | "var1": ["ent1", "ent2"], 142 | "var2": ["ent2"], 143 | } 144 | 145 | endpoint = ObservationEndpoint(api=MagicMock()) 146 | endpoint.fetch = MagicMock() 147 | endpoint.fetch.return_value.get_data_by_entity = MagicMock( 148 | return_value=mock_data) 149 | 150 | result = endpoint.fetch_available_statistical_variables(["ent1", "ent2"]) 151 | 152 | expected = { 153 | "ent1": ["var1"], 154 | "ent2": ["var1", "var2"], 155 | } 156 | assert result == expected 157 | -------------------------------------------------------------------------------- /datacommons_client/tests/endpoints/test_payloads.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datacommons_client.endpoints.payloads 
import NodeRequestPayload 4 | from datacommons_client.endpoints.payloads import ObservationRequestPayload 5 | from datacommons_client.endpoints.payloads import ResolveRequestPayload 6 | from datacommons_client.models.observation import ObservationDate 7 | from datacommons_client.models.observation import ObservationSelect 8 | from datacommons_client.utils.error_handling import InvalidObservationSelectError 9 | 10 | 11 | def test_node_payload_normalize(): 12 | """Tests that NodeRequestPayload correctly normalizes single and multiple node_dcids.""" 13 | payload = NodeRequestPayload(node_dcids="node1", expression="prop1") 14 | assert payload.node_dcids == ["node1"] 15 | 16 | payload = NodeRequestPayload(node_dcids=["node1", "node2"], 17 | expression="prop1") 18 | assert payload.node_dcids == ["node1", "node2"] 19 | 20 | 21 | def test_node_payload_validate(): 22 | """Tests that NodeRequestPayload validates its inputs correctly.""" 23 | with pytest.raises(ValueError): 24 | NodeRequestPayload(node_dcids="node1", 25 | expression=123) # `expression` must be a string 26 | 27 | 28 | def test_node_payload_to_dict(): 29 | """Tests NodeRequestPayload conversion to dictionary.""" 30 | payload = NodeRequestPayload(node_dcids="node1", expression="prop1") 31 | assert payload.to_dict() == {"nodes": ["node1"], "property": "prop1"} 32 | 33 | 34 | def test_observation_payload_normalize(): 35 | """Tests that ObservationRequestPayload normalizes inputs correctly.""" 36 | payload = ObservationRequestPayload( 37 | date="LATEST", 38 | variable_dcids="var1", 39 | select=["variable", "entity"], 40 | entity_dcids="ent1", 41 | filter_facet_domains="domain1", 42 | filter_facet_ids="facets1", 43 | ) 44 | assert payload.variable_dcids == ["var1"] 45 | assert payload.entity_dcids == ["ent1"] 46 | assert payload.filter_facet_domains == ["domain1"] 47 | assert payload.filter_facet_ids == ["facets1"] 48 | assert payload.date == ObservationDate.LATEST 49 | 50 | assert "filter" in payload.to_dict() 51 | assert "facet_ids" in payload.to_dict()["filter"] 52 | assert "domains" in payload.to_dict()["filter"] 53 | 54 | # Check that when domain and facets are not included, they are not in the payload 55 | payload = ObservationRequestPayload( 56 | date="all", 57 | variable_dcids=["var1"], 58 | select=["variable", "entity"], 59 | entity_dcids=["ent1"], 60 | ) 61 | assert payload.date == ObservationDate.ALL 62 | assert payload.variable_dcids == ["var1"] 63 | assert payload.entity_dcids == ["ent1"] 64 | assert "filter" not in payload.to_dict() 65 | 66 | 67 | def test_observation_select_invalid_value(): 68 | """Tests that an invalid ObservationSelect value raises InvalidObservationSelectError.""" 69 | with pytest.raises(InvalidObservationSelectError): 70 | ObservationSelect("invalid") 71 | 72 | 73 | def test_observation_payload_validate(): 74 | """Tests that ObservationRequestPayload validates its inputs.""" 75 | with pytest.raises(InvalidObservationSelectError): 76 | ObservationRequestPayload( 77 | date="LATEST", 78 | variable_dcids="var1", 79 | select=["variable"], 80 | entity_dcids=None, 81 | entity_expression=None, 82 | ) # Requires either `entity_dcids` or `entity_expression` 83 | 84 | with pytest.raises(InvalidObservationSelectError): 85 | ObservationRequestPayload( 86 | date="LATEST", 87 | variable_dcids="var1", 88 | select=["value"], # Missing required "variable" and "entity" 89 | entity_expression="expression", 90 | ) 91 | 92 | with pytest.raises(ValueError): 93 | ObservationRequestPayload( 94 | date="LATEST", 95 | 
variable_dcids="var1", 96 | select=["variable", "entity"], 97 | entity_dcids="ent1", 98 | entity_expression= 99 | "expression", # Both `entity_dcids` and `entity_expression` set 100 | ) 101 | 102 | 103 | def test_observation_payload_to_dict(): 104 | """Tests ObservationRequestPayload conversion to dictionary.""" 105 | payload = ObservationRequestPayload( 106 | date="LATEST", 107 | variable_dcids="var1", 108 | select=["variable", "entity"], 109 | entity_dcids="ent1", 110 | filter_facet_ids="facets1", 111 | ) 112 | assert payload.to_dict() == { 113 | "date": ObservationDate.LATEST, 114 | "variable": { 115 | "dcids": ["var1"] 116 | }, 117 | "entity": { 118 | "dcids": ["ent1"] 119 | }, 120 | "select": ["variable", "entity"], 121 | "filter": { 122 | "facet_ids": ["facets1"] 123 | } 124 | } 125 | 126 | 127 | def test_resolve_payload_normalize(): 128 | """Tests that ResolveRequestPayload normalizes single and multiple node_dcids.""" 129 | payload = ResolveRequestPayload(node_dcids="node1", expression="expr1") 130 | assert payload.node_dcids == ["node1"] 131 | 132 | payload = ResolveRequestPayload(node_dcids=["node1", "node2"], 133 | expression="expr1") 134 | assert payload.node_dcids == ["node1", "node2"] 135 | 136 | 137 | def test_resolve_payload_validate(): 138 | """Tests that ResolveRequestPayload validates its inputs correctly.""" 139 | with pytest.raises(ValueError): 140 | ResolveRequestPayload(node_dcids="node1", 141 | expression=123) # `expression` must be a string 142 | 143 | 144 | def test_resolve_payload_to_dict(): 145 | """Tests ResolveRequestPayload conversion to dictionary.""" 146 | payload = ResolveRequestPayload(node_dcids="node1", expression="expr1") 147 | assert payload.to_dict() == {"nodes": ["node1"], "property": "expr1"} 148 | -------------------------------------------------------------------------------- /datacommons_client/tests/models/test_observation_models.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datacommons_client.models.observation import Facet 4 | from datacommons_client.models.observation import Observation 5 | from datacommons_client.models.observation import ObservationSelectList 6 | from datacommons_client.models.observation import OrderedFacet 7 | from datacommons_client.models.observation import Variable 8 | from datacommons_client.utils.error_handling import InvalidObservationSelectError 9 | 10 | 11 | def test_observation_model_validation(): 12 | """Test that Observation.model_validate parses data correctly.""" 13 | json_data = {"date": "2024-01-01", "value": 123.45} 14 | observation = Observation.model_validate(json_data) 15 | assert observation.date == "2024-01-01" 16 | assert observation.value == 123.45 17 | assert isinstance(observation.value, float) 18 | 19 | 20 | def test_observation_model_validation_partial(): 21 | """Test Observation.model_validate with missing data.""" 22 | json_data = {"date": "2024-01-01"} 23 | observation = Observation.model_validate(json_data) 24 | assert observation.date == "2024-01-01" 25 | assert observation.value is None 26 | 27 | 28 | def test_ordered_facets_model_validation(): 29 | """Test that OrderedFacet.model_validate parses data correctly.""" 30 | json_data = { 31 | "earliestDate": 32 | "2023-01-01", 33 | "facetId": 34 | "facet123", 35 | "latestDate": 36 | "2024-01-01", 37 | "obsCount": 38 | 2, 39 | "observations": [ 40 | { 41 | "date": "2023-01-01", 42 | "value": 100.0 43 | }, 44 | { 45 | "date": "2024-01-01", 46 | "value": 200.0 47 | }, 48 | ], 
49 | } 50 | ordered_facets = OrderedFacet.model_validate(json_data) 51 | assert ordered_facets.earliestDate == "2023-01-01" 52 | assert ordered_facets.facetId == "facet123" 53 | assert ordered_facets.latestDate == "2024-01-01" 54 | assert ordered_facets.obsCount == 2 55 | assert len(ordered_facets.observations) == 2 56 | assert ordered_facets.observations[0].value == 100.0 57 | 58 | 59 | def test_ordered_facets_model_validation_empty_observations(): 60 | """Test OrderedFacet.model_validate with empty observations.""" 61 | json_data = { 62 | "earliestDate": "2023-01-01", 63 | "facetId": "facet123", 64 | "latestDate": "2024-01-01", 65 | "obsCount": 0, 66 | "observations": [], 67 | } 68 | ordered_facets = OrderedFacet.model_validate(json_data) 69 | assert len(ordered_facets.observations) == 0 70 | 71 | 72 | def test_variable_model_validation(): 73 | """Test that Variable.model_validate parses data correctly.""" 74 | json_data = { 75 | "byEntity": { 76 | "entity1": { 77 | "orderedFacets": [{ 78 | "earliestDate": 79 | "2023-01-01", 80 | "facetId": 81 | "facet1", 82 | "latestDate": 83 | "2023-12-31", 84 | "obsCount": 85 | 2, 86 | "observations": [ 87 | { 88 | "date": "2023-01-01", 89 | "value": 50.0 90 | }, 91 | { 92 | "date": "2023-12-31", 93 | "value": 75.0 94 | }, 95 | ], 96 | }] 97 | } 98 | } 99 | } 100 | variable = Variable.model_validate(json_data) 101 | assert "entity1" in variable.byEntity 102 | facets = variable.byEntity["entity1"].orderedFacets 103 | assert len(facets) == 1 104 | assert facets[0].facetId == "facet1" 105 | assert facets[0].observations[0].value == 50.0 106 | 107 | 108 | def test_variable_model_validation_empty(): 109 | """Test Variable.model_validate with empty byEntity.""" 110 | json_data = {"byEntity": {}} 111 | variable = Variable.model_validate(json_data) 112 | assert len(variable.byEntity) == 0 113 | 114 | 115 | def test_facet_model_validation(): 116 | """Test that Facet.model_validate parses data correctly.""" 117 | json_data = { 118 | "importName": "Import 1", 119 | "measurementMethod": "Method A", 120 | "observationPeriod": "2023", 121 | "provenanceUrl": "http://example.com", 122 | "unit": "usd", 123 | } 124 | facet = Facet.model_validate(json_data) 125 | assert facet.importName == "Import 1" 126 | assert facet.measurementMethod == "Method A" 127 | assert facet.observationPeriod == "2023" 128 | assert facet.provenanceUrl == "http://example.com" 129 | assert facet.unit == "usd" 130 | 131 | 132 | def test_facet_model_validation_partial(): 133 | """Test Facet.model_validate with missing data.""" 134 | json_data = {"importName": "Import 1", "unit": "GTQ"} 135 | facet = Facet.model_validate(json_data) 136 | assert facet.importName == "Import 1" 137 | assert facet.measurementMethod is None 138 | assert facet.unit == "GTQ" 139 | assert facet.provenanceUrl is None 140 | 141 | 142 | def test_observation_select_list_defaults(): 143 | """ObservationSelectList returns default selects when none provided.""" 144 | osl = ObservationSelectList.model_validate(None) 145 | assert osl.select == ["date", "variable", "entity", "value"] 146 | 147 | 148 | def test_observation_select_list_custom(): 149 | """ObservationSelectList accepts custom select lists.""" 150 | osl = ObservationSelectList.model_validate(["variable", "entity", "facet"]) 151 | assert osl.select == ["variable", "entity", "facet"] 152 | 153 | 154 | def test_observation_select_list_missing_required(): 155 | """Missing required select entries raises InvalidObservationSelectError.""" 156 | with 
pytest.raises(InvalidObservationSelectError): 157 | ObservationSelectList.model_validate(["date", "value"]) 158 | -------------------------------------------------------------------------------- /datacommons_client/endpoints/resolve.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from datacommons_client.endpoints.base import API 4 | from datacommons_client.endpoints.base import Endpoint 5 | from datacommons_client.endpoints.payloads import ResolveRequestPayload 6 | from datacommons_client.endpoints.response import ResolveResponse 7 | 8 | 9 | def _resolve_correspondence_expression(from_type: str, 10 | to_type: str, 11 | entity_type: str | None = None) -> str: 12 | """ 13 | Constructs a relation expression for fetching correspondence between entities of two types. 14 | 15 | Args: 16 | from_type (str): The source entity type. 17 | to_type (str): The target entity type. 18 | entity_type (Optional[str]): Optional type of the entities. 19 | 20 | Returns: 21 | str: The relation expression to fetch correspondence between entities of the given types. 22 | """ 23 | return (f"<-{from_type}{{typeOf:{entity_type}}}->{to_type}" 24 | if entity_type else f"<-{from_type}->{to_type}") 25 | 26 | 27 | class ResolveEndpoint(Endpoint): 28 | """ 29 | A class to interact with the resolve API endpoint. 30 | 31 | Args: 32 | api (API): The API instance providing the environment configuration 33 | (base URL, headers, authentication) to be used for requests. 34 | """ 35 | 36 | def __init__(self, api: API): 37 | """Initializes the ResolveEndpoint instance.""" 38 | super().__init__(endpoint="resolve", api=api) 39 | 40 | def fetch(self, node_ids: str | list[str], 41 | expression: str | list[str]) -> ResolveResponse: 42 | """ 43 | Fetches resolved data for the given nodes and expressions, identified by name, 44 | coordinates, or wiki ID. 45 | 46 | Args: 47 | node_ids (str | list[str]): One or more node IDs to resolve. 48 | expression (str): The relation expression to query. 49 | 50 | Returns: 51 | ResolveResponse: The response object containing the resolved data. 52 | """ 53 | # Check if the node_ids is a single string. If so, convert it to a list. 54 | if isinstance(node_ids, str): 55 | node_ids = [node_ids] 56 | 57 | # Construct the payload 58 | payload = ResolveRequestPayload(node_dcids=node_ids, 59 | expression=expression).to_dict() 60 | 61 | # Send the request and return the response 62 | return ResolveResponse.model_validate(self.post(payload)) 63 | 64 | def fetch_dcids_by_name(self, 65 | names: str | list[str], 66 | entity_type: Optional[str] = None) -> ResolveResponse: 67 | """ 68 | Fetches DCIDs for entities by their names. 69 | 70 | Args: 71 | names (str | list[str]): One or more entity names to resolve. 72 | entity_type (Optional[str]): Optional type of the entities. 73 | 74 | Returns: 75 | ResolveResponse: The response object containing the resolved DCIDs. 76 | """ 77 | 78 | expression = _resolve_correspondence_expression(from_type="description", 79 | to_type="dcid", 80 | entity_type=entity_type) 81 | 82 | return self.fetch(node_ids=names, expression=expression) 83 | 84 | def fetch_dcids_by_wikidata_id( 85 | self, 86 | wikidata_ids: str | list[str], 87 | entity_type: Optional[str] = None) -> ResolveResponse: 88 | """ 89 | Fetches DCIDs for entities by their Wikidata IDs. 90 | 91 | Args: 92 | wikidata_ids (str | list[str]): One or more Wikidata IDs to resolve. 93 | entity_type (Optional[str]): Optional type of the entities. 
94 | 95 | Returns: 96 | ResolveResponse: The response object containing the resolved DCIDs. 97 | """ 98 | expression = _resolve_correspondence_expression(from_type="wikidataId", 99 | to_type="dcid", 100 | entity_type=entity_type) 101 | 102 | return self.fetch(node_ids=wikidata_ids, expression=expression) 103 | 104 | def fetch_dcid_by_coordinates( 105 | self, 106 | latitude: str, 107 | longitude: str, 108 | entity_type: Optional[str] = None) -> ResolveResponse: 109 | """ 110 | Fetches DCIDs for entities by their geographic coordinates. 111 | 112 | Args: 113 | latitude (str): Latitude of the entity. 114 | longitude (str): Longitude of the entity. 115 | entity_type (Optional[str]): Optional type of the entities to refine results 116 | (e.g., "City", "State", "Country"). 117 | 118 | Returns: 119 | ResolveResponse: The response object containing the resolved DCIDs. 120 | 121 | Example: 122 | To find the DCID for "Mountain View" using its latitude and longitude: 123 | ```python 124 | latitude = "37.42" 125 | longitude = "-122.08" 126 | response = client.fetch_dcid_by_coordinates(latitude=latitude, longitude=longitude) 127 | print(response.entities) 128 | ``` 129 | Note: 130 | - For ambiguous results, providing an entity type (e.g., "City") can help disambiguate. 131 | - The coordinates should be passed as strings in decimal format (e.g., "37.42", "-122.08"). 132 | 133 | 134 | """ 135 | expression = _resolve_correspondence_expression(from_type="geoCoordinate", 136 | to_type="dcid", 137 | entity_type=entity_type) 138 | coordinates = f"{latitude}#{longitude}" 139 | return self.fetch(node_ids=coordinates, expression=expression) 140 | -------------------------------------------------------------------------------- /datacommons_client/tests/endpoints/test_resolve_endpoint.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | from datacommons_client.endpoints.base import API 4 | from datacommons_client.endpoints.resolve import _resolve_correspondence_expression 5 | from datacommons_client.endpoints.resolve import ResolveEndpoint 6 | from datacommons_client.endpoints.response import ResolveResponse 7 | from datacommons_client.models.resolve import Candidate 8 | from datacommons_client.models.resolve import Entity 9 | 10 | 11 | def test_fetch(): 12 | """Tests the fetch method of ResolveEndpoint.""" 13 | api_mock = MagicMock(spec=API) 14 | api_mock.post = MagicMock(return_value={}) 15 | endpoint = ResolveEndpoint(api=api_mock) 16 | 17 | response = endpoint.fetch(node_ids="Node1", expression="some_expression") 18 | 19 | # Check the response 20 | assert isinstance(response, ResolveResponse) 21 | 22 | # Check the post request 23 | api_mock.post.assert_called_once_with(payload={ 24 | "nodes": ["Node1"], 25 | "property": "some_expression", 26 | }, 27 | endpoint="resolve", 28 | all_pages=True, 29 | next_token=None) 30 | 31 | 32 | def test_fetch_dcid_by_name(): 33 | """Tests the fetch_dcid_by_name method.""" 34 | api_mock = MagicMock(spec=API) 35 | api_mock.post = MagicMock(return_value={}) 36 | endpoint = ResolveEndpoint(api=api_mock) 37 | 38 | response = endpoint.fetch_dcids_by_name(names=["Entity1"], 39 | entity_type="Place") 40 | 41 | # Check the response 42 | assert isinstance(response, ResolveResponse) 43 | 44 | # Check the post request 45 | api_mock.post.assert_called_once_with(payload={ 46 | "nodes": ["Entity1"], 47 | "property": "<-description{typeOf:Place}->dcid" 48 | }, 49 | endpoint="resolve", 50 | all_pages=True, 51 | 
next_token=None) 52 | 53 | 54 | def test_fetch_dcid_by_wikidata_id(): 55 | """Tests the fetch_dcid_by_wikidata_id method.""" 56 | api_mock = MagicMock(spec=API) 57 | api_mock.post = MagicMock(return_value={}) 58 | endpoint = ResolveEndpoint(api=api_mock) 59 | 60 | response = endpoint.fetch_dcids_by_wikidata_id(wikidata_ids="Q12345", 61 | entity_type="Country") 62 | 63 | # Check the response 64 | assert isinstance(response, ResolveResponse) 65 | 66 | # Check the post request 67 | api_mock.post.assert_called_once_with(payload={ 68 | "nodes": ["Q12345"], 69 | "property": "<-wikidataId{typeOf:Country}->dcid", 70 | }, 71 | endpoint="resolve", 72 | all_pages=True, 73 | next_token=None) 74 | 75 | 76 | def test_fetch_dcids_list_by_wikidata_id(): 77 | """Tests the fetch_dcid_by_wikidata_id method.""" 78 | api_mock = MagicMock(spec=API) 79 | api_mock.post = MagicMock(return_value={}) 80 | endpoint = ResolveEndpoint(api=api_mock) 81 | 82 | response = endpoint.fetch_dcids_by_wikidata_id( 83 | wikidata_ids=["Q12345", "Q695660"]) 84 | 85 | # Check the response 86 | assert isinstance(response, ResolveResponse) 87 | 88 | # Check the post request 89 | api_mock.post.assert_called_once_with(payload={ 90 | "nodes": ["Q12345", "Q695660"], 91 | "property": "<-wikidataId->dcid", 92 | }, 93 | endpoint="resolve", 94 | all_pages=True, 95 | next_token=None) 96 | 97 | 98 | def test_fetch_dcid_by_coordinates(): 99 | """Tests the fetch_dcid_by_coordinates method.""" 100 | api_mock = MagicMock(spec=API) 101 | api_mock.post = MagicMock(return_value={}) 102 | endpoint = ResolveEndpoint(api=api_mock) 103 | 104 | response = endpoint.fetch_dcid_by_coordinates(latitude="37.7749", 105 | longitude="-122.4194", 106 | entity_type="City") 107 | 108 | # Check the response 109 | assert isinstance(response, ResolveResponse) 110 | 111 | # Check the post request 112 | api_mock.post.assert_called_once_with(payload={ 113 | "nodes": ["37.7749#-122.4194"], 114 | "property": "<-geoCoordinate{typeOf:City}->dcid", 115 | }, 116 | endpoint="resolve", 117 | all_pages=True, 118 | next_token=None) 119 | 120 | 121 | def test_resolve_correspondence_expression(): 122 | """Tests the resolve_correspondence_expression function.""" 123 | expression = _resolve_correspondence_expression(from_type="description", 124 | to_type="dcid", 125 | entity_type="Place") 126 | assert expression == "<-description{typeOf:Place}->dcid" 127 | 128 | expression_no_entity_type = _resolve_correspondence_expression( 129 | from_type="description", to_type="dcid") 130 | assert expression_no_entity_type == "<-description->dcid" 131 | 132 | 133 | def test_flatten_resolve_response(): 134 | """Tests the flatten_resolve_response function.""" 135 | # Mock ResolveResponse with multiple entities 136 | mock_data = ResolveResponse(entities=[ 137 | Entity(node="Node1", candidates=[Candidate(dcid="Candidate1")]), 138 | Entity(node="Node2", 139 | candidates=[ 140 | Candidate(dcid="Candidate2"), 141 | Candidate(dcid="Candidate3") 142 | ]), 143 | Entity(node="Node3", candidates=[]) # No candidates 144 | ]) 145 | 146 | # Call the function 147 | result = mock_data.to_flat_dict() 148 | 149 | # Expected output 150 | expected = { 151 | "Node1": "Candidate1", # Single candidate 152 | "Node2": ["Candidate2", "Candidate3"], # Multiple candidates 153 | "Node3": [], # No candidates 154 | } 155 | 156 | # Assertions 157 | assert result == expected 158 | -------------------------------------------------------------------------------- /datacommons_client/models/observation.py: 
-------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List, Optional 3 | 4 | from pydantic import Field 5 | from pydantic import field_validator 6 | from pydantic import model_serializer 7 | from pydantic import RootModel 8 | 9 | from datacommons_client.models.base import BaseDCModel 10 | from datacommons_client.models.base import DictLikeRootModel 11 | from datacommons_client.models.base import entityDCID 12 | from datacommons_client.models.base import facetID 13 | from datacommons_client.models.base import ListLikeRootModel 14 | from datacommons_client.models.base import variableDCID 15 | from datacommons_client.utils.error_handling import InvalidObservationSelectError 16 | 17 | 18 | class ObservationDate(str, Enum): 19 | LATEST = "LATEST" 20 | ALL = "" 21 | 22 | @classmethod 23 | def _missing_(cls, value): 24 | if isinstance(value, str): 25 | u = value.strip().upper() 26 | if u == "LATEST": 27 | return cls.LATEST 28 | if u in ("ALL", ""): 29 | return cls.ALL 30 | raise ValueError(f"Invalid date value: '{value}'. Only 'LATEST' or" 31 | f" '' (empty string) are allowed.") 32 | 33 | 34 | class ObservationSelect(str, Enum): 35 | DATE = "date" 36 | VARIABLE = "variable" 37 | ENTITY = "entity" 38 | VALUE = "value" 39 | FACET = "facet" 40 | 41 | @classmethod 42 | def valid_values(cls): 43 | """Returns a list of valid enum values.""" 44 | return sorted(cls._value2member_map_.keys()) 45 | 46 | @classmethod 47 | def _missing_(cls, value): 48 | """Handle missing enum values by raising a custom error.""" 49 | message = f"Invalid `select` Field: '{value}'. Only {', '.join(cls.valid_values())} are allowed." 50 | raise InvalidObservationSelectError(message=message) 51 | 52 | 53 | class ObservationSelectList(RootModel[list[ObservationSelect]]): 54 | """A model to represent a list of ObservationSelect values. 55 | 56 | Attributes: 57 | select (List[ObservationSelect]): A list of ObservationSelect enum values. 58 | """ 59 | 60 | root: Optional[list[ObservationSelect | str]] = None 61 | 62 | @field_validator("root", mode="before") 63 | def _validate_select(cls, v): 64 | if v is None: 65 | select = [ 66 | ObservationSelect.DATE, 67 | ObservationSelect.VARIABLE, 68 | ObservationSelect.ENTITY, 69 | ObservationSelect.VALUE, 70 | ] 71 | else: 72 | select = v 73 | 74 | select = [ObservationSelect(s).value for s in select] 75 | 76 | required_select = {"variable", "entity"} 77 | 78 | missing_fields = required_select - set(select) 79 | if missing_fields: 80 | raise InvalidObservationSelectError(message=( 81 | f"The 'select' field must include at least the following: {', '.join(required_select)} " 82 | f"(missing: {', '.join(missing_fields)})")) 83 | 84 | return select 85 | 86 | @property 87 | def select(self) -> list[str]: 88 | """Return select values directly as list""" 89 | return self.root or [] 90 | 91 | 92 | class Observation(BaseDCModel): 93 | """Represents an observation with a date and value. 94 | 95 | Attributes: 96 | date (str): The date of the observation. 97 | value (float): Optional. The value of the observation. 98 | """ 99 | 100 | date: Optional[str] = None 101 | value: Optional[float] = None 102 | 103 | 104 | class OrderedFacet(BaseDCModel): 105 | """Represents ordered facets of observations. 106 | 107 | Attributes: 108 | earliestDate (str): The earliest date in the observations. 109 | facetId (str): The identifier for the facet. 110 | latestDate (str): The latest date in the observations. 
111 | obsCount (int): The total number of observations. 112 | observations (List[Observation]): A list of observations associated with the facet. 113 | """ 114 | 115 | earliestDate: Optional[str] = None 116 | facetId: Optional[str] = None 117 | latestDate: Optional[str] = None 118 | obsCount: Optional[int] = None 119 | observations: list[Observation] = Field(default_factory=list) 120 | 121 | 122 | class OrderedFacets(BaseDCModel): 123 | """Represents a list of ordered facets. 124 | """ 125 | orderedFacets: list[OrderedFacet] = Field(default_factory=list) 126 | 127 | 128 | class Variable(BaseDCModel): 129 | """Represents a variable with data grouped by entity. 130 | 131 | Attributes: 132 | byEntity (dict[entityDCID, OrderedFacets]): A dictionary mapping 133 | entities to their ordered facets. 134 | """ 135 | 136 | byEntity: dict[entityDCID, OrderedFacets] = Field(default_factory=dict) 137 | 138 | 139 | class Facet(BaseDCModel): 140 | """Represents metadata for a facet. 141 | 142 | Attributes: 143 | importName (str): The name of the data import. 144 | measurementMethod (str): The method used to measure the data. 145 | observationPeriod (str): The period over which the observations were made. 146 | provenanceUrl (str): The URL of the data's provenance. 147 | unit (str): The unit of the observations. 148 | """ 149 | 150 | importName: Optional[str] = None 151 | measurementMethod: Optional[str] = None 152 | observationPeriod: Optional[str] = None 153 | provenanceUrl: Optional[str] = None 154 | unit: Optional[str] = None 155 | 156 | 157 | class ByVariable(BaseDCModel, DictLikeRootModel[dict[variableDCID, Variable]]): 158 | """A root model whose value is a dict mapping variableDCID to Variable.""" 159 | 160 | 161 | class VariableByEntity(BaseDCModel, 162 | DictLikeRootModel[dict[variableDCID, 163 | dict[entityDCID, 164 | OrderedFacets]]]): 165 | """A root model whose value is a dict mapping entityDCID to Variable.""" 166 | 167 | 168 | class ObservationRecord(Observation, Facet): 169 | """Represents a record of observations for a specific variable and entity. 170 | 171 | Attributes: 172 | date (str): The date of the observation. 173 | value (float): The value of the observation. 
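entity (entityDCID): The entity the observation is about. variable (variableDCID): The variable being observed. facetId (facetID): The facet the observation came from. Facet metadata fields (importName, unit, etc.) are inherited from Facet.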
174 | """ 175 | 176 | entity: Optional[entityDCID] = None 177 | variable: Optional[variableDCID] = None 178 | facetId: Optional[facetID] = None 179 | 180 | _order = [ 181 | "date", "entity", "variable", "facetId", "importName", 182 | "measurementMethod", "observationPeriod", "provenanceUrl", "unit", "value" 183 | ] 184 | 185 | @model_serializer(mode="wrap") 186 | def _reorder(self, helper): 187 | """Reorders the fields for serialization.""" 188 | data = helper(self) 189 | ordered = {} 190 | 191 | # Ensure the order of fields matches the specified order 192 | for key in self._order: 193 | if key in data: 194 | ordered[key] = data.pop(key) 195 | 196 | # Add any remaining fields that were not in the order list 197 | ordered.update(data) 198 | 199 | # Ensure the 'value' field is always at the end 200 | if "value" in ordered: 201 | ordered["value"] = ordered.pop("value") 202 | 203 | return ordered 204 | 205 | 206 | class ObservationRecords(BaseDCModel, 207 | ListLikeRootModel[list[ObservationRecord]]): 208 | """A root model whose value is a list of ObservationRecord.""" 209 | -------------------------------------------------------------------------------- /datacommons_client/endpoints/base.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Any, Dict, Optional 3 | 4 | from datacommons_client.utils.request_handling import check_instance_is_valid 5 | from datacommons_client.utils.request_handling import post_request 6 | from datacommons_client.utils.request_handling import resolve_instance_url 7 | 8 | 9 | class API: 10 | """Represents a configured API interface to the Data Commons API. 11 | 12 | This class handles environment setup, resolving the base URL, building headers, 13 | or optionally using a fully qualified URL directly. It can be used standalone 14 | to interact with the API or in combination with Endpoint classes. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | api_key: Optional[str] = None, 20 | dc_instance: Optional[str] = None, 21 | url: Optional[str] = None, 22 | surface_header_value: Optional[str] = None, 23 | ): 24 | """ 25 | Initializes the API instance. 26 | 27 | Args: 28 | api_key: The API key for authentication. Defaults to None. 29 | dc_instance: The Data Commons instance domain. Ignored if `url` is provided. 30 | Defaults to 'datacommons.org' if both `url` and `dc_instance` are None. 31 | url: A fully qualified URL for the base API. This may be useful if more granular control 32 | of the API is required (for local development, for example). If provided, dc_instance` 33 | should not be provided. 34 | surface_header_value: indicates which DC surface (MCP server, etc.) makes a call to the python library. 35 | If the call originated internally, this is null and we pass in "clientlib-python" as the surface header 36 | 37 | Raises: 38 | ValueError: If both `dc_instance` and `url` are provided. 
39 | """ 40 | if dc_instance and url: 41 | raise ValueError("Cannot provide both `dc_instance` and `url`.") 42 | 43 | if not dc_instance and not url: 44 | dc_instance = "datacommons.org" 45 | 46 | if url is not None: 47 | # Use the given URL directly (strip trailing slash) 48 | self.base_url = check_instance_is_valid(url.rstrip("/"), api_key=api_key) 49 | else: 50 | # Resolve from dc_instance 51 | self.base_url = resolve_instance_url(dc_instance) 52 | 53 | self.headers = self.build_headers(surface_header_value=surface_header_value, 54 | api_key=api_key) 55 | 56 | def __repr__(self) -> str: 57 | """Returns a readable representation of the API object. 58 | 59 | Indicates the base URL and if it's authenticated. 60 | 61 | Returns: 62 | str: A string representation of the API object. 63 | """ 64 | has_auth = " (Authenticated)" if "X-API-Key" in self.headers else "" 65 | return f"" 66 | 67 | def post(self, 68 | payload: dict[str, Any], 69 | endpoint: Optional[str] = None, 70 | *, 71 | all_pages: bool = True, 72 | next_token: Optional[str] = None) -> Dict[str, Any]: 73 | """Makes a POST request using the configured API environment. 74 | 75 | If `endpoint` is provided, it will be appended to the base_url. Otherwise, 76 | it will just POST to the base URL. 77 | 78 | Args: 79 | payload: The JSON payload for the POST request. 80 | endpoint: An optional endpoint path to append to the base URL. 81 | all_pages: If True, fetch all pages of the response. If False, fetch only the first page. 82 | Defaults to True. Set to False to only fetch the first page. In that case, a 83 | `next_token` key in the response will indicate if more pages are available. 84 | That token can be used to fetch the next page. 85 | 86 | Returns: 87 | A dictionary containing the merged response data. 88 | 89 | Raises: 90 | ValueError: If the payload is not a valid dictionary. 91 | """ 92 | if not isinstance(payload, dict): 93 | raise ValueError("Payload must be a dictionary.") 94 | 95 | url = (self.base_url if endpoint is None else f"{self.base_url}/{endpoint}") 96 | 97 | return post_request(url=url, 98 | payload=payload, 99 | headers=self.headers, 100 | all_pages=all_pages, 101 | next_token=next_token) 102 | 103 | def build_headers(self, 104 | surface_header_value: Optional[str], 105 | api_key: Optional[str] = None) -> dict[str, str]: 106 | """Build request headers for API requests. 107 | 108 | Includes JSON content type. If an API key is provided, add it as `X-API-Key`. 109 | 110 | Args: 111 | self: the API, which includes API key and surface header if available 112 | 113 | Returns: 114 | A dictionary of headers for the request. 115 | """ 116 | headers = { 117 | "Content-Type": "application/json", 118 | "x-surface": "clientlib-python" 119 | } 120 | if api_key: 121 | headers["X-API-Key"] = api_key 122 | 123 | if surface_header_value: 124 | headers["x-surface"] = surface_header_value 125 | 126 | return headers 127 | 128 | 129 | class Endpoint: 130 | """Represents a specific endpoint within the Data Commons API. 131 | 132 | This class leverages an API instance to make requests. It does not 133 | handle instance resolution or headers directly; that is delegated to the API instance. 134 | 135 | Attributes: 136 | endpoint (str): The endpoint path (e.g., 'node'). 137 | api (API): The API instance providing configuration and the `post` method. 138 | """ 139 | 140 | def __init__(self, endpoint: str, api: API): 141 | """ 142 | Initializes the Endpoint instance. 143 | 144 | Args: 145 | endpoint: The endpoint path (e.g., 'node'). 
146 | api: An API instance that provides the environment configuration. 147 | """ 148 | self.endpoint = endpoint 149 | self.api = api 150 | 151 | def __repr__(self) -> str: 152 | """Returns a readable representation of the Endpoint object. 153 | 154 | Shows the endpoint and underlying API configuration. 155 | 156 | Returns: 157 | str: A string representation of the Endpoint object. 158 | """ 159 | return f"<{self.endpoint.title()} Endpoint using {repr(self.api)}>" 160 | 161 | def post(self, 162 | payload: dict[str, Any], 163 | all_pages: bool = True, 164 | next_token: Optional[str] = None) -> Dict[str, Any]: 165 | """Makes a POST request to the specified endpoint using the API instance. 166 | 167 | Args: 168 | payload: The JSON payload for the POST request. 169 | all_pages: If True, fetch all pages of the response. If False, fetch only the first page. 170 | Defaults to True. Set to False to only fetch the first page. In that case, a 171 | `next_token` key in the response will indicate if more pages are available. 172 | That token can be used to fetch the next page. 173 | next_token: Optionally, the token to fetch the next page of results. Defaults to None. 174 | 175 | Returns: 176 | A dictionary with the merged API response data. 177 | 178 | Raises: 179 | ValueError: If the payload is not a valid dictionary. 180 | """ 181 | return self.api.post(payload=payload, 182 | endpoint=self.endpoint, 183 | all_pages=all_pages, 184 | next_token=next_token) 185 | -------------------------------------------------------------------------------- /datacommons_client/utils/data_processing.py: -------------------------------------------------------------------------------- 1 | from dataclasses import asdict 2 | import json 3 | from typing import Any, Dict, List 4 | 5 | from datacommons_client.models.base import ArcLabel 6 | from datacommons_client.models.base import facetID 7 | from datacommons_client.models.base import NodeDCID 8 | from datacommons_client.models.base import Property 9 | from datacommons_client.models.node import Arcs 10 | from datacommons_client.models.node import FlattenedArcsMapping 11 | from datacommons_client.models.node import FlattenedPropertiesMapping 12 | from datacommons_client.models.node import Name 13 | from datacommons_client.models.node import Node 14 | from datacommons_client.models.node import NodeGroup 15 | from datacommons_client.models.node import Properties 16 | from datacommons_client.models.observation import Facet 17 | from datacommons_client.models.observation import ObservationRecord 18 | from datacommons_client.models.observation import ObservationRecords 19 | from datacommons_client.models.observation import OrderedFacets 20 | from datacommons_client.models.observation import VariableByEntity 21 | 22 | 23 | def unpack_arcs(arcs: Dict[ArcLabel, NodeGroup]) -> dict[Property, list[Node]]: 24 | """Simplify the 'arcs' structure.""" 25 | # Return dictionary of property nodes 26 | return { 27 | prop: getattr(arc_data, "nodes", []) for prop, arc_data in arcs.items() 28 | } 29 | 30 | 31 | def flatten_properties( 32 | data: Dict[NodeDCID, Arcs | Properties] 33 | ) -> FlattenedPropertiesMapping | FlattenedArcsMapping: 34 | """ 35 | Flatten the properties of a node response. 36 | 37 | Processes a dictionary of node responses, extracting and 38 | simplifying their properties and arcs into a flattened dictionary. 39 | 40 | Args: 41 | data (Dict[NodeDCID, Arcs | Properties]): 42 | The input dictionary containing node responses. 
Each node maps to 43 | a dictionary with potential "arcs" and "properties" keys. 44 | 45 | Returns: 46 | FlattenedPropertiesMapping | FlattenedArcsMapping: 47 | A flattened dictionary where keys are node identifiers, and values 48 | are the simplified properties or nodes. 49 | """ 50 | if not data: 51 | return FlattenedPropertiesMapping.model_validate({}) 52 | 53 | first_node = next(iter(data.values())) 54 | is_properties = isinstance(first_node, Properties) 55 | mapping_cls = FlattenedPropertiesMapping if is_properties else FlattenedArcsMapping 56 | 57 | # Store simplified properties 58 | items = {} 59 | for node_id, node_data in data.items(): 60 | if is_properties: 61 | props = getattr(node_data, "properties", None) 62 | if props: 63 | items[node_id] = props 64 | else: 65 | arcs = getattr(node_data, "arcs", None) 66 | if arcs: 67 | items[node_id] = unpack_arcs(arcs) 68 | 69 | return mapping_cls.model_validate(items) 70 | 71 | 72 | def extract_observations( 73 | variable: str, entity: str, entity_data: OrderedFacets, 74 | facet_metadata: dict[facetID, Facet]) -> list[ObservationRecord]: 75 | """ 76 | Extracts observations for a given variable, entity, and its data. 77 | 78 | Args: 79 | variable (str): The variable name. 80 | entity (str): The entity name. 81 | entity_data (OrderedFacets): Data for the entity, including ordered facets. 82 | facet_metadata (dict[facetID, Facet]): Metadata for facets. 83 | 84 | Returns: 85 | list[dict]: A list of observation records. 86 | """ 87 | observations = [] 88 | for facet in entity_data.orderedFacets: 89 | for observation in facet.observations: 90 | observations.append( 91 | ObservationRecord.model_validate({ 92 | "date": observation.date, 93 | "entity": entity, 94 | "variable": variable, 95 | "value": observation.value, 96 | "facetId": facet.facetId, 97 | **facet_metadata.get(facet.facetId, Facet()).to_dict(), 98 | })) 99 | 100 | return observations 101 | 102 | 103 | def observations_as_records(data: VariableByEntity, 104 | facets: dict[facetID, Facet]) -> ObservationRecords: 105 | """ 106 | Converts observation data into a list of records. 107 | 108 | Args: 109 | data (VariableByEntity): A mapping of variables to entities and their data. 110 | facets (dict): Facet metadata for the observations. 111 | 112 | Returns: 113 | ObservationRecords: A flattened list of observation records. 114 | """ 115 | 116 | records = [] 117 | for variable, entities in data.items(): 118 | for entity, entity_data in entities.items(): 119 | for record in extract_observations( 120 | variable=variable, 121 | entity=entity, 122 | entity_data=entity_data, 123 | facet_metadata=facets, 124 | ): 125 | records.append(record) 126 | 127 | return ObservationRecords.model_validate(records) 128 | 129 | 130 | def group_variables_by_entity( 131 | data: dict[str, list[str]]) -> dict[str, list[str]]: 132 | """Groups variables by the entities they are associated with. 133 | Takes a dictionary mapping statistical variable DCIDs to a list of entity DCIDs, 134 | and returns a new dictionary mapping each entity DCID to a list of statistical 135 | variables available for that entity. 136 | Args: 137 | data: A dictionary where each key is a variable DCID and the value is a list 138 | of entity DCIDs that have observations for that variable. 139 | Returns: 140 | A dictionary where each key is an entity DCID and the value is a list of 141 | variable DCIDs available for that entity. 
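Example:
    A minimal sketch of the inversion; the input shape mirrors the unit tests:

    ```python
    group_variables_by_entity({"var1": ["ent1", "ent2"], "var2": ["ent2"]})
    # -> {"ent1": ["var1"], "ent2": ["var1", "var2"]}
    ```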
142 | """ 143 | result: dict[str, list[str]] = {} 144 | for variable, entities in data.items(): 145 | for entity in entities: 146 | result.setdefault(entity, []).append(variable) 147 | return result 148 | 149 | 150 | class SerializableMixin: 151 | """Provides serialization methods for the Response dataclasses.""" 152 | 153 | def to_dict(self, exclude_none: bool = True) -> Dict[str, Any]: 154 | """Converts the instance to a dictionary. 155 | 156 | Args: 157 | exclude_none: If True, only include non-empty values in the response. 158 | 159 | Returns: 160 | Dict[str, Any]: The dictionary representation of the instance. 161 | """ 162 | 163 | def _remove_none(data: Any) -> Any: 164 | """Recursively removes None or empty values from a dictionary or list.""" 165 | if isinstance(data, dict): 166 | return {k: _remove_none(v) for k, v in data.items() if v is not None} 167 | elif isinstance(data, list): 168 | return [_remove_none(item) for item in data] 169 | return data 170 | 171 | result = asdict(self) 172 | return _remove_none(result) if exclude_none else result 173 | 174 | def to_json(self, exclude_none: bool = True) -> str: 175 | """Converts the instance to a JSON string. 176 | 177 | Args: 178 | exclude_none: If True, only include non-empty values in the response. 179 | 180 | Returns: 181 | str: The JSON string representation of the instance. 182 | """ 183 | return json.dumps(self.to_dict(exclude_none=exclude_none), indent=2) 184 | 185 | 186 | def flatten_names_dictionary(names_dict: dict[str, Name]) -> dict[str, str]: 187 | """ 188 | Flattens a dictionary which contains Name objects into a flattened dictionary 189 | with DCIDs as keys and names as values. 190 | 191 | Args: 192 | names_dict (dict[str, Name]): The input dictionary to flatten. 193 | 194 | Returns: 195 | dict[str, str]: A flattened dictionary with DCIDs as keys and names as values. 196 | """ 197 | 198 | return {dcid: name.to_dict()['value'] for dcid, name in names_dict.items()} 199 | -------------------------------------------------------------------------------- /datacommons_client/endpoints/observation.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from datacommons_client.endpoints.base import API 4 | from datacommons_client.endpoints.base import Endpoint 5 | from datacommons_client.endpoints.payloads import ObservationRequestPayload 6 | from datacommons_client.endpoints.response import ObservationResponse 7 | from datacommons_client.models.observation import ObservationDate 8 | from datacommons_client.models.observation import ObservationSelect 9 | from datacommons_client.utils.data_processing import group_variables_by_entity 10 | 11 | 12 | class ObservationEndpoint(Endpoint): 13 | """ 14 | A class to interact with the observation API endpoint. 15 | 16 | Args: 17 | api (API): The API instance providing the environment configuration 18 | (base URL, headers, authentication) to be used for requests. 
19 | """ 20 | 21 | def __init__(self, api: API): 22 | """Initializes the ObservationEndpoint instance.""" 23 | super().__init__(endpoint="observation", api=api) 24 | 25 | def fetch( 26 | self, 27 | variable_dcids: str | list[str], 28 | date: ObservationDate | str = ObservationDate.LATEST, 29 | select: Optional[list[ObservationSelect | str]] = None, 30 | entity_dcids: Optional[str | list[str]] = None, 31 | entity_expression: Optional[str] = None, 32 | filter_facet_domains: Optional[str | list[str]] = None, 33 | filter_facet_ids: Optional[str | list[str]] = None 34 | ) -> ObservationResponse: 35 | """ 36 | Fetches data from the observation endpoint. 37 | 38 | Args: 39 | variable_dcids (str | list[str]): One or more variable IDs for the data. 40 | date (str | ObservationDate): The date for which data is being requested. 41 | Defaults to the latest observation. 42 | select (list[ObservationSelect]): Fields to include in the response. 43 | Defaults to ["date", "variable", "entity", "value"]. 44 | entity_dcids (Optional[str | list[str]]): One or more entity IDs to filter the data. 45 | entity_expression (Optional[str]): A string expression to filter entities. 46 | filter_facet_domains (Optional[str | list[str]]): One or more domain names to filter the data. 47 | filter_facet_ids (Optional[str | list[str]]): One or more facet IDs to filter the data. 48 | 49 | Returns: 50 | ObservationResponse: The response object containing observations for the specified query. 51 | """ 52 | # Construct the payload 53 | payload = ObservationRequestPayload( 54 | date=date, 55 | variable_dcids=variable_dcids, 56 | select=select, 57 | entity_dcids=entity_dcids, 58 | entity_expression=entity_expression, 59 | filter_facet_domains=filter_facet_domains, 60 | filter_facet_ids=filter_facet_ids, 61 | ).to_dict() 62 | 63 | response = self.post(payload=payload) 64 | 65 | # Send the request 66 | return ObservationResponse.model_validate(response) 67 | 68 | def fetch_observations_by_entity_type( 69 | self, 70 | date: ObservationDate | str, 71 | parent_entity: str, 72 | entity_type: str, 73 | variable_dcids: str | list[str], 74 | *, 75 | select: Optional[list[ObservationSelect | str]] = None, 76 | filter_facet_domains: Optional[str | list[str]] = None, 77 | filter_facet_ids: Optional[str | list[str]] = None 78 | ) -> ObservationResponse: 79 | """ 80 | Fetches all observations for a given entity type. 81 | 82 | Args: 83 | date (ObservationDate | str): The date option for the observations. 84 | Use 'all' for all dates, 'latest' for the most recent data, 85 | or provide a date as a string (e.g., "2024"). 86 | parent_entity (str): The parent entity under which the target entities fall. 87 | For example, "africa" for African countries, or "Earth" for all countries. 88 | entity_type (str): The type of entities for which to fetch observations. 89 | For example, "Country" or "Region". 90 | variable_dcids (str | list[str]): The variable(s) to fetch observations for. 91 | This can be a single variable ID or a list of IDs. 92 | select (Optional[list[ObservationSelect | str]]): Fields to include in the response. 93 | If not provided, defaults to ["date", "variable", "entity", "value"]. 94 | filter_facet_domains: Optional[str | list[str]: One or more domain names to filter the data. 95 | filter_facet_ids: Optional[str | list[str]: One or more facet IDs to filter the data. 96 | 97 | Returns: 98 | ObservationResponse: The response object containing observations for the specified entity type. 
99 | 100 | Example: 101 | To fetch all observations for African countries for a specific variable: 102 | 103 | ```python 104 | api = API() 105 | ObservationEndpoint(api).fetch_observations_by_entity_type( 106 | date="all", 107 | parent_entity="africa", 108 | entity_type="Country", 109 | variable_dcids="sdg/SI_POV_DAY1" 110 | ) 111 | ``` 112 | """ 113 | 114 | return self.fetch( 115 | variable_dcids=variable_dcids, 116 | date=date, 117 | select=[s for s in ObservationSelect] if not select else select, 118 | entity_expression= 119 | f"{parent_entity}<-containedInPlace+{{typeOf:{entity_type}}}", 120 | filter_facet_domains=filter_facet_domains, 121 | filter_facet_ids=filter_facet_ids) 122 | 123 | def fetch_observations_by_entity_dcid( 124 | self, 125 | date: ObservationDate | str, 126 | entity_dcids: str | list[str], 127 | variable_dcids: str | list[str], 128 | *, 129 | select: Optional[list[ObservationSelect | str]] = None, 130 | filter_facet_domains: Optional[str | list[str]] = None, 131 | filter_facet_ids: Optional[str | list[str]] = None 132 | ) -> ObservationResponse: 133 | """ 134 | Fetches all observations for the given entity DCIDs. 135 | 136 | Args: 137 | date (ObservationDate | str): The date option for the observations. 138 | Use 'all' for all dates, 'latest' for the most recent data, 139 | or provide a date as a string (e.g., "2024"). 140 | entity_dcids (str | list[str]): One or more entity IDs to filter the data. 141 | variable_dcids (str | list[str]): The variable(s) to fetch observations for. 142 | This can be a single variable ID or a list of IDs. 143 | select (Optional[list[ObservationSelect | str]]): Fields to include in the response. 144 | If not provided, defaults to ["date", "variable", "entity", "value"]. 145 | filter_facet_domains (Optional[str | list[str]]): One or more domain names to filter the data. 146 | filter_facet_ids (Optional[str | list[str]]): One or more facet IDs to filter the data. 147 | 148 | Returns: 149 | ObservationResponse: The response object containing observations for the specified entities. 150 | 151 | Example: 152 | To fetch all observations for Nigeria for a specific variable: 153 | 154 | ```python 155 | api = API() 156 | ObservationEndpoint(api).fetch_observations_by_entity_dcid( 157 | date="all", 158 | entity_dcids="country/NGA", 159 | variable_dcids="sdg/SI_POV_DAY1" 160 | ) 161 | ``` 162 | """ 163 | 164 | return self.fetch( 165 | variable_dcids=variable_dcids, 166 | date=date, 167 | select=[s for s in ObservationSelect] if not select else select, 168 | entity_dcids=entity_dcids, 169 | filter_facet_domains=filter_facet_domains, 170 | filter_facet_ids=filter_facet_ids) 171 | 172 | def fetch_available_statistical_variables( 173 | self, 174 | entity_dcids: str | list[str], 175 | ) -> dict[str, list[str]]: 176 | """ 177 | Fetches available statistical variables (which have observations) for given entities. 178 | Args: 179 | entity_dcids (str | list[str]): One or more entity DCID(s) to fetch variables for. 180 | Returns: 181 | dict[str, list[str]]: A dictionary mapping entity DCIDs to their available statistical variables. 182 | """ 183 | 184 | # Fetch observations for the given entity DCIDs. If the variable list is empty, 185 | # all available variables are retrieved.
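# (Illustrative shape, mirroring the unit tests: `get_data_by_entity()` below
# yields a variable -> entities mapping such as {"var1": ["ent1", "ent2"]},
# which `group_variables_by_entity` inverts to {"ent1": ["var1"], ...}.)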
186 | data = self.fetch( 187 | entity_dcids=entity_dcids, 188 | select=[ObservationSelect.VARIABLE, ObservationSelect.ENTITY], 189 | variable_dcids=[]).get_data_by_entity() 190 | 191 | return group_variables_by_entity(data=data) 192 | -------------------------------------------------------------------------------- /datacommons/test/node_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import unittest 16 | from unittest.mock import patch 17 | 18 | import datacommons 19 | 20 | 21 | class TestProperties(unittest.TestCase): 22 | 23 | @patch("datacommons.node._post") 24 | def test_with_data(self, _post): 25 | 26 | def side_effect(path, data): 27 | if path == "/v2/node" and data == { 28 | "nodes": ["City", "Count_Person", "foo"], 29 | "property": "->" 30 | }: 31 | return { 32 | "data": { 33 | "City": { 34 | "properties": [ 35 | "name", "provenance", "subClassOf", "typeOf" 36 | ] 37 | }, 38 | "Count_Person": { 39 | "properties": [ 40 | "description", "measuredProperty", "memberOf", "name", 41 | "populationType", "provenance", "statType", "typeOf" 42 | ] 43 | }, 44 | "foo": {} 45 | } 46 | } 47 | 48 | _post.side_effect = side_effect 49 | response = datacommons.properties(["City", "Count_Person", "foo"]) 50 | assert response == { 51 | "City": ["name", "provenance", "subClassOf", "typeOf"], 52 | "Count_Person": [ 53 | "description", "measuredProperty", "memberOf", "name", 54 | "populationType", "provenance", "statType", "typeOf" 55 | ], 56 | "foo": [] 57 | } 58 | 59 | @patch("datacommons.node._post") 60 | def test_with_direction(self, _post): 61 | 62 | def side_effect(path, data): 63 | if path == "/v2/node" and data == { 64 | "nodes": ["City", "Count_Person", "foo"], 65 | "property": "<-" 66 | }: 67 | return { 68 | "data": { 69 | "City": { 70 | "properties": [ 71 | "placeType", "rangeIncludes", "schoolLocationType", 72 | "typeOf" 73 | ] 74 | }, 75 | "Count_Person": { 76 | "properties": [ 77 | "measurementDenominator", "outputProperty", 78 | "relevantVariable" 79 | ] 80 | }, 81 | "foo": {} 82 | } 83 | } 84 | 85 | _post.side_effect = side_effect 86 | response = datacommons.properties(["City", "Count_Person", "foo"], 87 | is_out=False) 88 | assert response == { 89 | "City": ["placeType", "rangeIncludes", "schoolLocationType", "typeOf"], 90 | "Count_Person": [ 91 | "measurementDenominator", "outputProperty", "relevantVariable" 92 | ], 93 | "foo": [] 94 | } 95 | 96 | 97 | class TestPropertyValues(unittest.TestCase): 98 | 99 | @patch("datacommons.node._post") 100 | def test_with_data(self, _post): 101 | 102 | def side_effect(path, data): 103 | print(path) 104 | if path == "/v1/bulk/property/values/out" and data == { 105 | "nodes": ["geoId/06"], 106 | "property": "name", 107 | }: 108 | return { 109 | "data": [{ 110 | "node": 111 | "geoId/06", 112 | "values": [{ 113 | "provenanceId": "dc/5n63hr1", 114 | "value": "California" 115 | }] 116 | }] 
117 | } 118 | 119 | _post.side_effect = side_effect 120 | response = datacommons.property_values(["geoId/06"], "name") 121 | assert response == {"geoId/06": ["California"]} 122 | 123 | @patch("datacommons.node._post") 124 | def test_multiple_values(self, _post): 125 | 126 | def side_effect(path, data): 127 | print(path) 128 | if path == "/v1/bulk/property/values/out" and data == { 129 | "nodes": ["geoId/06"], 130 | "property": "geoOverlaps", 131 | }: 132 | return { 133 | "data": [{ 134 | "node": 135 | "geoId/06", 136 | "values": [{ 137 | "provenanceId": "dc/5n63hr1", 138 | "value": "geoId/05" 139 | }, { 140 | "provenanceId": "dc/5n63hr1", 141 | "value": "geoId/07" 142 | }] 143 | }] 144 | } 145 | 146 | _post.side_effect = side_effect 147 | response = datacommons.property_values(["geoId/06"], "geoOverlaps") 148 | assert response == {"geoId/06": ["geoId/05", "geoId/07"]} 149 | 150 | 151 | class TestTriples(unittest.TestCase): 152 | 153 | @patch("datacommons.node._post") 154 | def test_with_data(self, _post): 155 | 156 | def side_effect(path, data): 157 | print(path) 158 | if path == "/v1/bulk/triples/out" and data == { 159 | "nodes": ["Class"], 160 | }: 161 | return { 162 | "data": [{ 163 | "node": "Class", 164 | "triples": { 165 | "typeOf": { 166 | "nodes": [{ 167 | "name": "Class", 168 | "types": ["Class"], 169 | "dcid": "Class", 170 | "provenanceId": "dc/5l5zxr1" 171 | }, { 172 | "name": "Class", 173 | "types": ["Class"], 174 | "dcid": "Class", 175 | "provenanceId": "dc/5l5zxr1" 176 | }] 177 | }, 178 | "isPartOf": { 179 | "nodes": [{ 180 | "provenanceId": "dc/5l5zxr1", 181 | "value": "http://meta.schema.org" 182 | }] 183 | }, 184 | "name": { 185 | "nodes": [{ 186 | "provenanceId": "dc/5l5zxr1", 187 | "value": "Class" 188 | }] 189 | }, 190 | "provenance": { 191 | "nodes": [{ 192 | "name": "BaseSchema", 193 | "types": ["Provenance"], 194 | "dcid": "dc/5l5zxr1", 195 | "provenanceId": "dc/5l5zxr1" 196 | }] 197 | }, 198 | "sameAs": { 199 | "nodes": [{ 200 | "provenanceId": "dc/5l5zxr1", 201 | "value": "http://www.w3.org/2000/01/rdf-schema" 202 | }] 203 | }, 204 | "subClassOf": { 205 | "nodes": [{ 206 | "name": "Intangible", 207 | "types": ["Class"], 208 | "dcid": "Intangible", 209 | "provenanceId": "dc/5l5zxr1" 210 | }] 211 | } 212 | } 213 | }] 214 | } 215 | 216 | _post.side_effect = side_effect 217 | response = datacommons.triples(["Class"]) 218 | assert response == { 219 | "Class": { 220 | 'isPartOf': [{ 221 | 'provenanceId': 'dc/5l5zxr1', 222 | 'value': 'http://meta.schema.org' 223 | }], 224 | 'name': [{ 225 | 'provenanceId': 'dc/5l5zxr1', 226 | 'value': 'Class' 227 | }], 228 | 'provenance': [{ 229 | 'dcid': 'dc/5l5zxr1', 230 | 'name': 'BaseSchema', 231 | 'provenanceId': 'dc/5l5zxr1', 232 | 'types': ['Provenance'] 233 | }], 234 | 'sameAs': [{ 235 | 'provenanceId': 'dc/5l5zxr1', 236 | 'value': 'http://www.w3.org/2000/01/rdf-schema' 237 | }], 238 | 'subClassOf': [{ 239 | 'dcid': 'Intangible', 240 | 'name': 'Intangible', 241 | 'provenanceId': 'dc/5l5zxr1', 242 | 'types': ['Class'] 243 | }], 244 | 'typeOf': [{ 245 | 'dcid': 'Class', 246 | 'name': 'Class', 247 | 'provenanceId': 'dc/5l5zxr1', 248 | 'types': ['Class'] 249 | }, { 250 | 'dcid': 'Class', 251 | 'name': 'Class', 252 | 'provenanceId': 'dc/5l5zxr1', 253 | 'types': ['Class'] 254 | }] 255 | }, 256 | } 257 | -------------------------------------------------------------------------------- /datacommons/core.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. 
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """ Data Commons Python API Core.
15 | 
16 | Provides primitive operations for working with collections of nodes. For a
17 | collection of nodes identified by their dcids, this submodule implements the
18 | following:
19 | 
20 | - Getting all property labels
21 | - Getting all property values
22 | - Getting all triples
23 | """
24 | 
25 | from __future__ import absolute_import
26 | from __future__ import division
27 | from __future__ import print_function
28 | 
29 | from collections import defaultdict
30 | 
31 | import datacommons.utils as utils
32 | 
33 | # ----------------------------- WRAPPER FUNCTIONS -----------------------------
34 | 
35 | 
36 | def get_property_labels(dcids, out=True):
37 |   """ Returns the labels of properties defined for the given :code:`dcids`.
38 | 
39 |   Args:
40 |     dcids (:obj:`iterable` of :obj:`str`): A list of nodes identified by their
41 |       dcids.
42 |     out (:obj:`bool`, optional): Whether or not the property points away from
43 |       the given list of nodes.
44 | 
45 |   Returns:
46 |     A :obj:`dict` mapping dcids to lists of property labels. If `out` is `True`,
47 |     then property labels correspond to edges directed away from given nodes.
48 |     Otherwise, they correspond to edges directed towards the given nodes.
49 | 
50 |   Raises:
51 |     ValueError: If the payload returned by the Data Commons REST API is
52 |       malformed.
53 | 
54 |   Examples:
55 |     To get all outgoing property labels for
56 |     California (`geoId/06`) and
57 |     Colorado (`geoId/08`), we can write
58 |     the following.
59 | 
60 |     >>> get_property_labels(['geoId/06', 'geoId/08'])
61 |     {
62 |       "geoId/06": [
63 |         "containedInPlace",
64 |         "geoId",
65 |         "kmlCoordinates",
66 |         "name",
67 |         "provenance",
68 |         "typeOf"
69 |       ],
70 |       "geoId/08": [
71 |         "containedInPlace",
72 |         "geoId",
73 |         "kmlCoordinates",
74 |         "name",
75 |         "provenance",
76 |         "typeOf"
77 |       ]
78 |     }
79 | 
80 |     We can also get incoming property labels by setting `out=False`.
81 | 
82 |     >>> get_property_labels(['geoId/06', 'geoId/08'], out=False)
83 |     {
84 |       "geoId/06": [
85 |         "addressRegion",
86 |         "containedInPlace",
87 |         "location",
88 |         "overlapsWith"
89 |       ],
90 |       "geoId/08": [
91 |         "addressRegion",
92 |         "containedInPlace",
93 |         "location",
94 |         "overlapsWith"
95 |       ]
96 |     }
97 |   """
98 |   # Generate the GetProperty query and send the request
99 |   dcids = filter(lambda v: v == v, dcids)  # Filter out NaN values
100 |   dcids = list(dcids)
101 |   url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_labels']
102 |   payload = utils._send_request(url, req_json={'dcids': dcids})
103 | 
104 |   # Return the results based on the orientation
105 |   results = {}
106 |   for dcid in dcids:
107 |     if out:
108 |       results[dcid] = payload[dcid]['outLabels']
109 |     else:
110 |       results[dcid] = payload[dcid]['inLabels']
111 |   return results
112 | 
113 | 
114 | def get_property_values(dcids,
115 |                         prop,
116 |                         out=True,
117 |                         value_type=None,
118 |                         limit=utils._MAX_LIMIT):
119 |   """ Returns property values of given :code:`dcids` along the given property.
120 | 
121 |   Args:
122 |     dcids (:obj:`iterable` of :obj:`str`): dcids to get property values for.
123 |     prop (:obj:`str`): The property to get property values for.
124 |     out (:obj:`bool`, optional): A flag that indicates the property is directed
125 |       away from the given nodes when set to true.
126 |     value_type (:obj:`str`, optional): A type to filter returned property values
127 |       by.
128 |     limit (:obj:`int`, optional): The maximum number of property values returned
129 |       aggregated over all given nodes.
130 | 
131 |   Returns:
132 |     Returned property values are formatted as a :obj:`dict` from a given dcid
133 |     to a list of its property values.
134 | 
135 |   Raises:
136 |     ValueError: If the payload returned by the Data Commons REST API is
137 |       malformed.
138 | 
139 |   Examples:
140 |     We would like to get the `name` of a list of states specified by their dcid:
141 |     `geoId/06` (California),
142 |     `geoId/21` (Kentucky), and
143 |     `geoId/24` (Maryland).
144 | 
145 |     First, let's try specifying the :code:`dcids` as a :obj:`list` of
146 |     :obj:`str`.
147 | 
148 |     >>> get_property_values(["geoId/06", "geoId/21", "geoId/24"], "name")
149 |     {
150 |       "geoId/06": ["California"],
151 |       "geoId/21": ["Kentucky"],
152 |       "geoId/24": ["Maryland"],
153 |     }
154 |   """
155 |   # Convert the dcids field and format the request to GetPropertyValue
156 |   dcids = filter(lambda v: v == v, dcids)  # Filter out NaN values
157 |   dcids = list(dcids)
158 |   if out:
159 |     direction = 'out'
160 |   else:
161 |     direction = 'in'
162 | 
163 |   req_json = {
164 |       'dcids': dcids,
165 |       'property': prop,
166 |       'limit': limit,
167 |       'direction': direction
168 |   }
169 |   if value_type:
170 |     req_json['value_type'] = value_type
171 | 
172 |   # Send the request
173 |   url = utils._API_ROOT + utils._API_ENDPOINTS['get_property_values']
174 |   payload = utils._send_request(url, req_json=req_json)
175 | 
176 |   # Create the result format for when dcids is provided as a list.
177 |   unique_results = defaultdict(set)
178 |   for dcid in dcids:
179 |     # Get the list of nodes based on the direction given.
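    # (The payload maps each dcid to a dict keyed by direction, 'out' or 'in'.)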
180 |     nodes = []
181 |     if out:
182 |       if dcid in payload and 'out' in payload[dcid]:
183 |         nodes = payload[dcid]['out']
184 |     else:
185 |       if dcid in payload and 'in' in payload[dcid]:
186 |         nodes = payload[dcid]['in']
187 | 
188 |     # Add each node's dcid or value to unique_results
189 |     for node in nodes:
190 |       if 'dcid' in node:
191 |         unique_results[dcid].add(node['dcid'])
192 |       elif 'value' in node:
193 |         unique_results[dcid].add(node['value'])
194 | 
195 |   # Make sure each dcid is in the results dict, and convert all sets to lists.
196 |   results = {dcid: sorted(list(unique_results[dcid])) for dcid in dcids}
197 | 
198 |   return results
199 | 
200 | 
201 | def get_triples(dcids, limit=utils._MAX_LIMIT):
202 |   """ Returns all triples associated with the given :code:`dcids`.
203 | 
204 |   A knowledge graph can be described as a collection of `triples` which are
205 |   3-tuples that take the form `(s, p, o)`. Here `s` and `o` are nodes in the
206 |   graph called the *subject* and *object* respectively while `p` is the property
207 |   label of a directed edge from `s` to `o` (sometimes also called the
208 |   *predicate*).
209 | 
210 |   Args:
211 |     dcids (:obj:`iterable` of :obj:`str`): A list of dcids to get triples for.
212 |     limit (:obj:`int`, optional): The maximum total number of triples to get.
213 | 
214 |   Returns:
215 |     A :obj:`dict` mapping dcids to a :obj:`list` of triples `(s, p, o)` where
216 |     `s`, `p`, and `o` are instances of :obj:`str` and either the subject
217 |     or object is the mapped dcid.
218 | 
219 |   Raises:
220 |     ValueError: If the payload returned by the Data Commons REST API is
221 |       malformed.
222 | 
223 |   Examples:
224 |     We would like to get five triples associated with
225 |     California (`geoId/06`).
226 | 
227 |     >>> get_triples(["geoId/06"], limit=5)
228 |     {
229 |       "geoId/06": [
230 |         ("geoId/06", "name", "California"),
231 |         ("geoId/06", "typeOf", "State"),
232 |         ("geoId/06", "geoId", "06"),
233 |         ("geoId/0687056", "containedInPlace", "geoId/06"),
234 |         ("geoId/0686440", "containedInPlace", "geoId/06")
235 |       ]
236 |     }
237 |   """
238 |   # Generate the GetTriple query and send the request.
239 |   dcids = filter(lambda v: v == v, dcids)  # Filter out NaN values
240 |   dcids = list(dcids)
241 |   url = utils._API_ROOT + utils._API_ENDPOINTS['get_triples']
242 |   payload = utils._send_request(url, req_json={'dcids': dcids, 'limit': limit})
243 | 
244 |   # Create a map from dcid to list of triples.
245 |   results = defaultdict(list)
246 |   for dcid in dcids:
247 |     # Make sure each dcid is mapped to an empty list.
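    # (The bare lookup below inserts the key into the defaultdict, so dcids
    # with no triples still appear in the returned dict.)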
248 | results[dcid] 249 | 250 | # Add triples as appropriate 251 | for t in payload[dcid]: 252 | if 'objectId' in t: 253 | results[dcid].append((t['subjectId'], t['predicate'], t['objectId'])) 254 | elif 'objectValue' in t: 255 | results[dcid].append((t['subjectId'], t['predicate'], t['objectValue'])) 256 | return dict(results) 257 | -------------------------------------------------------------------------------- /datacommons_client/client.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Optional 2 | 3 | from datacommons_client.endpoints.base import API 4 | from datacommons_client.endpoints.node import NodeEndpoint 5 | from datacommons_client.endpoints.observation import ObservationEndpoint 6 | from datacommons_client.endpoints.resolve import ResolveEndpoint 7 | from datacommons_client.models.observation import ObservationDate 8 | from datacommons_client.utils.dataframes import add_entity_names_to_observations_dataframe 9 | from datacommons_client.utils.dataframes import add_property_constraints_to_observations_dataframe 10 | from datacommons_client.utils.decorators import requires_pandas 11 | from datacommons_client.utils.error_handling import NoDataForPropertyError 12 | 13 | try: 14 | import pandas as pd 15 | except ImportError: 16 | pd = None 17 | 18 | 19 | class DataCommonsClient: 20 | """ 21 | A client for interacting with the Data Commons API. 22 | 23 | This class provides convenient access to the V2 Data Commons API endpoints. 24 | 25 | Attributes: 26 | api (API): An instance of the API class that handles requests. 27 | node (NodeEndpoint): Provides access to node-related queries, such as fetching property labels 28 | and values for individual or multiple nodes in the Data Commons knowledge graph. 29 | observation (ObservationEndpoint): Handles observation-related queries, allowing retrieval of 30 | statistical observations associated with entities, variables, and dates (e.g., GDP of California in 2010). 31 | resolve (ResolveEndpoint): Manages resolution queries to find different DCIDs for entities. 32 | 33 | """ 34 | 35 | def __init__(self, 36 | api_key: Optional[str] = None, 37 | *, 38 | dc_instance: Optional[str] = "datacommons.org", 39 | url: Optional[str] = None, 40 | surface_header_value: Optional[str] = None): 41 | """ 42 | Initializes the DataCommonsClient. 43 | 44 | Args: 45 | api_key (Optional[str]): The API key for authentication. Defaults to None. Note that 46 | custom DC instances do not currently require an API key. 47 | dc_instance (Optional[str]): The Data Commons instance to use. Defaults to "datacommons.org". 48 | url (Optional[str]): A custom, fully resolved URL for the Data Commons API. Defaults to None. 
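            surface_header_value (Optional[str]): An optional value forwarded to the
                underlying API instance, typically used to identify the calling surface
                in request headers. Defaults to None.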
49 |     """
50 |     # If a fully resolved URL is provided, and the default dc_instance is used,
51 |     # ignore that default value
52 |     if dc_instance == "datacommons.org" and url:
53 |       dc_instance = None
54 | 
55 |     # Create an instance of the API class which will be injected into the endpoints
56 |     self.api = API(api_key=api_key,
57 |                    dc_instance=dc_instance,
58 |                    url=url,
59 |                    surface_header_value=surface_header_value)
60 | 
61 |     # Create instances of the endpoints
62 |     self.node = NodeEndpoint(api=self.api)
63 |     self.observation = ObservationEndpoint(api=self.api)
64 |     self.resolve = ResolveEndpoint(api=self.api)
65 | 
66 |   def _find_filter_facet_ids(
67 |       self,
68 |       fetch_by: Literal["entity", "entity_type"],
69 |       date: ObservationDate | str,
70 |       variable_dcids: str | list[str],
71 |       entity_dcids: Literal["all"] | list[str] = "all",
72 |       entity_type: Optional[str] = None,
73 |       parent_entity: Optional[str] = None,
74 |       property_filters: Optional[dict[str, str | list[str]]] = None,
75 |   ) -> list[str] | None:
76 |     """Finds matching facet IDs for property filters.
77 | 
78 |     Args:
79 |         fetch_by (Literal["entity", "entity_type"]): Determines whether to fetch by entity or entity type.
80 |         variable_dcids (str | list[str]): The variable DCIDs for which to retrieve facet IDs.
81 |         entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs, or "all" if filtering by entity type.
82 |         entity_type (Optional[str]): The entity type, required if fetching by entity type.
83 |         parent_entity (Optional[str]): The parent entity, used when fetching by entity type.
84 |         property_filters (Optional[dict[str, str | list[str]]]): A dictionary of properties to match facets against.
85 | 
86 |     Returns:
87 |         list[str] | None: A list of matching facet IDs, or None if no filters are applied.
88 |     """
89 | 
90 |     if not property_filters:
91 |       return None
92 | 
93 |     if fetch_by == "entity":
94 |       observations = self.observation.fetch_observations_by_entity_dcid(
95 |           date=date,
96 |           entity_dcids=entity_dcids,
97 |           variable_dcids=variable_dcids,
98 |           select=["variable", "entity", "facet"],
99 |       )
100 |     else:
101 |       observations = self.observation.fetch_observations_by_entity_type(
102 |           date=date,
103 |           entity_type=entity_type,
104 |           parent_entity=parent_entity,
105 |           variable_dcids=variable_dcids,
106 |           select=["variable", "entity", "facet"],
107 |       )
108 | 
109 |     facet_sets = [
110 |         observations.find_matching_facet_id(property_name=p, value=v)
111 |         for p, v in property_filters.items()
112 |     ]
113 | 
114 |     facet_ids = list({facet for facets in facet_sets for facet in facets})
115 | 
116 |     return facet_ids
117 | 
118 |   @requires_pandas
119 |   def observations_dataframe(
120 |       self,
121 |       variable_dcids: str | list[str],
122 |       date: ObservationDate | str,
123 |       entity_dcids: Literal["all"] | list[str] = "all",
124 |       entity_type: Optional[str] = None,
125 |       parent_entity: Optional[str] = None,
126 |       property_filters: Optional[dict[str, str | list[str]]] = None,
127 |       include_constraints_metadata: bool = False,
128 |   ):
129 |     """
130 |     Fetches statistical observations and returns them as a Pandas DataFrame.
131 | 
132 |     The Observation API fetches statistical observations linked to entities and variables
133 |     at a particular date (e.g., "population of USA in 2020", "GDP of California in 2010").
134 | 
135 |     Args:
136 |         variable_dcids (str | list[str]): One or more variable DCIDs for the observation.
137 |         date (ObservationDate | str): The date for which observations are requested.
It can be
138 |             a specific date, "all" to retrieve all observations, or "latest" to get the most recent observations.
139 |         entity_dcids (Literal["all"] | list[str], optional): The entity DCIDs for which to retrieve data.
140 |             Defaults to "all".
141 |         entity_type (Optional[str]): The type of entities to filter by when `entity_dcids="all"`.
142 |             Required if `entity_dcids="all"`. Defaults to None.
143 |         parent_entity (Optional[str]): The parent entity under which the target entities fall.
144 |             Required if `entity_dcids="all"`. Defaults to None.
145 |         property_filters (Optional[dict[str, str | list[str]]]): An optional dictionary used to filter
146 |             the data by using observation properties like `measurementMethod`, `unit`, or `observationPeriod`.
147 |         include_constraints_metadata (bool): If True, includes the dcid and name of any constraint
148 |             properties associated with the variable DCIDs (based on the `constraintProperties` property)
149 |             in the returned DataFrame. Defaults to False.
150 | 
151 |     Returns:
152 |         pd.DataFrame: A DataFrame containing the requested observations.
153 |     """
154 | 
155 |     if entity_dcids == "all" and not (entity_type and parent_entity):
156 |       raise ValueError(
157 |           "When 'entity_dcids' is 'all', both 'parent_entity' and 'entity_type' must be specified."
158 |       )
159 | 
160 |     if entity_dcids != "all" and (entity_type or parent_entity):
161 |       raise ValueError(
162 |           "Specify 'entity_type' and 'parent_entity' only when 'entity_dcids' is 'all'."
163 |       )
164 | 
165 |     # If property filters are provided, fetch the required facet IDs. Otherwise, set to None.
166 |     facets = self._find_filter_facet_ids(
167 |         fetch_by="entity" if entity_dcids != "all" else "entity_type",
168 |         date=date,
169 |         variable_dcids=variable_dcids,
170 |         entity_dcids=entity_dcids,
171 |         entity_type=entity_type,
172 |         parent_entity=parent_entity,
173 |         property_filters=property_filters,
174 |     )
175 | 
176 |     if not facets and property_filters:
177 |       raise NoDataForPropertyError
178 | 
179 |     if entity_dcids == "all":
180 |       observations = self.observation.fetch_observations_by_entity_type(
181 |           date=date,
182 |           parent_entity=parent_entity,
183 |           entity_type=entity_type,
184 |           variable_dcids=variable_dcids,
185 |           filter_facet_ids=facets,
186 |       )
187 |     else:
188 |       observations = self.observation.fetch_observations_by_entity_dcid(
189 |           date=date,
190 |           entity_dcids=entity_dcids,
191 |           variable_dcids=variable_dcids,
192 |           filter_facet_ids=facets,
193 |       )
194 | 
195 |     # Convert the observations to a DataFrame
196 |     df = pd.DataFrame(observations.to_observation_records().model_dump())
197 | 
198 |     # Add entity names to the DataFrame
199 |     df = add_entity_names_to_observations_dataframe(
200 |         endpoint=self.node,
201 |         observations_df=df,
202 |         entity_columns=["entity", "variable"],
203 |     )
204 | 
205 |     if include_constraints_metadata:
206 |       df = add_property_constraints_to_observations_dataframe(
207 |           endpoint=self.node,
208 |           observations_df=df,
209 |       )
210 | 
211 |     return df
212 | 
--------------------------------------------------------------------------------
/datacommons/stat_vars.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Google Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | """Data Commons Python API Stat Module.
15 | 
16 | Provides functions for getting data on StatisticalVariables from the Data Commons Graph.
17 | """
18 | 
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 | 
23 | import collections
24 | 
25 | import six
26 | 
27 | import datacommons.utils as utils
28 | 
29 | # stat_var specific batch size.
30 | _STAT_BATCH_SIZE = 2000
31 | 
32 | 
33 | def get_stat_value(place,
34 |                    stat_var,
35 |                    date=None,
36 |                    measurement_method=None,
37 |                    observation_period=None,
38 |                    unit=None,
39 |                    scaling_factor=None):
40 |   """Returns a value for `place` based on the `stat_var`.
41 | 
42 |   Args:
43 |     place (`str`): The dcid of Place to query for.
44 |     stat_var (`str`): The dcid of the StatisticalVariable.
45 |     date (`str`): Optional, the preferred date of observation
46 |       in ISO 8601 format. If not specified, returns the latest observation.
47 |     measurement_method (`str`): Optional, the dcid of the preferred
48 |       `measurementMethod` value.
49 |     observation_period (`str`): Optional, the preferred
50 |       `observationPeriod` value.
51 |     unit (`str`): Optional, the dcid of the preferred `unit` value.
52 |     scaling_factor (`int`): Optional, the preferred `scalingFactor` value.
53 |   Returns:
54 |     A `float`, the value of `stat_var` for `place`, filtered
55 |     by optional args. If there is no data, returns NaN.
56 | 
57 |   Raises:
58 |     ValueError: If the payload returned by the Data Commons REST API is
59 |       malformed.
60 | 
61 |   Examples:
62 |     >>> get_stat_value("geoId/05", "Count_Person")
63 |     366331
64 |   """
65 |   url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_value']
66 |   url += '?place={}&stat_var={}'.format(place, stat_var)
67 |   if date:
68 |     url += '&date={}'.format(date)
69 |   if measurement_method:
70 |     url += '&measurement_method={}'.format(measurement_method)
71 |   if observation_period:
72 |     url += '&observation_period={}'.format(observation_period)
73 |   if unit:
74 |     url += '&unit={}'.format(unit)
75 |   if scaling_factor:
76 |     url += '&scaling_factor={}'.format(scaling_factor)
77 | 
78 |   try:
79 |     res_json = utils._send_request(url, post=False, use_payload=False)
80 |   except ValueError:
81 |     return float('nan')
82 |   if 'value' not in res_json:
83 |     return float('nan')
84 |   return res_json['value']
85 | 
86 | 
87 | def get_stat_series(place,
88 |                     stat_var,
89 |                     measurement_method=None,
90 |                     observation_period=None,
91 |                     unit=None,
92 |                     scaling_factor=None):
93 |   """Returns a `dict` mapping dates to value of `stat_var` for `place`.
94 | 
95 |   Args:
96 |     place (`str`): The dcid of Place to query for.
97 |     stat_var (`str`): The dcid of the StatisticalVariable.
98 |     measurement_method (`str`): Optional, the dcid of the preferred
99 |       `measurementMethod` value.
100 |     observation_period (`str`): Optional, the preferred
101 |       `observationPeriod` value.
102 |     unit (`str`): Optional, the dcid of the preferred `unit` value.
103 |     scaling_factor (`int`): Optional, the preferred `scalingFactor` value.
104 |   Returns:
105 |     A `dict` mapping dates to value of `stat_var` for `place`,
106 |     representing a time series that satisfies all input parameters.
107 | 
108 |   Raises:
109 |     ValueError: If the payload returned by the Data Commons REST API is
110 |       malformed.
111 | 
112 |   Examples:
113 |     >>> get_stat_series("geoId/05", "Count_Person")
114 |     {"1962":17072000,"2009":36887615,"1929":5531000,"1930":5711000}
115 |   """
116 |   url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_series']
117 |   url += '?place={}&stat_var={}'.format(place, stat_var)
118 |   if measurement_method:
119 |     url += '&measurement_method={}'.format(measurement_method)
120 |   if observation_period:
121 |     url += '&observation_period={}'.format(observation_period)
122 |   if unit:
123 |     url += '&unit={}'.format(unit)
124 |   if scaling_factor:
125 |     url += '&scaling_factor={}'.format(scaling_factor)
126 | 
127 |   try:
128 |     res_json = utils._send_request(url, post=False, use_payload=False)
129 |   except ValueError:
130 |     return {}
131 | 
132 |   if 'series' not in res_json:
133 |     return {}
134 |   return res_json['series']
135 | 
136 | 
137 | def get_stat_all(places, stat_vars):
138 |   """Returns a nested `dict` of all time series for `places` and `stat_vars`.
139 | 
140 |   Args:
141 |     places (`Iterable` of `str`): The dcids of Places to query for.
142 |     stat_vars (`Iterable` of `str`): The dcids of the StatisticalVariables.
143 |   Returns:
144 |     A nested `dict` mapping Places to StatisticalVariables and all available
145 |     time series for each Place and StatisticalVariable pair.
146 | 
147 |   Raises:
148 |     ValueError: If the payload returned by the Data Commons REST API is
149 |       malformed.
150 | 
151 |   Examples:
152 |     >>> get_stat_all(["geoId/05", "geoId/06"], ["Count_Person", "Count_Person_Male"])
153 |     {
154 |       "geoId/05": {
155 |         "Count_Person": {
156 |           "sourceSeries": [
157 |             {
158 |               "val": {
159 |                 "2010": 1633,
160 |                 "2011": 1509,
161 |                 "2012": 1581,
162 |               },
163 |               "observationPeriod": "P1Y",
164 |               "importName": "Wikidata",
165 |               "provenanceDomain": "wikidata.org"
166 |             },
167 |             {
168 |               "val": {
169 |                 "2010": 1333,
170 |                 "2011": 1309,
171 |                 "2012": 131,
172 |               },
173 |               "observationPeriod": "P1Y",
174 |               "importName": "CensusPEPSurvey",
175 |               "provenanceDomain": "census.gov"
176 |             }
177 |           ],
178 |         },
179 | 
180 |         "Count_Person_Male": {
181 |           "sourceSeries": [
182 |             {
183 |               "val": {
184 |                 "2010": 1633,
185 |                 "2011": 1509,
186 |                 "2012": 1581,
187 |               },
188 |               "observationPeriod": "P1Y",
189 |               "importName": "CensusPEPSurvey",
190 |               "provenanceDomain": "census.gov"
191 |             }
192 |           ],
193 |         }
194 |       },
195 |       "geoId/06": {
196 |         "Count_Person": {},
197 |         "Count_Person_Male": {
198 |           "sourceSeries": [
199 |             {
200 |               "val": {
201 |                 "2010": 13,
202 |                 "2011": 13,
203 |                 "2012": 322,
204 |               },
205 |               "observationPeriod": "P1Y",
206 |               "importName": "CensusPEPSurvey",
207 |               "provenanceDomain": "census.gov"
208 |             }
209 |           ]
210 |         }
211 |       }
212 |     }
213 |   """
214 |   url = utils._API_ROOT + utils._API_ENDPOINTS['get_stat_all']
215 |   # Cast iterable-like to list.
216 |   places = list(places)
217 |   stat_vars = list(stat_vars)
218 | 
219 |   # Aiming for _STAT_BATCH_SIZE entries total.
220 |   # _STAT_BATCH_SIZE = num places x num stat_vars, so aim for
221 |   # _STAT_BATCH_SIZE/len(stat_vars) places per batch.
222 |   places_per_batch = _STAT_BATCH_SIZE // len(stat_vars)
223 |   # Get number of batches via an arithmetic ceiling trick:
224 |   # 11//10 rounds down to 1.
225 |   # -11//10 rounds down to -2.
226 |   # We can divide the negated numerator, then negate the result to get the ceiling.
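  # For example, with 11 places and batches of 10: -(-11 // 10) == -(-2) == 2.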
227 | batches = -(-len(places) // places_per_batch) 228 | res = {} 229 | for i in range(batches): 230 | req_json = { 231 | 'stat_vars': stat_vars, 232 | 'places': places[i * places_per_batch:(i + 1) * places_per_batch] 233 | } 234 | # Send the request 235 | res_json = utils._send_request(url, req_json=req_json, use_payload=False) 236 | if 'placeData' not in res_json: 237 | # The REST API spec will always return a dictionary under 238 | # placeData, even if no places exist or have no 239 | # data. If no Places are provided, REST will return an 240 | # error, which will have been caught and passed on in 241 | # _send_request. 242 | raise ValueError("Unexpected response from REST stat/all API.") 243 | 244 | # Unnest the REST response for keys that have single-element values. 245 | place_statvar_series = collections.defaultdict(dict) 246 | for place_dcid, place in res_json['placeData'].items(): 247 | stat_var_data = place.get('statVarData') 248 | if not stat_var_data: 249 | # The REST API spec will always return a dictionary under 250 | # statVarData, even if no StatVars exist or have no 251 | # data. If no StatVars are provided, REST will return an 252 | # error, which will have been caught and passed on in 253 | # _send_request. 254 | raise ValueError("Unexpected response from REST stat/all API.") 255 | for stat_var_dcid, stat_var in stat_var_data.items(): 256 | place_statvar_series[place_dcid][stat_var_dcid] = stat_var 257 | res.update(dict(place_statvar_series)) 258 | 259 | return res 260 | -------------------------------------------------------------------------------- /datacommons_client/tests/utils/test_graph.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from unittest.mock import MagicMock 3 | 4 | from datacommons_client.models.node import Node 5 | from datacommons_client.utils.graph import _assemble_tree 6 | from datacommons_client.utils.graph import _fetch_relationship_uncached 7 | from datacommons_client.utils.graph import _postorder_nodes 8 | from datacommons_client.utils.graph import build_graph_map 9 | from datacommons_client.utils.graph import build_relationship_tree 10 | from datacommons_client.utils.graph import fetch_relationship_lru 11 | from datacommons_client.utils.graph import flatten_relationship 12 | 13 | 14 | def test_fetch_parents_uncached_returns_data(): 15 | """Test _fetch_parents_uncached delegates to endpoint correctly.""" 16 | endpoint = MagicMock() 17 | endpoint.fetch_place_parents.return_value.get.return_value = [ 18 | Node(dcid="parent1", name="Parent 1", types=["Country"]) 19 | ] 20 | 21 | result = _fetch_relationship_uncached(endpoint, 22 | "test_dcid", 23 | contained_type=None, 24 | relationship="parents") 25 | assert isinstance(result, list) 26 | assert result[0].dcid == "parent1" 27 | endpoint.fetch_place_parents.assert_called_once_with( 28 | "test_dcid", 29 | as_dict=False, 30 | ) 31 | 32 | 33 | def test_fetch_relationship_lru_caches_results(): 34 | """Test fetch_relationship_lru uses LRU cache and returns list.""" 35 | endpoint = MagicMock() 36 | endpoint.fetch_place_parents.return_value.get.return_value = [ 37 | Node(dcid="parentX", name="Parent X", types=["Region"]) 38 | ] 39 | 40 | result1 = fetch_relationship_lru(endpoint, 41 | "nodeA", 42 | contained_type=None, 43 | relationship="parents") 44 | result2 = fetch_relationship_lru(endpoint, 45 | "nodeA", 46 | contained_type=None, 47 | relationship="parents") 48 | fetch_relationship_lru(endpoint, 49 | "nodeA", 50 | 
contained_type=None, 51 | relationship="parents") 52 | 53 | assert isinstance(result1, list) 54 | assert result1[0].dcid == "parentX" 55 | assert result1 == result2 56 | assert endpoint.fetch_place_parents.call_count == 1 57 | 58 | 59 | def test_build_ancestry_map_linear_tree(): 60 | """A -> B -> C""" 61 | 62 | def fetch_mock(dcid): 63 | return { 64 | "C": [Node(dcid="B", name="Node B", types=["Type"])], 65 | "B": [Node(dcid="A", name="Node A", types=["Type"])], 66 | "A": [], 67 | }.get(dcid, []) 68 | 69 | root, graph = build_graph_map("C", fetch_mock, max_workers=2) 70 | 71 | assert root == "C" 72 | assert set(graph.keys()) == {"C", "B", "A"} 73 | assert graph["C"][0].dcid == "B" 74 | assert graph["B"][0].dcid == "A" 75 | assert graph["A"] == [] 76 | 77 | 78 | def test_build_ancestry_map_branching_graph(): 79 | r""" 80 | Graph: 81 | F 82 | / \ 83 | D E 84 | / \ / 85 | B C 86 | \/ 87 | A 88 | """ 89 | 90 | def fetch_mock(dcid): 91 | return { 92 | "A": (Node(dcid="B", name="Node B", 93 | types=["Type"]), Node(dcid="C", 94 | name="Node C", 95 | types=["Type"])), 96 | "B": (Node(dcid="D", name="Node D", types=["Type"]),), 97 | "C": (Node(dcid="D", name="Node D", 98 | types=["Type"]), Node(dcid="E", 99 | name="Node E", 100 | types=["Type"])), 101 | "D": (Node(dcid="F", name="Node F", types=["Type"]),), 102 | "E": (Node(dcid="F", name="Node F", types=["Type"]),), 103 | "F": tuple(), 104 | }.get(dcid, tuple()) 105 | 106 | root, ancestry = build_graph_map("A", fetch_mock, max_workers=4) 107 | 108 | assert root == "A" 109 | assert set(ancestry.keys()) == {"A", "B", "C", "D", "E", "F"} 110 | assert [p.dcid for p in ancestry["A"]] == ["B", "C"] # A has two parents 111 | assert [p.dcid for p in ancestry["B"]] == ["D"] # B has one parent 112 | assert [p.dcid for p in ancestry["C"]] == ["D", "E"] # C has two parents 113 | assert [p.dcid for p in ancestry["D"]] == ["F"] # D has one parent 114 | assert [p.dcid for p in ancestry["E"]] == ["F"] # E has one parent 115 | assert ancestry["F"] == [] # F has no parents 116 | 117 | 118 | def test_build_ancestry_map_cycle_detection(): 119 | """ 120 | Graph with a cycle: 121 | A -> B -> C -> A 122 | (Should not loop infinitely) 123 | """ 124 | 125 | call_count = defaultdict(int) 126 | 127 | def fetch_mock(dcid): 128 | call_count[dcid] += 1 129 | return { 130 | "A": (Node(dcid="B", name="B", types=["Type"]),), 131 | "B": (Node(dcid="C", name="C", types=["Type"]),), 132 | "C": (Node(dcid="A", name="A", types=["Type"]),), # Cycle back to A 133 | }.get(dcid, tuple()) 134 | 135 | root, ancestry = build_graph_map("A", fetch_mock, max_workers=2) 136 | 137 | assert root == "A" # Since we start from A 138 | assert set(ancestry.keys()) == {"A", "B", "C"} 139 | assert [p.dcid for p in ancestry["A"]] == ["B"] # A points to B 140 | assert [p.dcid for p in ancestry["B"]] == ["C"] # B points to C 141 | assert [p.dcid for p in ancestry["C"]] == ["A" 142 | ] # C points back to A but it's ok 143 | 144 | # Check that each node was fetched only once (particularly for A to avoid infinite loop) 145 | assert call_count["A"] == 1 146 | assert call_count["B"] == 1 147 | assert call_count["C"] == 1 148 | 149 | 150 | def test_postorder_nodes_simple_graph(): 151 | """Test postorder traversal on a simple graph.""" 152 | ancestry = { 153 | "C": [Node(dcid="B", name="B", types=["Type"])], 154 | "B": [Node(dcid="A", name="A", types=["Type"])], 155 | "A": [], 156 | } 157 | 158 | order = _postorder_nodes("C", ancestry) 159 | assert order == ["A", "B", "C"] 160 | 161 | new_order = 
_postorder_nodes("B", ancestry) 162 | assert new_order == ["A", "B"] 163 | 164 | 165 | def test_postorder_nodes_ignores_disconnected(): 166 | """ 167 | Graph: 168 | A <- B <- C 169 | D (disconnected) 170 | """ 171 | graph = { 172 | "A": [Node(dcid="B", name="B", types=["Type"])], 173 | "B": [Node(dcid="C", name="C", types=["Type"])], 174 | "C": [], 175 | "D": [Node(dcid="Z", name="Z", types=["Type"])], 176 | } 177 | order = _postorder_nodes("A", graph) 178 | assert order == ["C", "B", "A"] 179 | assert "D" not in order 180 | 181 | 182 | def test_assemble_tree_creates_nested_structure(): 183 | """Test _assemble_tree creates a nested structure.""" 184 | ancestry = { 185 | "C": [Node(dcid="B", name="Node B", types=["Type"])], 186 | "B": [Node(dcid="A", name="Node A", types=["Type"])], 187 | "A": [], 188 | } 189 | postorder = ["A", "B", "C"] 190 | tree = _assemble_tree(postorder, ancestry, relationship_key="parents") 191 | 192 | assert tree["dcid"] == "C" 193 | assert tree["parents"][0]["dcid"] == "B" 194 | assert tree["parents"][0]["parents"][0]["dcid"] == "A" 195 | 196 | 197 | def test_postorder_nodes_ignores_unreachable_nodes(): 198 | """ 199 | Graph: 200 | A → B → C 201 | Ancestry map also includes D (unconnected) 202 | """ 203 | ancestry = { 204 | "A": [Node(dcid="B", name="B", types=["Type"])], 205 | "B": [Node(dcid="C", name="C", types=["Type"])], 206 | "C": [], 207 | "D": [Node(dcid="X", name="X", types=["Type"])], 208 | } 209 | 210 | postorder = _postorder_nodes("A", ancestry) 211 | 212 | # Only nodes reachable from A should be included 213 | assert postorder == ["C", "B", "A"] 214 | assert "D" not in postorder 215 | 216 | 217 | def test_assemble_tree_shared_parent_not_duplicated(): 218 | """ 219 | Structure: 220 | A → C 221 | B → C 222 | Both A and B have same parent C 223 | """ 224 | 225 | ancestry = { 226 | "A": [Node(dcid="C", name="C name", types=["City"])], 227 | "B": [Node(dcid="C", name="C name", types=["City"])], 228 | "C": [], 229 | } 230 | 231 | postorder = ["C", "A", "B"] # C first to allow bottom-up build 232 | tree = _assemble_tree(postorder, ancestry, relationship_key="parents") 233 | 234 | assert tree["dcid"] == "B" 235 | assert len(tree["parents"]) == 1 236 | assert tree["parents"][0]["dcid"] == "C" 237 | 238 | # Confirm C only appears once 239 | assert tree["parents"][0] is not None 240 | assert tree["parents"][0]["name"] == "C name" 241 | 242 | 243 | def test_build_ancestry_tree_nested_output(): 244 | """Test build_ancestry_tree creates a nested structure.""" 245 | ancestry = { 246 | "C": [Node(dcid="B", name="B", types=["Type"])], 247 | "B": [Node(dcid="A", name="A", types=["Type"])], 248 | "A": [], 249 | } 250 | 251 | tree = build_relationship_tree("C", ancestry, relationship_key="parents") 252 | 253 | assert tree["dcid"] == "C" 254 | assert tree["parents"][0]["dcid"] == "B" 255 | assert tree["parents"][0]["parents"][0]["dcid"] == "A" 256 | 257 | 258 | def test_flatten_ancestry_deduplicates(): 259 | """Test flatten_ancestry deduplicates parents.""" 260 | 261 | ancestry = { 262 | "X": [Node(dcid="A", name="A", types=["Country"])], 263 | "Y": [ 264 | Node(dcid="A", name="A", types=["Country"]), 265 | Node(dcid="B", name="B", types=["City"]) 266 | ], 267 | } 268 | 269 | flat = flatten_relationship(ancestry) 270 | 271 | assert {"dcid": "A", "name": "A", "types": ["Country"]} in flat 272 | assert {"dcid": "B", "name": "B", "types": ["City"]} in flat 273 | assert len(flat) == 2 274 | --------------------------------------------------------------------------------
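A minimal end-to-end sketch of the V2 client above, for orientation only (not part
of the source tree). It assumes the `datacommons_client` package is installed along
with pandas, and that you have an API key for datacommons.org; the variable and
entity DCIDs are the same ones used in the docstrings above.

```python
from datacommons_client.client import DataCommonsClient

# Instantiate the client (custom DC instances currently need no API key).
client = DataCommonsClient(api_key="YOUR_API_KEY")

# All dated observations of the SDG poverty variable for Nigeria, returned
# as a pandas DataFrame with entity and variable names attached.
df = client.observations_dataframe(
    variable_dcids="sdg/SI_POV_DAY1",
    date="all",
    entity_dcids=["country/NGA"],
)
print(df.head())
```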