├── .check_python_version.py ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── ci-upstream-dev.yml │ └── ci.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── changelog.md ├── changelog ├── 286.improvement.md ├── 288.improvement.md ├── 313.fix.md ├── 322.fix.md ├── 323.fix.md ├── 323.improvement.md ├── 324.fix.md └── README.md ├── codecov.yml ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── .gitignore │ ├── _static │ ├── .gitkeep │ ├── apple-touch-icon.png │ ├── favicon-48x48.png │ ├── favicon.ico │ ├── favicon.svg │ ├── primap_logo_transparent.png │ ├── primap_logo_transparent_dark.png │ ├── site.webmanifest │ ├── web-app-manifest-192x192.png │ └── web-app-manifest-512x512.png │ ├── _templates │ └── .gitkeep │ ├── api │ ├── .gitignore │ ├── generate_api_docs.py │ └── index.rst │ ├── changelog.md │ ├── conf.py │ ├── credits.md │ ├── data_format │ ├── data_format_details.md │ ├── data_format_examples.md │ ├── index.md │ ├── interchange_format_details.md │ └── interchange_format_examples.md │ ├── data_reading │ ├── .gitignore │ ├── index.md │ ├── old-PRIMAP-hist.md │ ├── test_csv_data_long.csv │ ├── test_csv_data_sec_cat.csv │ ├── test_csv_data_sec_cat_if.csv │ ├── test_csv_data_sec_cat_if.yaml │ ├── test_data_long.md │ └── test_data_wide.md │ ├── datalad.md │ ├── development.md │ ├── ideas_for_sparse_data.md │ ├── index.md │ ├── installation.md │ ├── jupytext.toml │ ├── minimal_ds.nc │ ├── opulent_ds.nc │ ├── pyproject.toml │ └── usage │ ├── add_and_overwrite.md │ ├── csg.md │ ├── downscaling.md │ ├── gas_baskets.md │ ├── index.md │ ├── logging.md │ ├── merge.md │ ├── select_and_view.md │ ├── skipna.md │ └── store_and_load.md ├── licenses ├── pint_xarray_license └── xarray_license ├── mypy.ini ├── primap-stubs.patch ├── primap2 ├── __init__.py ├── _accessor_base.py ├── _aggregate.py ├── _convert.py ├── _data_format.py ├── _dim_names.py ├── _downscale.py ├── _fill_combine.py ├── _merge.py ├── _metadata.py ├── _overview.py ├── _selection.py ├── _setters.py ├── _types.py ├── _units.py ├── accessors.py ├── csg │ ├── __init__.py │ ├── _compose.py │ ├── _models.py │ ├── _strategies │ │ ├── __init__.py │ │ ├── exceptions.py │ │ ├── gaps.py │ │ ├── global_least_squares.py │ │ ├── local_least_squares.py │ │ ├── local_trends.py │ │ └── substitution.py │ └── _wrapper.py ├── pm2io │ ├── _GHG_inventory_reading.py │ ├── __init__.py │ ├── _conversion.py │ ├── _data_reading.py │ └── _interchange_format.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── csg │ ├── test_compose.py │ ├── test_gaps.py │ ├── test_models.py │ ├── test_strategies.py │ ├── test_wrapper.py │ └── utils.py │ ├── data │ ├── BURDI_conversion.csv │ ├── Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.csv │ ├── Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.nc │ ├── Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.yaml │ ├── PRIMAP-csg-test.nc │ ├── __init__.py │ ├── long.csv │ ├── long_no_time.csv │ ├── primap2_test_data_v2.5.1_final.nc │ ├── simple_categorisation_a.yaml │ ├── simple_categorisation_b.yaml │ ├── simple_conversion.csv │ ├── test_create_category_name_conversion.csv │ ├── test_csv_data.csv │ ├── test_csv_data_category_name.csv │ ├── test_csv_data_category_name_fill_cat_code.csv │ ├── test_csv_data_category_name_long.csv │ ├── test_csv_data_sec_cat.csv │ ├── test_csv_data_sec_cat_strings.csv │ ├── test_csv_data_unit_harmonization.csv │ ├── test_empty_ds_if.csv │ 
├── test_empty_ds_if.yaml │ ├── test_from_interchange_format_output.nc │ ├── test_read_wide_csv_file_no_sec_cats.csv │ ├── test_read_wide_csv_file_no_sec_cats_cat_name.csv │ ├── test_read_wide_csv_file_output.csv │ ├── test_read_wide_csv_file_output_entity_def.csv │ ├── test_read_wide_csv_file_output_unit_def.csv │ ├── test_read_wide_csv_file_output_unit_harm.csv │ └── test_sum_skip_allna_inhomogeneous_result.nc │ ├── examples.py │ ├── test_aggregate.py │ ├── test_conversion.py │ ├── test_convert.py │ ├── test_data_format.py │ ├── test_data_reading.py │ ├── test_downscale.py │ ├── test_fill_combine.py │ ├── test_interchange_format.py │ ├── test_merge.py │ ├── test_metadata.py │ ├── test_overview.py │ ├── test_selection.py │ ├── test_setters.py │ ├── test_units.py │ └── utils.py ├── pyproject.toml ├── requirements.txt ├── requirements_dev.txt ├── requirements_upstream_dev.txt ├── setup.cfg ├── setup.py ├── tbump.toml ├── towncrier_github_release_notes.toml ├── towncrier_github_release_notes_template.md ├── tox.ini └── update_citation_info.py /.check_python_version.py: -------------------------------------------------------------------------------- 1 | """Check if the used version of Python is good enough for us.""" 2 | 3 | import itertools 4 | import sys 5 | 6 | SUPPORTED_MAJOR_VERSIONS = (3,) 7 | SUPPORTED_MINOR_VERSIONS = (10, 11, 12) 8 | 9 | if ( 10 | sys.version_info.major not in SUPPORTED_MAJOR_VERSIONS 11 | or sys.version_info.minor not in SUPPORTED_MINOR_VERSIONS 12 | ): 13 | supported_versions = itertools.product(SUPPORTED_MAJOR_VERSIONS, SUPPORTED_MINOR_VERSIONS) 14 | supported_versions_human_readable = ", ".join( 15 | ".".join(str(x) for x in version) for version in supported_versions 16 | ) 17 | print( 18 | f"Python version {sys.version_info} is not supported; please install Python" 19 | f" in one of the supported versions: {supported_versions_human_readable}." 20 | ) 21 | sys.exit(1) 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Write a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | 9 | A clear and concise description of what the bug is. 10 | 11 | **Failing Test** 12 | 13 | Please put the code which fails (ideally in the form of a unit 14 | test) below. 15 | 16 | **Expected behavior** 17 | 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Screenshots** 21 | 22 | If applicable, add screenshots to help explain your problem. 23 | 24 | **System (please complete the following information):** 25 | 26 | - OS: [e.g. Windows, Linux, macOS] 27 | - Python version [e.g. Python 3.5] and output of `conda list --export` and `pip freeze` as applicable 28 | 29 | **Additional context** 30 | 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | 9 | A clear and concise description of what the problem is. Ex. It's annoying that I always have to [...] 10 | 11 | **Describe the solution you'd like** 12 | 13 | A description of the solution you would like to see.
14 | 15 | **Describe alternatives you've considered** 16 | 17 | A description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | 21 | Add any other context or screenshots about the feature request here. 22 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Pull request 2 | 3 | Please confirm that this pull request has done the following: 4 | 5 | - [ ] Tests added 6 | - [ ] Documentation added (where applicable) 7 | - [ ] Description in a `{pr}.thing.md` file in the directory `changelog` added - see [changelog/README.md](https://github.com/pik-primap/primap2/blob/main/changelog/README.md) for details 8 | 9 | ## Description 10 | 11 | Please provide a short description of what your pull request does. 12 | -------------------------------------------------------------------------------- /.github/workflows/ci-upstream-dev.yml: -------------------------------------------------------------------------------- 1 | name: CI upstream development versions 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | workflow_dispatch: # allows you to trigger the workflow run manually 8 | schedule: 9 | - cron: "5 2 * * tue" # Tuesdays at 02:05 UTC. 10 | 11 | jobs: 12 | test: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ ubuntu-latest, windows-latest ] 17 | python-version: [ "3.12" ] 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - name: Set up uv and caching 22 | uses: astral-sh/setup-uv@v2 23 | with: 24 | enable-cache: true 25 | cache-suffix: "${{ matrix.os }}-${{ matrix.python-version }}-upstream-dev" 26 | cache-dependency-glob: "setup.cfg" 27 | version: "0.5.18" 28 | 29 | - name: Create venv 30 | run: | 31 | uv venv --seed --python ${{ matrix.python-version }} 32 | 33 | - name: Install highest dependencies 34 | run: | 35 | uv pip install --resolution highest --prerelease allow .[test] 36 | uv pip install --resolution highest --prerelease allow --requirements requirements_upstream_dev.txt 37 | 38 | - name: Test with pytest (linux) 39 | if: startsWith(matrix.os, 'ubuntu') 40 | run: | 41 | source .venv/bin/activate 42 | pytest --xdoc 43 | 44 | - name: Test with pytest (windows) 45 | if: startsWith(matrix.os, 'windows') 46 | run: | 47 | .venv\Scripts\activate 48 | pytest --xdoc 49 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | workflow_dispatch: # allows you to trigger the workflow run manually 8 | 9 | jobs: 10 | test: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ ubuntu-latest, windows-latest ] 15 | python-version: [ "3.10", "3.11", "3.12" ] 16 | resolution: [ "highest", "lowest-direct" ] 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up uv and caching 21 | uses: astral-sh/setup-uv@v2 22 | with: 23 | enable-cache: true 24 | cache-suffix: "${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.resolution }}" 25 | cache-dependency-glob: "setup.cfg" 26 | version: "0.4.9" 27 | 28 | - name: Create venv 29 | run: | 30 | uv venv --seed --python ${{ matrix.python-version }} 31 | 32 | - name: Install ${{ matrix.resolution }} dependencies 33 | run: | 34 | uv pip install --resolution ${{ matrix.resolution }} .[test] 35 |
36 | - name: Test with pytest (linux) 37 | if: startsWith(matrix.os, 'ubuntu') 38 | run: | 39 | source .venv/bin/activate 40 | pytest --xdoc --cov=primap2 --cov-report=xml --junitxml=junit.xml -o junit_family=legacy 41 | 42 | - name: Test with pytest (windows) 43 | if: startsWith(matrix.os, 'windows') 44 | run: | 45 | .venv\Scripts\activate 46 | pytest --xdoc --cov=primap2 --cov-report=xml --junitxml=junit.xml -o junit_family=legacy 47 | 48 | - name: Upload coverage to Codecov 49 | uses: codecov/codecov-action@v4 50 | with: 51 | files: ./coverage.xml 52 | token: ${{ secrets.CODECOV_TOKEN }} 53 | 54 | - name: Upload test results to Codecov 55 | if: ${{ !cancelled() }} 56 | uses: codecov/test-results-action@v1 57 | with: 58 | token: ${{ secrets.CODECOV_TOKEN }} 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .github_release_notes_file.md 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | docs-old/generated/ 15 | docs/generated/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | stubs/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | html/ 33 | .mutmut-cache 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs-old/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | .venv 95 | venv/ 96 | ENV/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | # IDE settings 112 | .vscode/ 113 | .idea/ 114 | /.dmypy.json 115 | 116 | # Johannes' development scripts 117 | JG_test_scripts 118 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: 'v5.0.0' 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - id: check-ast 12 | - id: fix-byte-order-marker 13 | - id: check-case-conflict 14 | - id: check-merge-conflict 15 | - id: detect-private-key 16 | 
- id: mixed-line-ending 17 | - repo: https://github.com/astral-sh/ruff-pre-commit 18 | rev: 'v0.11.10' 19 | hooks: 20 | - id: ruff 21 | args: [ --fix, --exit-non-zero-on-fix ] 22 | - id: ruff-format 23 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.12" 12 | 13 | # Build documentation in the docs/source/ directory with Sphinx 14 | sphinx: 15 | configuration: docs/source/conf.py 16 | 17 | python: 18 | install: 19 | - requirements: docs/requirements.txt 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions are welcome, and they are greatly appreciated! Every little bit 4 | helps, and credit will always be given. 5 | 6 | You can contribute in many ways: 7 | 8 | ## Types of Contributions 9 | 10 | ### Report Bugs 11 | 12 | Report bugs at https://github.com/primap-community/primap2/issues. 13 | 14 | If you are reporting a bug, please include: 15 | 16 | * Your operating system name and version. 17 | * Any details about your local setup that might be helpful in troubleshooting. 18 | * Detailed steps to reproduce the bug. 19 | 20 | ### Fix Bugs 21 | 22 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 23 | wanted" is open to whoever wants to implement it. 24 | 25 | ### Implement Features 26 | 27 | Look through the GitHub issues for features. Anything tagged with "enhancement" 28 | and "help wanted" is open to whoever wants to implement it. 29 | 30 | ### Write Documentation 31 | 32 | PRIMAP2 could always use more documentation, whether as part of the 33 | official PRIMAP2 docs, in docstrings, or even on the web in blog posts, 34 | articles, and such. 35 | 36 | ### Submit Feedback 37 | 38 | The best way to send feedback is to file an issue at https://github.com/primap-community/primap2/issues. 39 | 40 | If you are proposing a feature: 41 | 42 | * Explain in detail how it would work. 43 | * Keep the scope as narrow as possible, to make it easier to implement. 44 | * Remember that our time is limited, and that contributions 45 | are welcome :) 46 | 47 | ## Get Started! 48 | 49 | To get started with PRIMAP2 development, check out our development 50 | documentation at https://primap2.readthedocs.io/en/main/development.html. 
51 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help virtual-environment install-pre-commit stubs update-venv README.md check-python-version 2 | .DEFAULT_GOAL := help 3 | 4 | define PRINT_HELP_PYSCRIPT 5 | import re, sys 6 | 7 | for line in sys.stdin: 8 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 9 | if match: 10 | target, help = match.groups() 11 | print("%-20s %s" % (target, help)) 12 | endef 13 | export PRINT_HELP_PYSCRIPT 14 | 15 | help: 16 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 17 | 18 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 19 | 20 | clean-build: ## remove build artifacts 21 | rm -fr build/ 22 | rm -fr dist/ 23 | rm -fr .eggs/ 24 | find . -name '*.egg-info' -exec rm -fr {} + 25 | find . -name '*.egg' -exec rm -fr {} + 26 | 27 | clean-pyc: ## remove Python file artifacts 28 | find . -name '*.pyc' -exec rm -f {} + 29 | find . -name '*.pyo' -exec rm -f {} + 30 | find . -name '*~' -exec rm -f {} + 31 | find . -name '__pycache__' -exec rm -fr {} + 32 | 33 | clean-test: ## remove test and coverage artifacts 34 | rm -fr .tox/ 35 | rm -f .coverage 36 | rm -fr htmlcov/ 37 | rm -fr .pytest_cache 38 | 39 | lint: venv ## check style with pre-commit hooks 40 | venv/bin/pre-commit run --all-files 41 | 42 | test: venv ## run tests quickly with the default Python 43 | venv/bin/pytest --xdoc -rx 44 | 45 | test-all: ## run tests on every Python version with tox 46 | venv/bin/tox -p 47 | 48 | coverage: venv ## check code coverage quickly with the default Python 49 | venv/bin/coverage run --source primap2 -m pytest 50 | venv/bin/coverage report -m 51 | venv/bin/coverage html 52 | ls htmlcov/index.html 53 | 54 | clean-docs: venv ## Remove generated parts of documentation, then build docs 55 | . venv/bin/activate ; $(MAKE) -C docs clean 56 | . venv/bin/activate ; $(MAKE) -C docs html 57 | 58 | docs: venv ## generate Sphinx HTML documentation, including API docs 59 | . venv/bin/activate ; $(MAKE) -C docs html 60 | 61 | release: venv dist ## package and upload a release 62 | venv/bin/twine upload --repository primap dist/* 63 | 64 | dist: clean venv ## builds source and wheel package 65 | # because we update the citation info after releasing on github and zenodo but 66 | # before building for pypi, we need to force the correct version. 67 | SETUPTOOLS_SCM_PRETEND_VERSION=0.12.2 venv/bin/python -m build 68 | 69 | install: clean ## install the package to the active Python's site-packages 70 | python setup.py install 71 | 72 | virtual-environment: venv ## setup a virtual environment for development 73 | 74 | venv: requirements_dev.txt setup.cfg 75 | [ -d venv ] || python3 .check_python_version.py 76 | [ -d venv ] || python3 -m venv venv 77 | venv/bin/python -m pip install --upgrade wheel uv 78 | . venv/bin/activate ; venv/bin/uv pip install --upgrade -e .[dev] 79 | touch venv 80 | 81 | update-venv: ## update all packages in the development environment 82 | [ -d venv ] || python3 -m venv venv 83 | venv/bin/python .check_python_version.py 84 | venv/bin/python -m pip install --upgrade wheel uv 85 | . 
venv/bin/activate ; venv/bin/uv pip install --upgrade --resolution highest -e .[dev] 86 | touch venv 87 | 88 | install-pre-commit: update-venv ## install the pre-commit hooks 89 | venv/bin/pre-commit install 90 | 91 | stubs: venv ## generate directory with xarray stubs with inserted primap2 stubs 92 | rm -rf stubs 93 | mkdir -p stubs 94 | venv/bin/stubgen -p xarray -o stubs 95 | (cd stubs; patch -s -p0 < ../primap-stubs.patch) 96 | 97 | README.md: ## Update the citation information from Zenodo 98 | venv/bin/python update_citation_info.py 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PRIMAP2 2 | 3 | [![PyPI status](https://img.shields.io/pypi/v/primap2.svg)](https://pypi.python.org/pypi/primap2) 4 | [![Documentation Status](https://readthedocs.org/projects/primap2/badge/?version=main)](https://primap2.readthedocs.io/en/stable/?badge=main) 5 | [![Zenodo release](https://zenodo.org/badge/DOI/10.5281/zenodo.4535902.svg)](https://doi.org/10.5281/zenodo.4535902) 6 | 7 | PRIMAP2 is the next generation of the PRIMAP climate policy analysis suite. 8 | PRIMAP2 is free software; you are welcome to use it in your own research. 9 | The documentation can be found at <https://primap2.readthedocs.io>. 10 | 11 | ## Structure 12 | 13 | PRIMAP2 is: 14 | - A flexible and powerful data format built on [xarray](https://xarray.pydata.org). 15 | - A collection of functions for common tasks when wrangling climate policy 16 | data, like aggregation and interpolation. 17 | - A format for data packages built on [datalad](https://www.datalad.org), providing 18 | metadata extraction and search on a collection of data packages. 19 | 20 | ## Status 21 | 22 | PRIMAP2 is in active development, and not everything promised above is built 23 | yet. 24 | 25 | ## License 26 | 27 | Copyright 2020-2022, Potsdam-Institut für Klimafolgenforschung e.V. 28 | 29 | Copyright 2023-2024, Climate Resource Pty Ltd 30 | 31 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this 32 | file except in compliance with the License. You may obtain a copy of the License at 33 | 34 | <http://www.apache.org/licenses/LICENSE-2.0> 35 | 36 | Unless required by applicable law or agreed to in writing, software distributed under 37 | the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 38 | KIND, either express or implied. See the License for the specific language governing 39 | permissions and limitations under the License. 40 | 41 | PRIMAP2 incorporates parts of xarray and pint_xarray, which are available under the 42 | Apache License, Version 2.0 as well. The full text of the xarray copyright statement is 43 | included in the licenses directory. 44 | 45 | ## Citation 46 | 47 | If you use this library and want to cite it, please cite it as: 48 | 49 | Mika Pflüger and Johannes Gütschow. (2025-02-07). 50 | primap-community/primap2: PRIMAP2 Version 0.12.2. 51 | Zenodo.
https://doi.org/10.5281/zenodo.14831768 52 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | docs/source/changelog.md -------------------------------------------------------------------------------- /changelog/286.improvement.md: -------------------------------------------------------------------------------- 1 | Added a wrapper for the csg `compose` function to handle input data preparation (remove data which is not needed in the process) and output data handling (set coords and metadata). 2 | -------------------------------------------------------------------------------- /changelog/288.improvement.md: -------------------------------------------------------------------------------- 1 | Added a csg filling strategy using local gap filling with polynomial trends to calculate scaling factors (similar to the method used in primap1). 2 | -------------------------------------------------------------------------------- /changelog/313.fix.md: -------------------------------------------------------------------------------- 1 | Fixed conversion of nan values. 2 | -------------------------------------------------------------------------------- /changelog/322.fix.md: -------------------------------------------------------------------------------- 1 | Replaced xr.core.ops.fillna with fillna from the public xarray API for compatibility with upcoming xarray releases. 2 | -------------------------------------------------------------------------------- /changelog/323.fix.md: -------------------------------------------------------------------------------- 1 | * Fixed a pandas stack issue in GHG_inventory_reading. 2 | * Fixed `skipna` in conversions. 3 | -------------------------------------------------------------------------------- /changelog/323.improvement.md: -------------------------------------------------------------------------------- 1 | Added additional non-numerical codes in data reading functions. 2 | -------------------------------------------------------------------------------- /changelog/324.fix.md: -------------------------------------------------------------------------------- 1 | Dropped encoding of data sets when merging or saving to netcdf to avoid truncation of coordinate values. 2 | -------------------------------------------------------------------------------- /changelog/README.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | This directory contains "news fragments", i.e. short files that contain a small markdown-formatted bit of text that will be 4 | added to the CHANGELOG when it is next compiled. 5 | 6 | The CHANGELOG will be read by users, so this description should be aimed at primap2 users instead of 7 | describing internal changes which are only relevant to developers. Merge requests in combination with our git history provide additional 8 | developer-centric information. 9 | 10 | Make sure to use phrases in the past tense and use punctuation, for example: 11 | 12 | ``` 13 | Improved verbose diff output with sequences. 14 | 15 | Terminal summary statistics now use multiple colors. 16 | ``` 17 | 18 | Each file should have a name of the form `<PR>.<TYPE>.md`, where `<PR>` is the pull request number, and `<TYPE>` is one of: 19 | 20 | * `feature`: new user-facing features, like new command-line options and new behaviour.
21 | * `improvement`: improvement of existing functionality, usually without requiring user intervention. 22 | * `fix`: fixes a bug. 23 | * `docs`: documentation improvement, like rewording an entire section or adding missing docs. 24 | * `deprecation`: feature deprecation. 25 | * `breaking`: a change which may break existing uses, such as feature removal or behaviour change. 26 | * `trivial`: fixing a small typo or internal change that might be noteworthy. 27 | 28 | So for example: `123.feature.md`, `456.fix.md`. 29 | 30 | Since you need the pull request number for the filename, you must submit a PR first. From this PR, you can get the 31 | PR number and then create the news file. A single PR can also have multiple news items, for example a given PR may 32 | add a feature as well as deprecate some existing functionality. 33 | 34 | If you are not sure what issue type to use, don't hesitate to ask in your PR. 35 | 36 | `towncrier` preserves multiple paragraphs and formatting (code blocks, lists, and so on), but for entries other than 37 | features it is usually better to stick to a single paragraph to keep it concise. You may also use `MyST` [style 38 | cross-referencing](https://myst-parser.readthedocs.io/en/latest/syntax/cross-referencing.html) within your news items 39 | to link to other documentation. 40 | 41 | You can also run `towncrier --draft` to see the draft changelog that will be appended to [docs/source/changelog.md]() 42 | on the next release. 43 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | informational: true 6 | patch: 7 | default: 8 | informational: true 9 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | Sphinx>=4.2,<8.1.3 3 | myst-nb>=1 4 | sphinx-book-theme>=1.1 5 | numpydoc>=1.6 6 | jupytext>=1.16 7 | sphinx-copybutton>=0.5.2 8 | sphinx-autosummary-accessors>=2023.4 9 | sphinx-tippy>=0.4.3 10 | sphinx-favicon>=1.0 11 | -------------------------------------------------------------------------------- /docs/source/.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb 2 | -------------------------------------------------------------------------------- /docs/source/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_static/.gitkeep -------------------------------------------------------------------------------- /docs/source/_static/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_static/apple-touch-icon.png -------------------------------------------------------------------------------- /docs/source/_static/favicon-48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_static/favicon-48x48.png -------------------------------------------------------------------------------- /docs/source/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_static/favicon.ico -------------------------------------------------------------------------------- /docs/source/_static/primap_logo_transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_static/primap_logo_transparent.png -------------------------------------------------------------------------------- /docs/source/_static/primap_logo_transparent_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_static/primap_logo_transparent_dark.png -------------------------------------------------------------------------------- /docs/source/_static/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "primap2 documentation", 3 | "short_name": "primap2", 4 | "icons": [ 5 | { 6 | "src": "/web-app-manifest-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png", 9 | "purpose": "maskable" 10 | }, 11 | { 12 | "src": "/web-app-manifest-512x512.png", 13 | "sizes": "512x512", 14 | "type": 
"image/png", 15 | "purpose": "maskable" 16 | } 17 | ], 18 | "theme_color": "#ffffff", 19 | "background_color": "#ffffff", 20 | "display": "standalone" 21 | } 22 | -------------------------------------------------------------------------------- /docs/source/_static/web-app-manifest-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_static/web-app-manifest-192x192.png -------------------------------------------------------------------------------- /docs/source/_static/web-app-manifest-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_static/web-app-manifest-512x512.png -------------------------------------------------------------------------------- /docs/source/_templates/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/_templates/.gitkeep -------------------------------------------------------------------------------- /docs/source/api/.gitignore: -------------------------------------------------------------------------------- 1 | generated 2 | generated_* 3 | -------------------------------------------------------------------------------- /docs/source/api/generate_api_docs.py: -------------------------------------------------------------------------------- 1 | """Generate API docs as we like them. 2 | 3 | autodoc and autosummary-accessors make it possible to use `autosummary` sections that 4 | automatically include functions etc. However, what exactly gets documented needs to 5 | be added manually. This script automates this. 6 | """ 7 | 8 | # add new submodules that should be documented here 9 | SUBMODULES_TO_DOCUMENT = ["pm2io", "csg"] 10 | 11 | import primap2 12 | 13 | primap2_top_level_api = [x for x in primap2.__all__ if x not in SUBMODULES_TO_DOCUMENT] 14 | primap2_top_level_api_formatted = "\n".join(f" {x}" for x in sorted(primap2_top_level_api)) 15 | 16 | sm_documentation_formatted = [] 17 | for sm in SUBMODULES_TO_DOCUMENT: 18 | exec(f"import primap2.{sm}") 19 | sm_top_level_api = getattr(primap2, sm).__all__ 20 | sm_top_level_api_formatted = "\n".join(f" {sm}.{x}" for x in sorted(sm_top_level_api)) 21 | sm_documentation_formatted.append(f""" 22 | .. _primap2.{sm}: 23 | 24 | primap2.{sm} 25 | {'~'*(len('primap2.') + len(sm))} 26 | 27 | {getattr(primap2, sm).__doc__} 28 | 29 | .. 
autosummary:: 30 | :toctree: generated_{sm}/ 31 | 32 | {sm_top_level_api_formatted} 33 | """) 34 | 35 | submodules_documentation_formatted = "\n".join(sm_documentation_formatted) 36 | 37 | 38 | def accessor_attrs_meths(accessor) -> tuple[list[str], list[str]]: 39 | members = dir(accessor) 40 | attrs = [] 41 | meths = [] 42 | for m in members: 43 | if m.startswith("_") and m != "__getitem__": 44 | continue 45 | if callable(getattr(accessor, m)): 46 | meths.append(m) 47 | else: 48 | attrs.append(m) 49 | return attrs, meths 50 | 51 | 52 | da_pr_attrs, da_pr_meths = accessor_attrs_meths(primap2.accessors.PRIMAP2DataArrayAccessor) 53 | da_pr_attrs_formatted = "\n".join(f" DataArray.pr.{x}" for x in sorted(da_pr_attrs)) 54 | da_pr_meths_formatted = "\n".join(f" DataArray.pr.{x}" for x in sorted(da_pr_meths)) 55 | 56 | ds_pr_attrs, ds_pr_meths = accessor_attrs_meths(primap2.accessors.PRIMAP2DatasetAccessor) 57 | ds_pr_attrs_formatted = "\n".join(f" Dataset.pr.{x}" for x in sorted(ds_pr_attrs)) 58 | ds_pr_meths_formatted = "\n".join(f" Dataset.pr.{x}" for x in sorted(ds_pr_meths)) 59 | 60 | 61 | with open("index.rst", "w") as fd: 62 | fd.write(f""" 63 | API 64 | === 65 | .. currentmodule:: primap2 66 | 67 | Top-level API 68 | ------------- 69 | 70 | .. autosummary:: 71 | :toctree: generated/ 72 | 73 | {primap2_top_level_api_formatted} 74 | 75 | 76 | Submodules 77 | ---------- 78 | 79 | {submodules_documentation_formatted} 80 | 81 | .. currentmodule:: xarray 82 | 83 | DataArray 84 | --------- 85 | 86 | .. _da.pr.attributes: 87 | 88 | Attributes 89 | ~~~~~~~~~~ 90 | 91 | .. autosummary:: 92 | :toctree: generated/ 93 | :template: autosummary/accessor_attribute.rst 94 | 95 | {da_pr_attrs_formatted} 96 | 97 | .. _da.pr.methods: 98 | 99 | Methods 100 | ~~~~~~~ 101 | 102 | .. autosummary:: 103 | :toctree: generated/ 104 | :template: autosummary/accessor_method.rst 105 | 106 | {da_pr_meths_formatted} 107 | 108 | 109 | Dataset 110 | ------- 111 | 112 | .. _ds.pr.attributes: 113 | 114 | Attributes 115 | ~~~~~~~~~~ 116 | 117 | .. autosummary:: 118 | :toctree: generated/ 119 | :template: autosummary/accessor_attribute.rst 120 | 121 | {ds_pr_attrs_formatted} 122 | 123 | .. _ds.pr.methods: 124 | 125 | Methods 126 | ~~~~~~~ 127 | 128 | .. autosummary:: 129 | :toctree: generated/ 130 | :template: autosummary/accessor_method.rst 131 | 132 | {ds_pr_meths_formatted} 133 | """) 134 | -------------------------------------------------------------------------------- /docs/source/api/index.rst: -------------------------------------------------------------------------------- 1 | 2 | API 3 | === 4 | .. currentmodule:: primap2 5 | 6 | Top-level API 7 | ------------- 8 | 9 | .. autosummary:: 10 | :toctree: generated/ 11 | 12 | Not 13 | ProcessingStepDescription 14 | TimeseriesProcessingDescription 15 | accessors 16 | open_dataset 17 | ureg 18 | 19 | 20 | Submodules 21 | ---------- 22 | 23 | 24 | .. _primap2.pm2io: 25 | 26 | primap2.pm2io 27 | ~~~~~~~~~~~~~ 28 | 29 | Data reading module of the PRIMAP2 climate policy analysis package. 30 | 31 | .. autosummary:: 32 | :toctree: generated_pm2io/ 33 | 34 | pm2io.convert_long_dataframe_if 35 | pm2io.convert_wide_dataframe_if 36 | pm2io.from_interchange_format 37 | pm2io.nir_add_unit_information 38 | pm2io.nir_convert_df_to_long 39 | pm2io.read_interchange_format 40 | pm2io.read_long_csv_file_if 41 | pm2io.read_wide_csv_file_if 42 | pm2io.write_interchange_format 43 | 44 | 45 | .. 
_primap2.csg: 46 | 47 | primap2.csg 48 | ~~~~~~~~~~~ 49 | 50 | 51 | Composite Source Generator 52 | 53 | Generate a composite harmonized dataset from multiple sources according to defined 54 | source priorities and matching algorithms. 55 | 56 | 57 | .. autosummary:: 58 | :toctree: generated_csg/ 59 | 60 | csg.FitParameters 61 | csg.GlobalLSStrategy 62 | csg.LocalTrendsStrategy 63 | csg.PriorityDefinition 64 | csg.StrategyDefinition 65 | csg.StrategyUnableToProcess 66 | csg.SubstitutionStrategy 67 | csg.compose 68 | csg.create_composite_source 69 | 70 | 71 | .. currentmodule:: xarray 72 | 73 | DataArray 74 | --------- 75 | 76 | .. _da.pr.attributes: 77 | 78 | Attributes 79 | ~~~~~~~~~~ 80 | 81 | .. autosummary:: 82 | :toctree: generated/ 83 | :template: autosummary/accessor_attribute.rst 84 | 85 | DataArray.pr.dim_alias_translations 86 | DataArray.pr.gwp_context 87 | DataArray.pr.loc 88 | 89 | .. _da.pr.methods: 90 | 91 | Methods 92 | ~~~~~~~ 93 | 94 | .. autosummary:: 95 | :toctree: generated/ 96 | :template: autosummary/accessor_method.rst 97 | 98 | DataArray.pr.__getitem__ 99 | DataArray.pr.add_aggregates_coordinates 100 | DataArray.pr.any 101 | DataArray.pr.combine_first 102 | DataArray.pr.convert 103 | DataArray.pr.convert_to_gwp 104 | DataArray.pr.convert_to_gwp_like 105 | DataArray.pr.convert_to_mass 106 | DataArray.pr.count 107 | DataArray.pr.coverage 108 | DataArray.pr.dequantify 109 | DataArray.pr.downscale_timeseries 110 | DataArray.pr.fill_all_na 111 | DataArray.pr.fillna 112 | DataArray.pr.merge 113 | DataArray.pr.quantify 114 | DataArray.pr.set 115 | DataArray.pr.sum 116 | DataArray.pr.to_df 117 | 118 | 119 | Dataset 120 | ------- 121 | 122 | .. _ds.pr.attributes: 123 | 124 | Attributes 125 | ~~~~~~~~~~ 126 | 127 | .. autosummary:: 128 | :toctree: generated/ 129 | :template: autosummary/accessor_attribute.rst 130 | 131 | Dataset.pr.comment 132 | Dataset.pr.contact 133 | Dataset.pr.dim_alias_translations 134 | Dataset.pr.entity_terminology 135 | Dataset.pr.institution 136 | Dataset.pr.loc 137 | Dataset.pr.publication_date 138 | Dataset.pr.references 139 | Dataset.pr.rights 140 | Dataset.pr.title 141 | 142 | .. _ds.pr.methods: 143 | 144 | Methods 145 | ~~~~~~~ 146 | 147 | .. autosummary:: 148 | :toctree: generated/ 149 | :template: autosummary/accessor_method.rst 150 | 151 | Dataset.pr.__getitem__ 152 | Dataset.pr.add_aggregates_coordinates 153 | Dataset.pr.add_aggregates_variables 154 | Dataset.pr.any 155 | Dataset.pr.combine_first 156 | Dataset.pr.count 157 | Dataset.pr.coverage 158 | Dataset.pr.dequantify 159 | Dataset.pr.downscale_gas_timeseries 160 | Dataset.pr.downscale_timeseries 161 | Dataset.pr.ensure_valid 162 | Dataset.pr.expand_dims 163 | Dataset.pr.fill_all_na 164 | Dataset.pr.fill_na_gas_basket_from_contents 165 | Dataset.pr.fillna 166 | Dataset.pr.gas_basket_contents_sum 167 | Dataset.pr.has_processing_info 168 | Dataset.pr.merge 169 | Dataset.pr.quantify 170 | Dataset.pr.remove_processing_info 171 | Dataset.pr.set 172 | Dataset.pr.sum 173 | Dataset.pr.to_df 174 | Dataset.pr.to_interchange_format 175 | Dataset.pr.to_netcdf 176 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration file for the Sphinx documentation builder. 
3 | 4 | For the full list of built-in configuration values, see the documentation: 5 | https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | """ 7 | 8 | import sphinx_autosummary_accessors 9 | 10 | # -- Project information ----------------------------------------------------- 11 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 12 | 13 | project = "primap2" 14 | # put the authors in their own variable, so they can be reused later 15 | author = "Mika Pflüger and Johannes Gütschow" 16 | copyright = "2021-2023: Potsdam Institute for Climate Impact Research; 2023-2024: Climate Resource" 17 | language = "en" 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = [ 23 | # Generate an API documentation automatically from docstrings 24 | "sphinx.ext.autodoc", 25 | # Numpy-style docstrings 26 | "numpydoc", 27 | # Better summaries for API docs 28 | "sphinx.ext.autosummary", 29 | # also for our xarray accessor 30 | "sphinx_autosummary_accessors", 31 | # jupytext rendered notebook support (also loads myst_parser) 32 | "myst_nb", 33 | # links to other docs 34 | "sphinx.ext.intersphinx", 35 | # add source code to docs 36 | "sphinx.ext.viewcode", 37 | # add copy code button to code examples 38 | "sphinx_copybutton", 39 | # math support 40 | "sphinx.ext.mathjax", 41 | # nicer tooltips 42 | "sphinx_tippy", 43 | # better favicons 44 | "sphinx_favicon", 45 | ] 46 | 47 | # general sphinx settings 48 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 49 | add_module_names = True 50 | # Add templates for sphinx autosummary accessors 51 | templates_path = ["_templates", sphinx_autosummary_accessors.templates_path] 52 | # Stop sphinx doing funny things with byte order markers 53 | source_encoding = "utf-8" 54 | 55 | # autodoc type hints settings 56 | # https://github.com/tox-dev/sphinx-autodoc-typehints 57 | # include full name of classes when expanding type hints? 
58 | typehints_fully_qualified = True 59 | # Add rtype directive if needed 60 | typehints_document_rtype = True 61 | # Put the return type as part of the return documentation 62 | typehints_use_rtype = False 63 | 64 | # Generate autosummary stubs automatically 65 | autosummary_generate = True 66 | 67 | 68 | # Generate the necessary config for the API documentation 69 | def generate_api_docs(app): 70 | import subprocess 71 | import pathlib 72 | 73 | subprocess.run( 74 | ["python3", "generate_api_docs.py"], 75 | cwd=pathlib.Path(__file__).parent / "api", 76 | check=True, 77 | ) 78 | 79 | 80 | def setup(app): 81 | app.connect("builder-inited", generate_api_docs) 82 | 83 | 84 | # Nicer formatting for numpydoc 85 | numpydoc_class_members_toctree = False 86 | 87 | # Left-align maths equations 88 | mathjax3_config = {"chtml": {"displayAlign": "center"}} 89 | 90 | # myst configuration 91 | myst_enable_extensions = ["amsmath", "dollarmath"] 92 | nb_execution_mode = "cache" 93 | nb_execution_raise_on_error = True 94 | nb_execution_show_tb = True 95 | nb_execution_timeout = 120 96 | 97 | # -- Options for HTML output ------------------------------------------------- 98 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 99 | 100 | # Pick your theme for html output 101 | html_theme = "sphinx_book_theme" 102 | html_static_path = ["_static"] 103 | html_theme_options = { 104 | "repository_url": "https://github.com/primap-community/primap2/", 105 | "repository_branch": "main", 106 | "path_to_docs": "docs/source", 107 | "use_repository_button": True, 108 | "use_issues_button": True, 109 | "use_edit_page_button": True, 110 | "logo": { 111 | "text": "primap2 documentation", 112 | "image_light": "_static/primap_logo_transparent.png", 113 | "image_dark": "_static/primap_logo_transparent_dark.png", 114 | }, 115 | } 116 | html_context = { 117 | # dark mode of sphinx-book-theme doesn't play nicely with xarray 118 | "default_mode": "light", 119 | } 120 | 121 | # favicon 122 | favicons = [ 123 | {"href": "favicon-48x48.png", "sizes": "48x48"}, 124 | {"href": "favicon.svg"}, 125 | {"href": "favicon.ico", "rel": "shortcut icon"}, 126 | {"href": "apple-touch-icon.png", "rel": "apple-touch-icon", "sizes": "180x180"}, 127 | {"href": "site.webmanifest", "rel": "manifest"}, 128 | ] 129 | 130 | # Intersphinx mapping 131 | intersphinx_mapping = { 132 | "numpy": ("https://numpy.org/doc/stable", None), 133 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), 134 | "python": ("https://docs.python.org/3", None), 135 | "pyam": ("https://pyam-iamc.readthedocs.io/en/latest", None), 136 | "scmdata": ("https://scmdata.readthedocs.io/en/latest", None), 137 | "xarray": ("https://docs.xarray.dev/en/stable", None), 138 | "pint": ( 139 | "https://pint.readthedocs.io/en/latest", 140 | None, 141 | ), 142 | "scipy": ("https://docs.scipy.org/doc/scipy/", None), 143 | "climate_categories": ( 144 | "https://climate-categories.readthedocs.io/en/latest", 145 | None, 146 | ), 147 | } 148 | -------------------------------------------------------------------------------- /docs/source/credits.md: -------------------------------------------------------------------------------- 1 | # Credits 2 | 3 | ## Developers 4 | 5 | * [Johannes Gütschow](https://orcid.org/0000-0001-9944-3685) 6 | * [Mika Pflüger](https://orcid.org/0000-0002-7814-8916) 7 | 8 | ## Former Developers 9 | 10 | Many thanks to previous developers: 11 | * [Annika Günther](https://www.pik-potsdam.de/members/annikag) 12 | 
-------------------------------------------------------------------------------- /docs/source/data_format/index.md: -------------------------------------------------------------------------------- 1 | # Data Format 2 | 3 | In this section, we will dive deeper into the specifics of the primap2 data format, 4 | and we will also show the interchange format, an additional format for ingesting and 5 | exporting data. 6 | 7 | 8 | ```{toctree} 9 | :caption: primap2 data formats 10 | :maxdepth: 2 11 | 12 | data_format_examples 13 | data_format_details 14 | interchange_format_examples 15 | interchange_format_details 16 | ``` 17 | -------------------------------------------------------------------------------- /docs/source/data_format/interchange_format_details.md: -------------------------------------------------------------------------------- 1 | # Interchange format details 2 | 3 | The interchange format consists of a wide tabular data object and an additional 4 | dictionary carrying the meta data. 5 | 6 | In memory, the tabular data object is a pandas DataFrame and the meta data object 7 | is a python dictionary. 8 | For storage, the tabular data is written to a CSV file and the meta data is written 9 | to a YAML file. 10 | 11 | ## Tabular data 12 | 13 | The data is stored in a wide format. 14 | Each row is a time series. 15 | The columns first list all coordinate values for the time series, then the time points. 16 | An example table representation is: 17 | 18 | | area (ISO3) | category (IPCC2006) | entity (primap2) | unit | 2000 | 2001 | 2002 | 2003 | 19 | |-------------|---------------------|------------------|-----------------|------|------|------|------| 20 | | "COL" | "1" | "CO2" | "Gg CO2 / year" | 2.3 | 2.2 | 2.0 | 1.9 | 21 | | "COL" | "2" | "CO2" | "Gg CO2 / year" | 1.5 | 1.6 | 1.3 | 1.2 | 22 | 23 | Specifically, the columns consist of: 24 | 25 | - All dimensions defined on the Dataset except `time`, as described in 26 | {ref}`data_format_details`, 27 | including the category set (terminology) in brackets as in the standard data format. 28 | - The entity (with its terminology in brackets, if an entity terminology is defined 29 | for the dataset), which is used to store the data variable name. The full variable 30 | name, including the global warming potential if applicable, is used here. 31 | - The unit in a format which can be parsed by openscm-units. 32 | - One column per value in the `time` dimension of the Dataset, formatted according 33 | to the `time_format` strftime format string given in the meta data (see below). 34 | 35 | The strictly tabular data format makes it possible to read the data into e.g. Excel, 36 | but imposes multiple inefficiencies: 37 | 38 | - In PRIMAP2 data sets, the unit is identical for all time series of the same entity. 39 | Still, the unit is stored for each time series. 40 | - In PRIMAP2 data sets, not all entities use all dimensions. For example, population 41 | data might be given together with emissions data, but only the emissions data use 42 | categories. However, the tabular format requires storing all entities with the same 43 | dimensions. Therefore, the dimensions that each entity uses are listed in the 44 | meta data (see below) and dimensions which are not used for the entity are denoted 45 | with an empty string in the tabular data. 46 | 47 | ## Meta Data 48 | 49 | To correctly interpret the tabular data, meta data is necessary.
50 | The meta data is a dictionary with the following keys: 51 | 52 | | key | data type | meaning | 53 | |------------------------|-----------|----------------------------------------------------------------------------------| 54 | | attrs | dict | The `attrs` dictionary of the dataset as defined in {ref}`data_format_details` | 55 | | data_file | str | The relative path to the CSV data file (only when stored, not in-memory) | 56 | | dimensions | dict | Mapping of the entities to a list of the dimensions used by them | 57 | | time_format | str | strftime style time format string used for the time columns | 58 | | additional_coordinates | dict | Mapping of additional coordinate entities to the associated dimension (optional) | 59 | | dtypes | dict | Mapping of non-float entities to their data type (optional) | 60 | 61 | In the `dimensions` dictionary, the keys are entities as given in the tabular data in 62 | the entity column. The values are lists of column names as used in the tabular data, 63 | i.e. including the terminology. 64 | To avoid repeating dimension information for many entities with the same dimensions, 65 | it is possible to use `*` as the entity name in the dimensions dict, which will be used 66 | as a default for all entities not explicitly listed. 67 | Dimension information has to be given for all entities, i.e. if no default dimensions 68 | are specified using `*`, there has to be an entry in the dimensions dict for each 69 | unique value in the entity column in the tabular data. 70 | 71 | ## On-disk format details 72 | 73 | ### CSV file 74 | 75 | Numeric values are given unquoted and string values are quoted with `"`. 76 | Missing information is denoted by an empty string `""`. 77 | 78 | ### YAML file 79 | 80 | All keys have to be sorted alphabetically. 81 | -------------------------------------------------------------------------------- /docs/source/data_reading/.gitignore: -------------------------------------------------------------------------------- 1 | PRIMAPHIST22__19-Jan-2021.csv 2 | -------------------------------------------------------------------------------- /docs/source/data_reading/index.md: -------------------------------------------------------------------------------- 1 | # Data Reading 2 | 3 | To work with emissions data in PRIMAP2, it needs to be converted into the 4 | PRIMAP2 netcdf data format. For the most important datasets we will (in 5 | the future) offer datalad packages that can automatically download and 6 | process the data. But currently, and for custom data, you will need to do 7 | the conversion yourself. 8 | 9 | ## General information 10 | 11 | The data reading functionality is bundled in the PRIMAP2 submodule {ref}`primap2.pm2io`. 12 | 13 | To enable a wider use of the PRIMAP2 data reading functionality, we read all 14 | data into the PRIMAP2 interchange format, which is a wide-format pandas 15 | DataFrame with coordinates in columns, following PRIMAP2 specifications. 16 | Additional meta data is stored in `DataFrame.attrs`. As the `attrs` 17 | functionality in pandas is experimental, it is only stored in the DataFrame 18 | returned by the reading functions and should be saved separately before 19 | doing any processing with the DataFrame. 20 | 21 | The PRIMAP2 interchange format can then be converted into native 22 | PRIMAP2 xarray Datasets. 23 | 24 | For details on data reading, see the following sections and example code linked 25 | therein.
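A minimal sketch of this two-step workflow is shown below; the file name, coordinate columns, and default values are purely illustrative placeholders, while working values are shown in the examples linked in the following sections.

```python
import primap2 as pm2

# step 1: read a wide-format CSV file into the interchange format
# (file name, coordinate columns, and defaults are illustrative placeholders)
data_if = pm2.pm2io.read_wide_csv_file_if(
    "emissions.csv",
    coords_cols={"unit": "unit", "entity": "gas", "area": "country", "category": "category"},
    coords_defaults={"source": "MY_SOURCE"},
    coords_terminologies={"area": "ISO3", "category": "IPCC2006"},
)

# step 2: convert the interchange format DataFrame into a native PRIMAP2 xarray Dataset
data_pm2 = pm2.pm2io.from_interchange_format(data_if)
```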
26 | 27 | ## Wide CSV file 28 | 29 | The function {meth}`primap2.pm2io.read_wide_csv_file_if` reads wide-format CSV files, 30 | which are widely used for emissions data. 31 | All coordinate columns can be defined using dicts 32 | as input, including default values for coordinates not available in the CSV 33 | files. 34 | Data can be filtered for wanted or unwanted coordinate values. 35 | 36 | To illustrate the use of the function, we have two examples. 37 | The first example 38 | illustrates the different input parameters using a simple test dataset, while 39 | the second example is a real-world use of the function, reading the PRIMAP-hist 40 | v2.2 dataset into PRIMAP2. 41 | 42 | ```{toctree} 43 | :caption: 'Examples wide CSV:' 44 | :maxdepth: 2 45 | 46 | test_data_wide 47 | old-PRIMAP-hist 48 | ``` 49 | 50 | ## Long (tidy) CSV file 51 | 52 | The function {meth}`primap2.pm2io.read_long_csv_file_if` reads long-format CSV files 53 | (also often called tidy CSV files), which are for example used by FAOstat for 54 | agricultural emissions data. 55 | The input for the function is very similar to the input for 56 | {meth}`primap2.pm2io.read_wide_csv_file_if` described previously, the main difference 57 | being that you have to specify the columns in which the data and time information are found. 58 | 59 | To illustrate the use of the function, we again have an example. 60 | It simply reads in some example data to show how the function works. 61 | 62 | ```{toctree} 63 | :caption: 'Examples long CSV:' 64 | :maxdepth: 2 65 | 66 | test_data_long 67 | ``` 68 | 69 | ## Treatment of string codes 70 | 71 | String codes like "IE", "NA" etc. need to be mapped to numerical values. 72 | The codes have to be interpreted to decide whether they should be 73 | mapped to 0 or 74 | NaN. For example, "IE" stands for "included elsewhere", so it has to be 75 | mapped to 0 to show that emissions in this timeseries are 0 and not missing. 76 | 77 | By default, we use simple rules combined with explicit mappings for special cases. 78 | Each data point is tested against the following rules, in the same order as listed below. 79 | 80 | - If the code contains `FX` it is mapped to `np.nan`. 81 | - If the code contains `IE` and/or `NO` it is mapped to 0. 82 | - If the code contains `NE` and/or `NA` but neither `IE` nor `NO`, it is mapped to `np.nan`. 83 | 84 | The special cases are: 85 | 86 | ```python 87 | _special_codes = { 88 | "C": np.nan, 89 | "CC": np.nan, 90 | "CH4": np.nan, # TODO: move to user passed codes in CRT reading 91 | "nan": np.nan, 92 | "NaN": np.nan, 93 | "-": 0, 94 | "NE0": np.nan, 95 | "NE(1)": np.nan, 96 | "": np.nan, 97 | "FX": np.nan, 98 | } 99 | ``` 100 | 101 | `NaN` and `nan` will be detected as `np.nan`. 102 | 103 | Users can define custom rules by passing a dict in the format of `_special_codes` 104 | as the `convert_str` parameter, as shown in the sketch at the end of this page. 105 | 106 | ## Further formats 107 | 108 | In the future we will offer data reading functions for further formats. 109 | Information will be added here.
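Returning to the custom string-code rules described above, here is a minimal sketch of passing such a mapping via the `convert_str` parameter. The code strings ("CONF", "MISSING"), the file name, and the column mapping are purely illustrative, and we assume `read_wide_csv_file_if` as the reading function.

```python
import numpy as np

import primap2 as pm2

# custom mapping in the same format as _special_codes;
# the codes "CONF" and "MISSING" are made up for illustration
custom_codes = {
    "CONF": 0,  # confidential, but included elsewhere -> emissions are 0 here
    "MISSING": np.nan,  # no estimate available -> not a number
}

data_if = pm2.pm2io.read_wide_csv_file_if(
    "emissions.csv",  # illustrative file name
    coords_cols={"unit": "unit", "entity": "gas", "area": "country"},
    coords_defaults={"source": "MY_SOURCE"},
    coords_terminologies={"area": "ISO3"},
    convert_str=custom_codes,
)
```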
109 | -------------------------------------------------------------------------------- /docs/source/data_reading/old-PRIMAP-hist.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | format_version: 0.13 7 | jupytext_version: 1.16.4 8 | kernelspec: 9 | display_name: Python 3 (ipykernel) 10 | language: python 11 | name: python3 12 | --- 13 | 14 | # Data reading example 2 - PRIMAP-hist v2.2 15 | 16 | In this example, we read an old version of PRIMAP-hist which is not available in the 17 | native format because it was produced before the native format was invented. 18 | 19 | ```{code-cell} ipython3 20 | # imports 21 | import primap2 as pm2 22 | ``` 23 | 24 | ## Obtain the input data 25 | 26 | The PRIMAP-hist data (doi:10.5281/zenodo.4479172) is available [from Zenodo](https://zenodo.org/record/4479172); 27 | we download it directly. 28 | 29 | ```{code-cell} ipython3 30 | import requests 31 | response = requests.get("https://zenodo.org/records/4479172/files/PRIMAP-hist_v2.2_19-Jan-2021.csv?download=1") 32 | file = "PRIMAPHIST22__19-Jan-2021.csv" 33 | with open(file, "w") as fd: 34 | fd.write(response.text) 35 | ``` 36 | 37 | ## Dataset Specifications 38 | Here we define which columns of the csv file contain the coordinates. 39 | The dict `coords_cols` contains the mapping of csv columns to PRIMAP2 dimensions. 40 | Default values are set using `coords_defaults`. 41 | The terminologies (e.g. IPCC2006 for categories or the ISO3 country codes for area) are set in the `coords_terminologies` dict. 42 | `coords_value_mapping` defines conversion of metadata values, e.g. category codes. 43 | `filter_keep` and `filter_remove` filter the input data. 44 | Each entry in `filter_keep` specifies a subset of the input data which is kept, while the subsets defined by `filter_remove` are removed from the input data. 45 | 46 | For details, we refer to the documentation of {py:func}`primap2.pm2io.read_wide_csv_file_if`. 47 | 48 | ```{code-cell} ipython3 49 | coords_cols = { 50 | "unit": "unit", 51 | "entity": "entity", 52 | "area": "country", 53 | "scenario": "scenario", 54 | "category": "category", 55 | } 56 | coords_defaults = { 57 | "source": "PRIMAP-hist_v2.2", 58 | } 59 | coords_terminologies = { 60 | "area": "ISO3", 61 | "category": "IPCC2006", 62 | "scenario": "PRIMAP-hist", 63 | } 64 | 65 | coords_value_mapping = { 66 | "category": "PRIMAP1", 67 | "unit": "PRIMAP1", 68 | "entity": "PRIMAP1", 69 | } 70 | 71 | filter_keep = { 72 | "f1": { 73 | "entity": "CO2", 74 | "category": ["IPC2", "IPC1"], 75 | "country": ["AUS", "BRA", "CHN", "GBR", "AFG"], 76 | }, 77 | "f2": { 78 | "entity": "KYOTOGHG", 79 | "category": ["IPCMAG", "IPC4"], 80 | "country": ["AUS", "BRA", "CHN", "GBR", "AFG"], 81 | }, 82 | } 83 | 84 | filter_remove = {"f1": {"scenario": "HISTTP"}} 85 | # filter_keep = {"f1": {"entity": "KYOTOGHG", "category": ["IPC2", "IPC1"]},} 86 | # filter_keep = {} 87 | # filter_remove = {} 88 | 89 | meta_data = {"references": "doi:10.5281/zenodo.4479172"} 90 | ``` 91 | 92 | ## Reading the data to interchange format 93 | To enable a wider use of the PRIMAP2 data reading functionality, we read into the PRIMAP2 interchange format, which is a wide-format pandas DataFrame with coordinates in columns, following PRIMAP2 specifications. 94 | Additional metadata not captured in this format is stored in `DataFrame.attrs` as a dictionary.
95 | As the `attrs` functionality in pandas is experimental, it is only present in the DataFrame returned by the reading functions and should be saved separately before doing any processing with the DataFrame. 96 | 97 | Here we read the data using the {meth}`primap2.pm2io.read_wide_csv_file_if` function. 98 | We have specified restrictive filters above to limit the data included in this notebook. 99 | 100 | ```{code-cell} ipython3 101 | PMH_if = pm2.pm2io.read_wide_csv_file_if( 102 | file, 103 | coords_cols=coords_cols, 104 | coords_defaults=coords_defaults, 105 | coords_terminologies=coords_terminologies, 106 | coords_value_mapping=coords_value_mapping, 107 | filter_keep=filter_keep, 108 | filter_remove=filter_remove, 109 | meta_data=meta_data, 110 | ) 111 | PMH_if.head() 112 | ``` 113 | 114 | ```{code-cell} ipython3 115 | PMH_if.attrs 116 | ``` 117 | 118 | ## Transformation to PRIMAP2 xarray format 119 | The transformation to PRIMAP2 xarray format is done using the function {meth}`primap2.pm2io.from_interchange_format`, which takes an interchange format DataFrame. 120 | The resulting xr Dataset is already quantified; thus, the variables are pint arrays which include a unit. 121 | 122 | ```{code-cell} ipython3 123 | PMH_pm2 = pm2.pm2io.from_interchange_format(PMH_if) 124 | PMH_pm2 125 | ``` 126 | -------------------------------------------------------------------------------- /docs/source/data_reading/test_csv_data_long.csv: -------------------------------------------------------------------------------- 1 | country,category,gas,unit,year,emissions,other 2 | AUS,IPC1,CO2,Gg,1991,4.1,a 3 | AUS,IPC1,CO2,Gg,2000,5,b 4 | AUS,IPC1,CO2,Gg,2010,6,c 5 | ZAM,IPC2,CH4,Mt,1991,7,d 6 | ZAM,IPC2,CH4,Mt,2000,8,e 7 | ZAM,IPC2,CH4,Mt,2010,9,f 8 | -------------------------------------------------------------------------------- /docs/source/data_reading/test_csv_data_sec_cat.csv: -------------------------------------------------------------------------------- 1 | country,category,classification,gas,unit,1991,2000,2010 2 | AUS,IPC1,TOTAL,CO2,Mt,4,5,6 3 | AUS,IPC0,TOTAL,KYOTOGHG,MtCO2eq,8,9,10 4 | ZAM,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40 5 | ZAM,IPC2,TOTAL,CH4,Gg,7,8,9 6 | ZAM,IPC2,TOTAL,CO2,Gg,12,13,14 7 | USA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,300,200,400 8 | USA,IPC2,TOTAL,CH4,Gg,70,80,90 9 | USA,IPC3,TOTAL,CO2,Gg,120,130,140 10 | FRA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40 11 | FRA,IPC2,TOTAL,CH4,Gg,7,8,9 12 | FRA,IPC2,TOTAL,CO2,Gg,12,13,14 13 | -------------------------------------------------------------------------------- /docs/source/data_reading/test_csv_data_sec_cat_if.csv: -------------------------------------------------------------------------------- 1 | "source","scenario (general)","area (ISO3)","entity","unit","category (IPCC2006)","Class (class)","Type (type)","1991","2000","2010" 2 | "TESTcsv2021","HISTORY","AUS","CO2","Gg CO2 / yr","1","TOTAL","fugitive",4000.0000000000005,5000.000000000001,6000.000000000001 3 | "TESTcsv2021","HISTORY","AUS","KYOTOGHG (SARGWP100)","Mt CO2 / yr","0","TOTAL","fugitive",8.0,9.0,10.0 4 | "TESTcsv2021","HISTORY","FRA","CH4","Gg CH4 / yr","2","TOTAL","fugitive",7.0,8.0,9.0 5 | "TESTcsv2021","HISTORY","FRA","CO2","Gg CO2 / yr","2","TOTAL","fugitive",12.0,13.0,14.0 6 | "TESTcsv2021","HISTORY","FRA","KYOTOGHG (SARGWP100)","Mt CO2 / yr","0","TOTAL","fugitive",0.03,0.02,0.04 7 | "TESTcsv2021","HISTORY","USA","CH4","Gg CH4 / yr","2","TOTAL","fugitive",70.0,80.0,90.0 8 | "TESTcsv2021","HISTORY","USA","CO2","Gg CO2 / yr","3","TOTAL","fugitive",120.0,130.0,140.0 9 |
"TESTcsv2021","HISTORY","USA","KYOTOGHG (SARGWP100)","Mt CO2 / yr","0","TOTAL","fugitive",0.3,0.2,0.4 10 | "TESTcsv2021","HISTORY","ZAM","CH4","Gg CH4 / yr","2","TOTAL","fugitive",7.0,8.0,9.0 11 | "TESTcsv2021","HISTORY","ZAM","CO2","Gg CO2 / yr","2","TOTAL","fugitive",12.0,13.0,14.0 12 | "TESTcsv2021","HISTORY","ZAM","KYOTOGHG (SARGWP100)","Mt CO2 / yr","0","TOTAL","fugitive",0.03,0.02,0.04 13 | -------------------------------------------------------------------------------- /docs/source/data_reading/test_csv_data_sec_cat_if.yaml: -------------------------------------------------------------------------------- 1 | attrs: 2 | area: area (ISO3) 3 | cat: category (IPCC2006) 4 | scen: scenario (general) 5 | data_file: test_csv_data_sec_cat_if.csv 6 | dimensions: 7 | '*': 8 | - Class (class) 9 | - Type (type) 10 | - area (ISO3) 11 | - category (IPCC2006) 12 | - entity 13 | - scenario (general) 14 | - source 15 | - unit 16 | time_format: '%Y' 17 | -------------------------------------------------------------------------------- /docs/source/data_reading/test_data_long.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | format_version: 0.13 7 | jupytext_version: 1.16.4 8 | kernelspec: 9 | display_name: Python 3 (ipykernel) 10 | language: python 11 | name: python3 12 | --- 13 | 14 | # Data reading example 3 - minimal test dataset (long) 15 | To run this example the file `test_csv_data_long.csv` must be placed in the same folder as this notebook. 16 | You can find the notebook and the csv file in the folder `docs/source/data_reading` in the PRIMAP2 repository. 17 | 18 | ```{code-cell} ipython3 19 | # imports 20 | import primap2 as pm2 21 | ``` 22 | 23 | ## Dataset Specifications 24 | Here we define which columns of the csv file contain the metadata. 25 | The dict `coords_cols` contains the mapping of csv columns to PRIMAP2 dimensions. 26 | Default values not found in the CSV are set using `coords_defaults`. 27 | The terminologies (e.g. IPCC2006 for categories or the ISO3 country codes for area) are set in the `coords_terminologies` dict. 28 | `coords_value_mapping` defines conversion of metadata values, e.g. category codes. 29 | You can either specify a dict for a metadata column which directly defines the mapping, a function which is used to map metadata values, or a string to select one of the pre-defined functions included in PRIMAP2. 30 | `filter_keep` and `filter_remove` filter the input data. 31 | Each entry in `filter_keep` specifies a subset of the input data which is kept while the subsets defined by `filter_remove` are removed from the input data. 32 | 33 | In the example, the CSV contains the coordinates `country`, `category`, `gas`, and `year`. 34 | They are translated into their proper PRIMAP2 names by specifying the in the 35 | `coords_cols` dictionary. Additionally, columns are specified for the `unit`, and 36 | for the actual `data` (which is found in the column `emissions` in the CSV file). 37 | The format used in the `year` column is given using the `time_format` argument. 38 | Values for the `scenario` and `source` coordinate is not available in the csv file; 39 | therefore, we add them using default values defined in `coords_defaults`. 40 | Terminologies are given for `area`, `category`, `scenario`, and the secondary categories. 41 | Providing these terminologies is mandatory to create a valid PRIMAP2 dataset. 
42 | 43 | Coordinate mapping is necessary for `category`, `entity`, and `unit`. 44 | They all use the PRIMAP1 specifications in the csv file. 45 | For `category` this means that e.g. `IPC1A2` is converted to `1.A.2`; for `entity`, the conversion affects the way GWP information is stored in the entity name: e.g. `KYOTOGHGAR4` is mapped to `KYOTOGHG (AR4GWP100)`. 46 | 47 | In this example, we also add `meta_data` to add a reference for the data and usage rights. 48 | 49 | ```{code-cell} ipython3 50 | file = "test_csv_data_long.csv" 51 | coords_cols = { 52 | "unit": "unit", 53 | "entity": "gas", 54 | "area": "country", 55 | "category": "category", 56 | "time": "year", 57 | "data": "emissions", 58 | } 59 | coords_defaults = { 60 | "source": "TESTcsv2021", 61 | "scenario": "HISTORY", 62 | } 63 | coords_terminologies = { 64 | "area": "ISO3", 65 | "category": "IPCC2006", 66 | "scenario": "general", 67 | } 68 | coords_value_mapping = { 69 | "category": "PRIMAP1", 70 | "entity": "PRIMAP1", 71 | "unit": "PRIMAP1", 72 | } 73 | meta_data = { 74 | "references": "Just ask around.", 75 | "rights": "public domain", 76 | } 77 | data_if = pm2.pm2io.read_long_csv_file_if( 78 | file, 79 | coords_cols=coords_cols, 80 | coords_defaults=coords_defaults, 81 | coords_terminologies=coords_terminologies, 82 | coords_value_mapping=coords_value_mapping, 83 | meta_data=meta_data, 84 | time_format="%Y", 85 | ) 86 | data_if.head() 87 | ``` 88 | 89 | ```{code-cell} ipython3 90 | data_if.attrs 91 | ``` 92 | 93 | ## Transformation to PRIMAP2 xarray format 94 | The transformation to PRIMAP2 xarray format is done using the function {meth}`primap2.pm2io.from_interchange_format`, which takes an interchange format DataFrame. 95 | The resulting xr Dataset is already quantified; thus, the variables are pint arrays which include a unit. 96 | 97 | ```{code-cell} ipython3 98 | data_pm2 = pm2.pm2io.from_interchange_format(data_if) 99 | data_pm2 100 | ``` 101 | -------------------------------------------------------------------------------- /docs/source/data_reading/test_data_wide.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | format_version: 0.13 7 | jupytext_version: 1.16.4 8 | kernelspec: 9 | display_name: Python 3 (ipykernel) 10 | language: python 11 | name: python3 12 | --- 13 | 14 | # Data reading example 1 - minimal test dataset 15 | To run this example, the file `test_csv_data_sec_cat.csv` must be placed in the same folder as this notebook. You can find the notebook and the csv file in the folder `docs/source/data_reading` in the PRIMAP2 repository. 16 | 17 | ```{code-cell} ipython3 18 | import primap2 as pm2 19 | ``` 20 | 21 | ## Dataset Specifications 22 | Here we define which columns of the csv file contain the metadata. The dict `coords_cols` contains the mapping of csv columns to PRIMAP2 dimensions. 23 | Default values are set using `coords_defaults`. 24 | The terminologies (e.g. IPCC2006 for categories or the ISO3 country codes for area) are set in the `coords_terminologies` dict. 25 | `coords_value_mapping` defines conversion of metadata values, e.g. category codes. 26 | You can either specify a dict for a metadata column which directly defines the mapping, a function which is used to map metadata values, or a string to select one of the pre-defined functions included in PRIMAP2. 27 | `filter_keep` and `filter_remove` filter the input data.
28 | Each entry in `filter_keep` specifies a subset of the input data which is kept, while the subsets defined by `filter_remove` are removed from the input data. 29 | 30 | For details, we refer to the documentation of {py:func}`primap2.pm2io.read_wide_csv_file_if`. 31 | 32 | In the example, the CSV contains the coordinates `entity`, `area`, `category`, and the secondary category `class`. 33 | As secondary categories have free names, they are prefixed with `sec_cats__` to make clear that they are secondary categories. 34 | Values for the secondary category `type` and the `scenario` coordinate are not available in the csv file; 35 | therefore, we add them using default values defined in `coords_defaults`. 36 | Terminologies are given for `area`, `category`, `scenario`, and the secondary categories. 37 | Providing these terminologies is mandatory to create a valid PRIMAP2 dataset. 38 | 39 | Coordinate mapping is necessary for `category`, `entity`, and `unit`. 40 | They all use the PRIMAP1 specifications in the csv file. 41 | For `category` this means that e.g. `IPC1A2` is converted to `1.A.2`; for `entity`, the conversion affects the way GWP information is stored in the entity name: e.g. `KYOTOGHGAR4` is mapped to `KYOTOGHG (AR4GWP100)`. 42 | 43 | In this example, we also add `meta_data` to add a reference for the data and usage rights. 44 | 45 | For examples on using filters, we refer to the [second example which reads the PRIMAP-hist data](./old-PRIMAP-hist). 46 | 47 | ```{code-cell} ipython3 48 | file = "test_csv_data_sec_cat.csv" 49 | coords_cols = { 50 | "unit": "unit", 51 | "entity": "gas", 52 | "area": "country", 53 | "category": "category", 54 | "sec_cats__Class": "classification", 55 | } 56 | coords_defaults = { 57 | "source": "TESTcsv2021", 58 | "sec_cats__Type": "fugitive", 59 | "scenario": "HISTORY", 60 | } 61 | coords_terminologies = { 62 | "area": "ISO3", 63 | "category": "IPCC2006", 64 | "sec_cats__Type": "type", 65 | "sec_cats__Class": "class", 66 | "scenario": "general", 67 | } 68 | coords_value_mapping = { 69 | "category": "PRIMAP1", 70 | "entity": "PRIMAP1", 71 | "unit": "PRIMAP1", 72 | } 73 | meta_data = { 74 | "references": "Just ask around.", 75 | "rights": "public domain", 76 | } 77 | data_if = pm2.pm2io.read_wide_csv_file_if( 78 | file, 79 | coords_cols=coords_cols, 80 | coords_defaults=coords_defaults, 81 | coords_terminologies=coords_terminologies, 82 | coords_value_mapping=coords_value_mapping, 83 | meta_data=meta_data, 84 | ) 85 | data_if.head() 86 | ``` 87 | 88 | ```{code-cell} ipython3 89 | data_if.attrs 90 | ``` 91 | 92 | ## Transformation to PRIMAP2 xarray format 93 | The transformation to PRIMAP2 xarray format is done using the function {meth}`primap2.pm2io.from_interchange_format`, which takes an interchange format DataFrame. 94 | The resulting xr Dataset is already quantified; thus, the variables are pint arrays which include a unit. 95 | 96 | ```{code-cell} ipython3 97 | data_pm2 = pm2.pm2io.from_interchange_format(data_if) 98 | data_pm2 99 | ``` 100 | -------------------------------------------------------------------------------- /docs/source/datalad.md: -------------------------------------------------------------------------------- 1 | # Data Packages 2 | 3 | Individual PRIMAP2 datasets are stored in netcdf files, which preserve all 4 | metadata and the structure of the data.
5 | One or multiple datasets are stored together with the input data and python scripts 6 | needed to generate them in data packages, which are managed with 7 | [datalad](https://www.datalad.org/). 8 | Documentation about datalad can be found in 9 | [its handbook](https://handbook.datalad.org). 10 | 11 | ## Installing datalad 12 | 13 | Datalad depends on multiple components (python, git, and git-annex) and therefore the 14 | installation differs for each platform. 15 | Please refer to the 16 | [datalad handbook](http://handbook.datalad.org/en/latest/intro/installation.html) 17 | for detailed installation instructions. 18 | 19 | ## Creating a data package 20 | 21 | Detailed information on creating datasets can be found in the 22 | [corresponding section](http://handbook.datalad.org/en/latest/basics/101-101-create.html) 23 | in the datalad handbook. 24 | Here, we will show the commands needed to create a dataset for use with PRIMAP2. 25 | To create an empty dataset, use the `datalad create` command: 26 | 27 | ```shell 28 | $ datalad create -c text2git 29 | ``` 30 | 31 | This will create a new folder and populate it with configuration for git, git-annex, 32 | and datalad. 33 | Additionally, it will add configuration so that text files such as python code 34 | are stored in git with full change tracking, while binary files such as netcdf files 35 | are added to the annex so that they are transferred only on demand. 36 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | ```{include} ../../README.md 2 | ``` 3 | ## Documentation 4 | 5 | We have divided the documentation into chapters, most of them focusing on using the 6 | library. 7 | We also have a chapter for developers working on the library itself. 8 | 9 | ```{toctree} 10 | :caption: Contents 11 | :maxdepth: 2 12 | 13 | installation 14 | usage/index 15 | data_format/index 16 | data_reading/index 17 | datalad 18 | development 19 | credits 20 | changelog 21 | api/index 22 | ideas_for_sparse_data 23 | ``` 24 | -------------------------------------------------------------------------------- /docs/source/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Releases 4 | 5 | To install PRIMAP2, run this command in your terminal: 6 | 7 | ```shell 8 | $ pip install primap2 9 | ``` 10 | 11 | This is the preferred method to install PRIMAP2, as it will always install the 12 | most recent release. 13 | 14 | If you don't have [pip] installed, this [Python installation guide] can guide 15 | you through the process. 16 | 17 | ## From sources 18 | 19 | The sources for PRIMAP2 can be downloaded from the 20 | [Github repo](https://github.com/primap-community/primap2).
21 | 22 | Simply clone the public repository using git: 23 | 24 | ```shell 25 | $ git clone https://github.com/primap-community/primap2.git 26 | ``` 27 | 28 | Once you have a copy of the source, you can install it with: 29 | 30 | ```shell 31 | $ pip install . 32 | ``` 33 | 34 | [pip]: https://pip.pypa.io 35 | [python installation guide]: http://docs.python-guide.org/en/latest/starting/installation/ 36 | -------------------------------------------------------------------------------- /docs/source/jupytext.toml: -------------------------------------------------------------------------------- 1 | formats = "ipynb,myst" 2 | cell_metadata_filter = "-pycharm" 3 | -------------------------------------------------------------------------------- /docs/source/minimal_ds.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/minimal_ds.nc -------------------------------------------------------------------------------- /docs/source/opulent_ds.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/docs/source/opulent_ds.nc -------------------------------------------------------------------------------- /docs/source/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | # Extend the general settings 3 | extend = "../../pyproject.toml" 4 | 5 | [tool.ruff.lint] 6 | ignore = [ 7 | "B018", # Useless expression - common and fine in notebooks. 8 | "E402", # Module level import not at top of file - fine in notebooks 9 | ] 10 | -------------------------------------------------------------------------------- /docs/source/usage/add_and_overwrite.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | format_version: 0.13 8 | jupytext_version: 1.16.4 9 | kernelspec: 10 | display_name: Python 3 (ipykernel) 11 | language: python 12 | name: python3 13 | --- 14 | 15 | # Add and overwrite data 16 | 17 | Generally, datasets in primap2 follow the xarray convention that the data within 18 | datasets is immutable. 19 | To change any data, you need to create a view or a copy of the dataset with the changes 20 | applied. 21 | To this end, we provide a `set` function to set specified data. 22 | It can be used to only fill gaps, add wholly new data, or overwrite existing data in 23 | the dataset. 24 | 25 | ## The `set` functions 26 | 27 | We provide {py:meth}`xarray.DataArray.pr.set` and {py:meth}`xarray.Dataset.pr.set` functions, 28 | for `DataArray`s (individual gases) and `Dataset`s (multiple gases), respectively. 29 | 30 | The basic signature of the `set` functions is `set(dimension, keys, values)`, and it 31 | returns the changed object without changing the original one.
32 | Use it like this: 33 | 34 | ```{code-cell} 35 | # setup: import library and open dataset 36 | import primap2 37 | 38 | ds_min = primap2.open_dataset("../minimal_ds.nc") 39 | 40 | # Now, select a slice of the CO2 data as an example to use 41 | da = ds_min["CO2"].loc[{"time": slice("2000", "2005")}] 42 | da 43 | ``` 44 | 45 | ```{code-cell} 46 | import numpy as np 47 | 48 | from primap2 import ureg 49 | 50 | # generate new data for Cuba 51 | new_data_cuba = np.linspace(0, 20, 6) * ureg("Gg CO2 / year") 52 | 53 | # create a modified copy of our original data with the Cuba data added 54 | modified = da.pr.set("area", "CUB", new_data_cuba) 55 | modified 56 | ``` 57 | 58 | By default, existing non-NaN values are not overwritten: 59 | 60 | ```{code-cell} 61 | try: 62 | da.pr.set("area", "COL", np.linspace(0, 20, 6) * ureg("Gg CO2 / year")) 63 | except ValueError as err: 64 | print(err) 65 | ``` 66 | 67 | You can overwrite existing values by specifying `existing="overwrite"` 68 | to overwrite all values or `existing="fillna"` to overwrite only NaNs. 69 | 70 | ```{code-cell} 71 | da.pr.set( 72 | "area", 73 | "COL", 74 | np.linspace(0, 20, 6) * ureg("Gg CO2 / year"), 75 | existing="overwrite", 76 | ) 77 | ``` 78 | 79 | By default, the `set()` function extends the specified dimension automatically to 80 | accommodate new values if not all key values are in the specified dimension yet. 81 | You can change this by specifying `new="error"`, which will raise a KeyError if any of 82 | the keys is not found: 83 | 84 | ```{code-cell} 85 | try: 86 | da.pr.set( 87 | "area", 88 | ["COL", "CUB"], 89 | np.linspace(0, 20, 6) * ureg("Gg CO2 / year"), 90 | existing="overwrite", 91 | new="error", 92 | ) 93 | except KeyError as err: 94 | print(err) 95 | ``` 96 | 97 | ## Example: computing super-categories 98 | 99 | In particular, the `set()` functions can also be used with xarray's arithmetic 100 | functions to derive values from existing data and store the result in the Dataset. 101 | As an example, we will derive better values for category 0 by adding all 102 | its subcategories and storing the result. 103 | 104 | First, let's load a dataset and see the current data for a small subset of the data: 105 | 106 | ```{code-cell} 107 | ds = primap2.open_dataset("../opulent_ds.nc") 108 | 109 | sel = { 110 | "area": "COL", 111 | "category": ["0", "1", "2", "3", "4", "5"], 112 | "animal": "cow", 113 | "product": "milk", 114 | "scenario": "highpop", 115 | "source": "RAND2020", 116 | } 117 | subset = ds.pr.loc[sel].squeeze() 118 | 119 | # TODO: currently, plotting with units still emits a warning 120 | import warnings 121 | 122 | with warnings.catch_warnings(): 123 | warnings.simplefilter("ignore") 124 | subset["CO2"].plot.line(x="time", hue="category (IPCC 2006)") 125 | ``` 126 | 127 | While it is hard to see any details in this plot, it is clearly visible 128 | that category 0 is not the sum of the other categories (which should not 129 | come as a surprise because the data were generated at random).
130 | 131 | We will now recompute category 0 for the entire dataset using set(): 132 | 133 | ```{code-cell} 134 | cat0_new = ds.pr.loc[{"category": ["1", "2", "3", "4", "5"]}].pr.sum("category") 135 | 136 | ds = ds.pr.set( 137 | "category", 138 | "0", 139 | cat0_new, 140 | existing="overwrite", 141 | ) 142 | 143 | # plot a small subset of the result 144 | subset = ds.pr.loc[sel].squeeze() 145 | # TODO: currently, plotting with units still emits a warning 146 | import warnings 147 | 148 | with warnings.catch_warnings(): 149 | warnings.simplefilter("ignore") 150 | subset["CO2"].plot.line(x="time", hue="category (IPCC 2006)") 151 | ``` 152 | 153 | As you can see in the plot, category 0 is now computed from its subcategories. 154 | The set() method of Datasets works on all data variables in the dataset which 155 | have the corresponding dimension. In this example, the "population" variable 156 | does not have categories, so it was unchanged. 157 | -------------------------------------------------------------------------------- /docs/source/usage/downscaling.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | format_version: 0.13 8 | jupytext_version: 1.16.4 9 | kernelspec: 10 | display_name: Python 3 (ipykernel) 11 | language: python 12 | name: python3 13 | --- 14 | 15 | # Downscaling 16 | 17 | 18 | To downscale a super-category (for example, regional data) to sub-categories 19 | (for example, country-level data in the same region), the 20 | {py:meth}`xarray.DataArray.pr.downscale_timeseries` 21 | function is available. It determines shares from available data points, then 22 | does downscaling for years where full information is not available. 23 | 24 | Let's first create an example dataset with regional data and some country data 25 | missing. 26 | 27 | ```{code-cell} ipython3 28 | --- 29 | mystnb: 30 | code_prompt_show: Logging setup for the docs 31 | tags: [hide-cell] 32 | --- 33 | # setup logging for the docs - we don't need debug logs 34 | import sys 35 | from loguru import logger 36 | 37 | logger.remove() 38 | logger.add(sys.stderr, level="INFO") 39 | ``` 40 | 41 | ```{code-cell} ipython3 42 | import primap2 43 | import numpy as np 44 | import xarray as xr 45 | 46 | # select an example dataset 47 | da = primap2.open_dataset("../minimal_ds.nc")["CO2"].loc[{"time": slice("2000", "2003"), "source": "RAND2020"}] 48 | da.pr.to_df() 49 | ``` 50 | 51 | ```{code-cell} ipython3 52 | # compute regional data as sum of country-level data 53 | temp = da.sum(dim="area (ISO3)") 54 | temp = temp.expand_dims({"area (ISO3)": ["LATAM"]}) 55 | # delete data from the country level for the years 2002-2003 (inclusive) 56 | da.loc[{"time": slice("2002", "2003")}].pint.magnitude[:] = np.nan 57 | # add regional data to the array 58 | da = xr.concat([da, temp], dim="area (ISO3)") 59 | da.pr.to_df() 60 | ``` 61 | 62 | As you can see, for 2000 and 2001, country-level data is available, but for later 63 | years, only regional ("LATAM") data is available. We now want to extrapolate the 64 | missing data using the shares from early years and the regional data. 
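For instance (a purely hypothetical illustration of the share-based approach): if BOL accounted for 10 % of the LATAM total in 2000 and 2001, its missing 2002 value is estimated as 10 % of the 2002 LATAM value.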
65 | 66 | ```{code-cell} ipython3 67 | # Do the downscaling to fill in country-level data from regional data 68 | da.pr.downscale_timeseries( 69 | basket="LATAM", 70 | basket_contents=["BOL", "MEX", "COL", "ARG"], 71 | dim="area (ISO3)", 72 | ) 73 | ``` 74 | 75 | For the downscaling, shares for the countries are determined at the points in time where data for 76 | all countries is available; the shares are inter- and extrapolated where 77 | data is missing, and then the regional data is downscaled using these shares. 78 | -------------------------------------------------------------------------------- /docs/source/usage/gas_baskets.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | format_version: 0.13 8 | jupytext_version: 1.16.4 9 | kernelspec: 10 | display_name: Python 3 (ipykernel) 11 | language: python 12 | name: python3 13 | --- 14 | 15 | # Gas baskets 16 | 17 | Gas baskets like `KyotoGHG` are essentially the sum of individual emissions. Usually, 18 | gas baskets are specified in units of warming equivalent CO2, so they 19 | always have to specify a global warming potential metric as well. 20 | 21 | We offer a few specialized functions for handling gas baskets. 22 | 23 | ## Summation 24 | 25 | To sum the contents of gas baskets, the function 26 | {py:meth}`xarray.Dataset.pr.gas_basket_contents_sum` is available. 27 | 28 | Let's first create an example dataset. 29 | 30 | ```{code-cell} ipython3 31 | import primap2 32 | import xarray as xr 33 | import numpy as np 34 | 35 | # select example dataset 36 | ds = primap2.open_dataset("../minimal_ds.nc").loc[{"time": slice("2000", "2003")}][ 37 | ["CH4", "CO2", "SF6"] 38 | ] 39 | ds 40 | ``` 41 | 42 | ```{code-cell} 43 | # add (empty) gas basket with corresponding metadata 44 | ds["KyotoGHG (AR4GWP100)"] = xr.full_like(ds["CO2"], np.nan).pr.quantify(units="Gg CO2 / year") 45 | ds["KyotoGHG (AR4GWP100)"].attrs = {"entity": "KyotoGHG", "gwp_context": "AR4GWP100"} 46 | 47 | ds 48 | ``` 49 | 50 | Now, we can compute `KyotoGHG` from its contents (assuming for the moment that this 51 | only includes CO$_2$, SF$_6$ and CH$_4$): 52 | 53 | ```{code-cell} 54 | # compute gas basket from its contents, which have to be given explicitly 55 | ds.pr.gas_basket_contents_sum( 56 | basket="KyotoGHG (AR4GWP100)", 57 | basket_contents=["CO2", "SF6", "CH4"], 58 | ) 59 | ``` 60 | 61 | Note that like all PRIMAP2 functions, 62 | {py:meth}`xarray.Dataset.pr.gas_basket_contents_sum` 63 | returns the result without overwriting anything in the original dataset, 64 | so you have to explicitly overwrite existing data if you want that: 65 | 66 | ```{code-cell} 67 | ds["KyotoGHG (AR4GWP100)"] = ds.pr.gas_basket_contents_sum( 68 | basket="KyotoGHG (AR4GWP100)", 69 | basket_contents=["CO2", "SF6", "CH4"], 70 | ) 71 | ``` 72 | 73 | ## Filling in missing information 74 | 75 | To fill in missing data in a gas basket, use 76 | {py:meth}`xarray.Dataset.pr.fill_na_gas_basket_from_contents` 77 | 78 | ```{code-cell} 79 | # delete all data about the years 2002-2003 (inclusive) from the 80 | # KyotoGHG data 81 | ds["KyotoGHG (AR4GWP100)"].loc[{"time": slice("2002", "2003")}].pint.magnitude[:] = np.nan 82 | ds["KyotoGHG (AR4GWP100)"] 83 | ``` 84 | 85 | ```{code-cell} 86 | ds.pr.fill_na_gas_basket_from_contents( 87 | basket="KyotoGHG (AR4GWP100)", basket_contents=["CO2", "SF6", "CH4"] 88 | ) 89 | ``` 90 | 91 | The reverse case is
that you are missing some data in the timeseries of 92 | individual gases and want to fill those in using downscaled data from 93 | a gas basket. 94 | Here, use 95 | {py:meth}`xarray.Dataset.pr.downscale_gas_timeseries` 96 | 97 | ```{code-cell} 98 | # delete all data about the years 2002-2003 (inclusive) from the individual gas data 99 | sel = {"time": slice("2002", "2003")} 100 | ds["CO2"].loc[sel].pint.magnitude[:] = np.nan 101 | ds["SF6"].loc[sel].pint.magnitude[:] = np.nan 102 | ds["CH4"].loc[sel].pint.magnitude[:] = np.nan 103 | ds 104 | ``` 105 | 106 | ```{code-cell} 107 | # This determines gas shares at the points in time where individual gas 108 | # data is available, interpolates the shares where data is missing, and 109 | # then downscales the gas basket data using the interpolated shares 110 | ds.pr.downscale_gas_timeseries(basket="KyotoGHG (AR4GWP100)", basket_contents=["CO2", "SF6", "CH4"]) 111 | ``` 112 | -------------------------------------------------------------------------------- /docs/source/usage/index.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | Because PRIMAP2 builds on xarray, all xarray functionality is available 3 | right away. 4 | Additional functionality is provided in the `primap2` package and 5 | in the `pr` namespace on xarray objects. 6 | In this section, we will show the fundamentals of how to work with primap2 data. 7 | 8 | 9 | ```{toctree} 10 | :caption: Usage documentation 11 | :maxdepth: 2 12 | 13 | select_and_view 14 | store_and_load 15 | add_and_overwrite 16 | logging 17 | merge 18 | skipna 19 | downscaling 20 | gas_baskets 21 | csg 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/source/usage/logging.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | format_version: 0.13 8 | jupytext_version: 1.16.4 9 | kernelspec: 10 | display_name: Python 3 (ipykernel) 11 | language: python 12 | name: python3 13 | --- 14 | # Log messages 15 | 16 | Many primap2 functions emit log messages, which have an associated severity. 17 | The severities we use are shown in the table. 18 | 19 | | severity | used for | default | 20 | |----------|----------------------------------------------------------------------------------|---------| 21 | | debug | useful for understanding what functions do internally | ✗ | 22 | | info | noteworthy information during normal processing | ✓ | 23 | | warning | problems which are not necessarily fatal, but should be acknowledged by the user | ✓ | 24 | | error | problems which need to be solved by the user | ✓ | 25 | 26 | As noted, `debug` messages are not shown by default; all other messages are shown.
27 | 28 | ## Changing what is shown 29 | 30 | As noted above, by default `debug` messages are not shown, as you can see here: 31 | 32 | ```{code-cell} ipython3 33 | import primap2 34 | import sys 35 | 36 | from loguru import logger 37 | 38 | logger.debug("This message will not be shown") 39 | logger.info("This message will be shown") 40 | ``` 41 | 42 | To change this, remove the standard logger and add a new logger: 43 | 44 | ```{code-cell} ipython3 45 | logger.remove() 46 | logger.add(sys.stderr, level="DEBUG") 47 | 48 | logger.debug("Now you see debug messages") 49 | logger.info("You still also see info messages") 50 | ``` 51 | 52 | Instead of showing more, you can also show less: 53 | 54 | ```{code-cell} ipython3 55 | logger.remove() 56 | logger.add(sys.stderr, level="WARNING") 57 | 58 | logger.debug("You don't see debug messages") 59 | logger.info("You also don't see info messages") 60 | logger.warning("But you do see all warnings") 61 | ``` 62 | 63 | ## Advanced usage 64 | 65 | It is also possible to log to a file or add more information to the logs. See the 66 | [loguru documentation](https://loguru.readthedocs.io/) for details. 67 | -------------------------------------------------------------------------------- /docs/source/usage/merge.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | format_version: 0.13 8 | jupytext_version: 1.16.4 9 | kernelspec: 10 | display_name: Python 3 (ipykernel) 11 | language: python 12 | name: python3 13 | --- 14 | 15 | # Merging datasets 16 | 17 | xarray provides different functions to combine Datasets and DataArrays. 18 | However, these are not built to combine data which contain duplicates 19 | with rounding / processing errors. 20 | Unfortunately, when reading data, e.g. from country 21 | reports, this is often needed, as some sectors are included in several tables 22 | which might use different numbers of decimals. 23 | Thus, PRIMAP2 provides the {py:meth}`xarray.Dataset.pr.merge` 24 | function, which accepts data discrepancies not exceeding a given tolerance 25 | level. 26 | The merging of attributes is handled by xarray, and the `combine_attrs` 27 | parameter is just passed on to the xarray functions. 28 | The default is `drop_conflicts`. 29 | 30 | Below is an example using the built-in `opulent_ds`.
31 | 32 | ```{code-cell} ipython3 33 | :tags: [hide-cell] 34 | :mystnb: 35 | : code_prompt_show: "Logging setup for the docs" 36 | 37 | # setup logging for the docs - we don't need debug logs 38 | import sys 39 | from loguru import logger 40 | 41 | logger.remove() 42 | logger.add(sys.stderr, level="INFO") 43 | ``` 44 | 45 | ```{code-cell} ipython3 46 | import xarray as xr 47 | 48 | from primap2.tests.examples import opulent_ds 49 | 50 | op_ds = opulent_ds() 51 | 52 | # only take part of the countries to have something to actually merge 53 | da_start = op_ds["CO2"].pr.loc[{"area": ["ARG", "COL", "MEX"]}] 54 | 55 | # modify some data 56 | data_to_modify = op_ds["CO2"].pr.loc[{"area": ["ARG"]}].pr.sum("area") 57 | data_to_modify.data = data_to_modify.data * 1.009 58 | da_merge = op_ds["CO2"].pr.set("area", "ARG", data_to_modify, existing="overwrite") 59 | 60 | # merge with tolerance such that it will pass 61 | da_result = da_start.pr.merge(da_merge, tolerance=0.01) 62 | ``` 63 | 64 | ```{code-cell} ipython3 65 | # merge with lower tolerance such that it will fail 66 | try: 67 | # the logged message is very large, only show a small part 68 | logger.disable("primap2") 69 | da_result = da_start.pr.merge(da_merge, tolerance=0.005) 70 | except xr.MergeError as err: 71 | err_short = "\n".join(str(err).split("\n")[0:6]) 72 | print(f"An error occurred during merging: {err_short}") 73 | logger.enable("primap2") 74 | 75 | # you could also only log a warning and not raise an error 76 | # using the error_on_discrepancy=False argument to `merge` 77 | ``` 78 | -------------------------------------------------------------------------------- /docs/source/usage/skipna.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | format_version: 0.13 8 | jupytext_version: 1.16.4 9 | kernelspec: 10 | display_name: Python 3 (ipykernel) 11 | language: python 12 | name: python3 13 | --- 14 | 15 | # Dealing with missing information 16 | 17 | ## Aggregation 18 | 19 | xarray provides robust functions for aggregation ({py:meth}`xarray.DataArray.sum`). 20 | PRIMAP2 adds functions which skip missing data points if the 21 | information is missing at all points along certain axes, for example for 22 | a whole time series.
23 | Let's first create an example with missing information: 24 | 25 | ```{code-cell} ipython3 26 | import pandas as pd 27 | import numpy as np 28 | import xarray as xr 29 | import primap2 30 | 31 | time = pd.date_range("2000-01-01", "2003-01-01", freq="YS") 32 | area_iso3 = np.array(["COL", "ARG", "MEX"]) 33 | coords = [("area (ISO3)", area_iso3), ("time", time)] 34 | da = xr.DataArray( 35 | data=[ 36 | [1, 2, 3, 4], 37 | [np.nan, np.nan, np.nan, np.nan], 38 | [1, 2, 3, np.nan], 39 | ], 40 | coords=coords, 41 | name="test data" 42 | ) 43 | 44 | da.pr.to_df() 45 | ``` 46 | 47 | Now, we can use the primap2 {py:meth}`xarray.DataArray.pr.sum` function to compute the sum over countries 48 | while ignoring only those countries where the whole timeseries is missing, using the 49 | `skipna_evaluation_dims` parameter: 50 | 51 | ```{code-cell} ipython3 52 | da.pr.sum(dim="area", skipna_evaluation_dims="time").pr.to_df() 53 | ``` 54 | 55 | If you instead want to skip all NA values, use the `skipna` parameter: 56 | 57 | ```{code-cell} ipython3 58 | da.pr.sum(dim="area", skipna=True).pr.to_df() 59 | ``` 60 | 61 | ```{code-cell} ipython3 62 | # compare this to the result of the standard xarray sum - it also skips NA values by default: 63 | 64 | da.sum(dim="area (ISO3)").pr.to_df() 65 | ``` 66 | 67 | ## Infilling 68 | 69 | The same functionality is available for filling in missing information using the 70 | {py:meth}`xarray.DataArray.pr.fill_all_na` function. 71 | In this example, we fill missing information only where the whole time series is missing. 72 | 73 | ```{code-cell} ipython3 74 | da.pr.fill_all_na("time", value=10).pr.to_df() 75 | ``` 76 | 77 | ## Bulk aggregation 78 | 79 | For larger aggregation tasks, e.g. aggregating several gas baskets from individual gases or aggregating a full category tree from leaves, we have the functions {py:meth}`xarray.Dataset.pr.add_aggregates_variables`, {py:meth}`xarray.Dataset.pr.add_aggregates_coordinates`, and {py:meth}`xarray.DataArray.pr.add_aggregates_coordinates`, which are highly configurable but can also be used in a simplified mode for quick aggregation tasks. In the following, we give a few examples. For the full feature set, we refer to the function descriptions linked above. The functions internally work with {py:meth}`xarray.Dataset.pr.merge` / {py:meth}`xarray.DataArray.pr.merge` to allow for consistency checks when target timeseries exist. 80 | 81 | ### Add aggregates for variables 82 | 83 | The {py:meth}`xarray.Dataset.pr.add_aggregates_variables` function aggregates data from individual variables to new variables (usually gas baskets). Several variables can be created in one call, where the order of definition is the order of creation. Filters can be specified to limit aggregation to certain coordinate values. 84 | 85 | #### Examples 86 | 87 | Sum gases in the minimal example dataset: 88 | 89 | ```{code-cell} ipython3 90 | ds_min = primap2.open_dataset("../minimal_ds.nc") 91 | summed_ds = ds_min.pr.add_aggregates_variables( 92 | gas_baskets={ 93 | "test (SARGWP100)": { 94 | "sources": ["CO2", "SF6", "CH4"], 95 | }, 96 | }, 97 | ) 98 | summed_ds["test (SARGWP100)"] 99 | ``` 100 | 101 | We can also use a filter / selector to limit the aggregation to a selection, e.g.
a single country: 102 | 103 | ```{code-cell} ipython3 104 | filtered_ds = ds_min.pr.add_aggregates_variables( 105 | gas_baskets={ 106 | "test (SARGWP100)": { 107 | "sources": ["CO2", "SF6", "CH4"], 108 | "sel": {"area (ISO3)": ["COL"]}, 109 | }, 110 | }, 111 | ) 112 | filtered_ds["test (SARGWP100)"] 113 | ``` 114 | When filtering, it is important to note that entities and variables are not the same thing. The difference between the `entity` and `variable` filters / selectors is that `'entity': ['SF6']` will match both variables `'SF6'` and `'SF6 (SARGWP100)'` (as both variables are for the entity `'SF6'`), while `'variable': ['SF6']` will match only the variable `'SF6'`. 115 | 116 | If we recompute an existing timeseries, it has to be consistent with the existing data. Here we use the simple mode to specify the aggregation rules. The example below fails because the result is inconsistent with existing data. 117 | 118 | ```{code-cell} ipython3 119 | from xarray import MergeError 120 | 121 | try: 122 | recomputed_ds = filtered_ds.pr.add_aggregates_variables( 123 | gas_baskets={ 124 | "test (SARGWP100)": ["CO2", "CH4"], 125 | }, 126 | ) 127 | recomputed_ds["test (SARGWP100)"] 128 | except MergeError as err: 129 | print(err) 130 | ``` 131 | 132 | We can set the tolerance high enough that the check passes and no error is thrown. This is only possible in the complex mode for the aggregation rules. 133 | 134 | ```{code-cell} ipython3 135 | recomputed_ds = filtered_ds.pr.add_aggregates_variables( 136 | gas_baskets={ 137 | "test (SARGWP100)": { 138 | "sources": ["CO2", "CH4"], 139 | "tolerance": 1, # 100% 140 | }, 141 | }, 142 | ) 143 | recomputed_ds["test (SARGWP100)"] 144 | ``` 145 | 146 | ### Add aggregates for coordinates 147 | 148 | The {py:meth}`xarray.Dataset.pr.add_aggregates_coordinates` function aggregates data from individual coordinate values to new values (e.g. from subcategories to categories). Several values for several coordinates can be created in one call, where the order of definition is the order of creation. Filters can be specified to limit aggregation to certain coordinate values, entities, or variables. Most of the operation is similar to the variable aggregation, thus we keep the examples here shorter. The {py:meth}`xarray.DataArray.pr.add_aggregates_coordinates` function uses the same syntax. 149 | 150 | #### Examples 151 | 152 | Sum countries in the minimal example dataset: 153 | 154 | ```{code-cell} ipython3 155 | test_ds = ds_min.pr.add_aggregates_coordinates( 156 | agg_info={ 157 | "area (ISO3)": { 158 | "all": { 159 | "sources": ["COL", "ARG", "MEX", "BOL"], 160 | } 161 | } 162 | } 163 | ) 164 | test_ds 165 | ``` 166 | -------------------------------------------------------------------------------- /docs/source/usage/store_and_load.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | formats: md:myst 4 | text_representation: 5 | extension: .md 6 | format_name: myst 7 | format_version: 0.13 8 | jupytext_version: 1.16.4 9 | kernelspec: 10 | display_name: Python 3 (ipykernel) 11 | language: python 12 | name: python3 13 | --- 14 | 15 | # Store and load datasets 16 | 17 | The native storage format for primap2 datasets is [netcdf](https://www.unidata.ucar.edu/software/netcdf/), 18 | which supports storing all 19 | data and metadata in one file, as well as compression. 20 | We again use a toy example dataset to show how to store and reload datasets.
21 | 22 | ```{code-cell} ipython3 23 | :tags: [hide-cell] 24 | :mystnb: 25 | : code_prompt_show: "Logging setup for the docs" 26 | 27 | # setup logging for the docs - we don't need debug logs 28 | import sys 29 | from loguru import logger 30 | 31 | logger.remove() 32 | logger.add(sys.stderr, level="INFO") 33 | ``` 34 | 35 | ```{code-cell} ipython3 36 | import primap2 37 | import primap2.tests 38 | 39 | ds = primap2.tests.examples.toy_ds() 40 | 41 | ds 42 | ``` 43 | 44 | ## Store to disk 45 | 46 | Storing a dataset to disk works using the {py:meth}`xarray.Dataset.pr.to_netcdf` function. 47 | 48 | ```{code-cell} ipython3 49 | import tempfile 50 | import pathlib 51 | 52 | # setup temporary directory to save things to in this example 53 | with tempfile.TemporaryDirectory() as tdname: 54 | td = pathlib.Path(tdname) 55 | 56 | # simple saving without compression 57 | ds.pr.to_netcdf(td / "toy_ds.nc") 58 | 59 | # using zlib compression for all gases 60 | compression = {"zlib": True, "complevel": 9} 61 | encoding = {var: compression for var in ds.data_vars} 62 | ds.pr.to_netcdf(td / "toy_ds_compressed.nc", encoding=encoding) 63 | ``` 64 | 65 | ```{caution} 66 | `netcdf` files are not reproducible. 67 | 68 | `netcdf` is a very flexible format, which e.g. supports compression using a range 69 | of libraries, therefore the exact same `Dataset` can be represented by different 70 | `netcdf` files on disk. Unfortunately, even if you specify the compression options, 71 | `netcdf` files additionally contain metadata about all software versions used to 72 | produce the file. Therefore, if you reproduce a `Dataset` containing the same data 73 | and metadata and store it to a `netcdf` file, it will generally not create a file 74 | which is identical. 75 | ``` 76 | 77 | ## Load from disk 78 | 79 | We also provide the function {py:func}`primap2.open_dataset` to load datasets back into memory. 80 | In this example, we load a minimal dataset. 81 | 82 | ```{code-cell} ipython3 83 | ds = primap2.open_dataset("../minimal_ds.nc") 84 | 85 | ds 86 | ``` 87 | 88 | Note how units were read and attributes restored. 89 | -------------------------------------------------------------------------------- /licenses/pint_xarray_license: -------------------------------------------------------------------------------- 1 | Copyright 2020, pint-xarray developers 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | https://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | -------------------------------------------------------------------------------- /licenses/xarray_license: -------------------------------------------------------------------------------- 1 | Copyright 2014-2019, xarray Developers 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at 4 | 5 | https://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | warn_unused_configs = True 3 | ignore_missing_imports = True 4 | -------------------------------------------------------------------------------- /primap-stubs.patch: -------------------------------------------------------------------------------- 1 | diff '--color=auto' -ru xarray.orig/core/dataarray.pyi xarray/core/dataarray.pyi 2 | --- xarray.orig/core/dataarray.pyi 2024-04-17 16:39:22.871662707 +0200 3 | +++ xarray/core/dataarray.pyi 2024-04-17 16:38:30.763522838 +0200 4 | @@ -1,6 +1,7 @@ 5 | import datetime 6 | import numpy as np 7 | import pandas as pd 8 | +import primap2 9 | from _typeshed import Incomplete 10 | from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence 11 | from dask.dataframe import DataFrame as DaskDataFrame 12 | @@ -56,6 +57,8 @@ 13 | @name.setter 14 | def name(self, value: Hashable | None) -> None: ... 15 | + @property 16 | + def pr(self) -> primap2.accessors.PRIMAP2DataArrayAccessor: ... 17 | @property 18 | def variable(self) -> Variable: ... 19 | @property 20 | def dtype(self) -> np.dtype: ... 21 | diff '--color=auto' -ru xarray.orig/core/dataset.pyi xarray/core/dataset.pyi 22 | --- xarray.orig/core/dataset.pyi 2024-04-17 16:39:22.871662707 +0200 23 | +++ xarray/core/dataset.pyi 2024-04-17 16:39:00.667603556 +0200 24 | @@ -1,6 +1,7 @@ 25 | import datetime 26 | import numpy as np 27 | import pandas as pd 28 | +import primap2 29 | from _typeshed import Incomplete 30 | from collections.abc import Collection, Hashable, Iterable, Iterator, Mapping, MutableMapping, Sequence 31 | from dask.dataframe import DataFrame as DaskDataFrame 32 | @@ -69,6 +70,8 @@ 33 | @attrs.setter 34 | def attrs(self, value: Mapping[Any, Any]) -> None: ... 35 | + @property 36 | + def pr(self) -> primap2.accessors.PRIMAP2DatasetAccessor: ... 37 | @property 38 | def encoding(self) -> dict[Any, Any]: ... 39 | @encoding.setter 40 | def encoding(self, value: Mapping[Any, Any]) -> None: ... 41 | -------------------------------------------------------------------------------- /primap2/__init__.py: -------------------------------------------------------------------------------- 1 | """The PRIMAP2 climate policy analysis package.""" 2 | 3 | __author__ = """Mika Pflüger and Johannes Gütschow""" 4 | __email__ = "mika.pflueger@climate-resource.com" 5 | __version__ = "0.12.2" 6 | 7 | import sys 8 | 9 | from loguru import logger 10 | 11 | from . 
import accessors, pm2io 12 | from ._data_format import ( 13 | ProcessingStepDescription, 14 | TimeseriesProcessingDescription, 15 | open_dataset, 16 | ) 17 | from ._selection import Not 18 | from ._units import ureg 19 | 20 | logger.remove() 21 | logger.add( 22 | sys.stderr, 23 | format="{time} {level} {message}", 24 | level="INFO", 25 | colorize=True, 26 | ) 27 | 28 | __all__ = [ 29 | "Not", 30 | "ProcessingStepDescription", 31 | "TimeseriesProcessingDescription", 32 | "accessors", 33 | "open_dataset", 34 | "pm2io", 35 | "ureg", 36 | ] 37 | -------------------------------------------------------------------------------- /primap2/_accessor_base.py: -------------------------------------------------------------------------------- 1 | """Base classes for accessor mix-in classes.""" 2 | 3 | import typing 4 | 5 | import xarray as xr 6 | 7 | XrObj = typing.TypeVar("XrObj", xr.Dataset, xr.DataArray) 8 | 9 | 10 | class BaseDataArrayAccessor: 11 | def __init__(self, da: xr.DataArray): 12 | self._da = da 13 | 14 | 15 | class BaseDatasetAccessor: 16 | def __init__(self, ds: xr.Dataset): 17 | self._ds = ds 18 | -------------------------------------------------------------------------------- /primap2/_dim_names.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from primap2._types import DatasetOrDataArray 4 | 5 | 6 | def dim_names(obj: DatasetOrDataArray) -> tuple[typing.Hashable, ...]: 7 | """Extract the names of the dimensions in a way compatible with all xarray versions.""" 8 | return tuple(obj.sizes.keys()) 9 | -------------------------------------------------------------------------------- /primap2/_metadata.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from . import _accessor_base 4 | 5 | 6 | class DatasetMetadataAccessor(_accessor_base.BaseDatasetAccessor): 7 | @property 8 | def entity_terminology(self) -> str: 9 | """The terminology of the entity attributes in this dataset's data variables""" 10 | return self._ds.attrs["entity_terminology"] 11 | 12 | @entity_terminology.setter 13 | def entity_terminology(self, value: str): 14 | self._ds.attrs["entity_terminology"] = value 15 | 16 | @property 17 | def references(self) -> str: 18 | """Citable reference(s) describing the data 19 | 20 | If the references start with ``doi:``, it is a doi, otherwise it is a 21 | free-form literature reference.
22 | """ 23 | return self._ds.attrs["references"] 24 | 25 | @references.setter 26 | def references(self, value: str): 27 | self._ds.attrs["references"] = value 28 | 29 | @property 30 | def rights(self) -> str: 31 | """License or other usage restrictions of the data""" 32 | return self._ds.attrs["rights"] 33 | 34 | @rights.setter 35 | def rights(self, value: str): 36 | self._ds.attrs["rights"] = value 37 | 38 | @property 39 | def contact(self) -> str: 40 | """Who can answer questions about the data""" 41 | return self._ds.attrs["contact"] 42 | 43 | @contact.setter 44 | def contact(self, value: str): 45 | self._ds.attrs["contact"] = value 46 | 47 | @property 48 | def title(self) -> str: 49 | """A succinct description""" 50 | return self._ds.attrs["title"] 51 | 52 | @title.setter 53 | def title(self, value: str): 54 | self._ds.attrs["title"] = value 55 | 56 | @property 57 | def comment(self) -> str: 58 | """Longer form description""" 59 | return self._ds.attrs["comment"] 60 | 61 | @comment.setter 62 | def comment(self, value: str): 63 | self._ds.attrs["comment"] = value 64 | 65 | @property 66 | def institution(self) -> str: 67 | """Where the data originates""" 68 | return self._ds.attrs["institution"] 69 | 70 | @institution.setter 71 | def institution(self, value: str): 72 | self._ds.attrs["institution"] = value 73 | 74 | @property 75 | def publication_date(self) -> datetime.date: 76 | """The original date of publication of the dataset, if published.""" 77 | return self._ds.attrs["publication_date"] 78 | 79 | @publication_date.setter 80 | def publication_date(self, value: datetime.date): 81 | self._ds.attrs["publication_date"] = value 82 | -------------------------------------------------------------------------------- /primap2/_overview.py: -------------------------------------------------------------------------------- 1 | """Summarization and descriptive statistics functions to get an overview of a data 2 | set. 3 | """ 4 | 5 | import typing 6 | 7 | import pandas as pd 8 | 9 | from . import _accessor_base 10 | from ._selection import alias_dims 11 | 12 | 13 | class DataArrayOverviewAccessor(_accessor_base.BaseDataArrayAccessor): 14 | def to_df(self, name: str | None = None) -> pd.DataFrame | pd.Series: 15 | """Convert this array into an unstacked (i.e. non-tidy) pandas.DataFrame. 16 | 17 | Converting to an unstacked :py:class:`pandas.DataFrame` is most useful for two-dimensional 18 | data because then there is no MultiIndex, making the result very easy to read. 19 | 20 | If you want a tidy dataframe, use :py:meth:`xarray.DataArray.to_dataframe` instead. 21 | 22 | Parameters 23 | ---------- 24 | name: str 25 | Name to give to this array (required if unnamed). 26 | 27 | Returns 28 | ------- 29 | df : pandas.DataFrame 30 | """ 31 | if name is None: 32 | name = self._da.name 33 | pandas_obj = self._da.reset_coords(drop=True).to_dataframe(name)[name] 34 | pandas_obj.name = name 35 | if isinstance(pandas_obj, pd.DataFrame) or isinstance(pandas_obj.index, pd.MultiIndex): 36 | return pandas_obj.unstack() 37 | else: # Series without MultiIndex can't be unstacked, return them as-is 38 | return pandas_obj 39 | 40 | @alias_dims(["dims"]) 41 | def coverage(self, *dims: typing.Hashable) -> pd.DataFrame | pd.Series: 42 | """Summarize how many data points exist for a dimension combination. 43 | 44 | For each combinations of values in the given dimensions, count the number of 45 | non-NaN data points in the array. The result is returned as an 46 | N-dimensional pandas DataFrame. 
47 | 48 | If the array's dtype is ``bool``, count the number of True values instead. This 49 | makes it possible to easily apply preprocessing. For example, to count the 50 | number of valid time series use ``da.notnull().any("time").coverage(...)``. 51 | 52 | Parameters 53 | ---------- 54 | *dims: str 55 | Names or aliases of the dimensions to be used for summarizing. 56 | You can specify any number of dimensions, but the readability 57 | of the result is best for one or two dimensions. 58 | 59 | Returns 60 | ------- 61 | coverage: pandas.DataFrame or pandas.Series 62 | N-dimensional dataframe (series for N=1) summarizing the number of non-NaN 63 | data points for each combination of values in the given dimensions. 64 | """ 65 | if not dims: 66 | raise ValueError("Specify at least one dimension.") 67 | da = self._da 68 | 69 | if da.name is None: 70 | name = "coverage" 71 | else: 72 | name = da.name 73 | 74 | if da.dtype != bool: 75 | da = da.notnull() 76 | 77 | return da.pr.sum(reduce_to_dim=dims).transpose(*dims).pr.to_df(name) 78 | 79 | 80 | class DatasetOverviewAccessor(_accessor_base.BaseDatasetAccessor): 81 | def to_df( 82 | self, 83 | name: str | None = None, 84 | ) -> pd.DataFrame: 85 | """Convert this dataset into a pandas.DataFrame. 86 | 87 | It returns mostly the same as :py:meth:`xarray.Dataset.to_dataframe`, but discards 88 | additional coordinates instead of including them in the output. 89 | 90 | Parameters 91 | ---------- 92 | name: str, optional 93 | Name to give to the output columns. 94 | 95 | Returns 96 | ------- 97 | df: pandas.DataFrame 98 | """ 99 | df = self._ds.pr.remove_processing_info().reset_coords(drop=True).to_dataframe() 100 | if name is not None: 101 | df.columns.name = name 102 | return df 103 | 104 | @alias_dims(["dims"], additional_allowed_values=["entity"]) 105 | def coverage(self, *dims: typing.Hashable) -> pd.DataFrame | pd.Series: 106 | """Summarize how many data points exist for a dimension combination. 107 | 108 | For each combination of values in the given dimensions, count the number of 109 | non-NaN data points in the dataset. The result is returned as an 110 | N-dimensional pandas DataFrame. 111 | 112 | Only those data variables in the dataset which are defined on all given dims 113 | are considered, i.e. each dim is in ``ds[key].dims``. 114 | 115 | If the dataset only contains boolean arrays, count the number of True values 116 | instead. This makes it possible to easily apply preprocessing. For example, 117 | to count the number of valid time series use 118 | ``ds.notnull().any("time").coverage(...)``. 119 | 120 | Parameters 121 | ---------- 122 | *dims: str 123 | Names or aliases of the dimensions to be used for summarizing. 124 | To use the name of the data variables (usually, the gases) as a coordinate, 125 | use "entity". You can specify any number of dimensions, but the readability 126 | of the result is best for one or two dimensions. 127 | 128 | Returns 129 | ------- 130 | coverage: pandas.DataFrame or pandas.Series 131 | N-dimensional dataframe (series for N=1) summarizing the number of non-NaN 132 | data points for each combination of values in the given dimensions.
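A sketch of the dataset variant, including the special "entity" value documented above (plain, unitless example data; real primap2 datasets carry more metadata):

import numpy as np
import pandas as pd
import xarray as xr

import primap2  # noqa: F401  # registers the "pr" accessor

time = pd.date_range("2000", periods=3, freq="YS")
ds = xr.Dataset(
    {
        "CO2": xr.DataArray([1.0, np.nan, 3.0], coords={"time": time}, dims=["time"]),
        "CH4": xr.DataArray([np.nan, 2.0, 3.0], coords={"time": time}, dims=["time"]),
    }
)
# summarize along the data variables: one non-NaN count per variable
print(ds.pr.coverage("entity"))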
133 | """ 134 | if not dims: 135 | raise ValueError("Specify at least one dimension.") 136 | 137 | ds = self._ds 138 | 139 | for dim in dims: 140 | if dim == "entity": 141 | continue 142 | ds = ds.drop_vars([x for x in ds if dim not in ds[x].dims]) 143 | 144 | all_boolean = all(ds[var].dtype == bool for var in ds) 145 | if not all_boolean: # Convert into boolean coverage array 146 | ds = ds.notnull() 147 | 148 | da = ds.pr.sum(reduce_to_dim=dims) 149 | if "entity" in dims: 150 | da = da.to_array("entity") 151 | 152 | return da.transpose(*dims).pr.to_df("coverage") 153 | -------------------------------------------------------------------------------- /primap2/_types.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | import xarray as xr 4 | 5 | DatasetOrDataArray = typing.TypeVar("DatasetOrDataArray", xr.Dataset, xr.DataArray) 6 | KeyT = typing.TypeVar("KeyT", str, typing.Mapping[typing.Hashable, typing.Any]) 7 | DimOrDimsT = typing.TypeVar( 8 | "DimOrDimsT", 9 | str, 10 | typing.Hashable, 11 | typing.Iterable[str], 12 | typing.Iterable[typing.Hashable], 13 | ) 14 | FunctionT = typing.TypeVar("FunctionT", bound=typing.Callable[..., typing.Any]) 15 | -------------------------------------------------------------------------------- /primap2/accessors.py: -------------------------------------------------------------------------------- 1 | """xarray extension accessors providing an API under the 'pr' namespace.""" 2 | 3 | import xarray as xr 4 | 5 | from ._aggregate import DataArrayAggregationAccessor, DatasetAggregationAccessor 6 | from ._convert import DataArrayConversionAccessor 7 | from ._data_format import DatasetDataFormatAccessor 8 | from ._downscale import DataArrayDownscalingAccessor, DatasetDownscalingAccessor 9 | from ._fill_combine import DataArrayFillAccessor, DatasetFillAccessor 10 | from ._merge import DataArrayMergeAccessor, DatasetMergeAccessor 11 | from ._metadata import DatasetMetadataAccessor 12 | from ._overview import DataArrayOverviewAccessor, DatasetOverviewAccessor 13 | from ._selection import ( 14 | DataArrayAliasSelectionAccessor, 15 | DatasetAliasSelectionAccessor, 16 | ) 17 | from ._setters import DataArraySettersAccessor, DatasetSettersAccessor 18 | from ._units import DataArrayUnitAccessor, DatasetUnitAccessor 19 | 20 | 21 | @xr.register_dataset_accessor("pr") 22 | class PRIMAP2DatasetAccessor( 23 | DatasetAggregationAccessor, 24 | DatasetAliasSelectionAccessor, 25 | DatasetDataFormatAccessor, 26 | DatasetDownscalingAccessor, 27 | DatasetMergeAccessor, 28 | DatasetMetadataAccessor, 29 | DatasetOverviewAccessor, 30 | DatasetSettersAccessor, 31 | DatasetUnitAccessor, 32 | DatasetFillAccessor, 33 | ): 34 | """Collection of methods useful for climate policy analysis.""" 35 | 36 | 37 | @xr.register_dataarray_accessor("pr") 38 | class PRIMAP2DataArrayAccessor( 39 | DataArrayAggregationAccessor, 40 | DataArrayAliasSelectionAccessor, 41 | DataArrayConversionAccessor, 42 | DataArrayDownscalingAccessor, 43 | DataArrayMergeAccessor, 44 | DataArrayOverviewAccessor, 45 | DataArraySettersAccessor, 46 | DataArrayUnitAccessor, 47 | DataArrayFillAccessor, 48 | ): 49 | """Collection of methods useful for climate policy analysis.""" 50 | 51 | 52 | __all__ = ["PRIMAP2DataArrayAccessor", "PRIMAP2DatasetAccessor"] 53 | -------------------------------------------------------------------------------- /primap2/csg/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Composite 
Source Generator 3 | 4 | Generate a composite harmonized dataset from multiple sources according to defined 5 | source priorities and matching algorithms. 6 | """ 7 | 8 | from ._compose import compose 9 | from ._models import ( 10 | PriorityDefinition, 11 | StrategyDefinition, 12 | ) 13 | from ._strategies.exceptions import StrategyUnableToProcess 14 | from ._strategies.gaps import FitParameters 15 | from ._strategies.global_least_squares import GlobalLSStrategy 16 | from ._strategies.local_trends import LocalTrendsStrategy 17 | from ._strategies.substitution import SubstitutionStrategy 18 | from ._wrapper import create_composite_source 19 | 20 | __all__ = [ 21 | "FitParameters", 22 | "GlobalLSStrategy", 23 | "LocalTrendsStrategy", 24 | "PriorityDefinition", 25 | "StrategyDefinition", 26 | "StrategyUnableToProcess", 27 | "SubstitutionStrategy", 28 | "compose", 29 | "create_composite_source", 30 | ] 31 | -------------------------------------------------------------------------------- /primap2/csg/_strategies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/primap2/csg/_strategies/__init__.py -------------------------------------------------------------------------------- /primap2/csg/_strategies/exceptions.py: -------------------------------------------------------------------------------- 1 | class StrategyUnableToProcess(Exception): 2 | """The filling strategy is unable to process the given timeseries, possibly due 3 | to missing data. 4 | """ 5 | 6 | def __init__(self, reason: str): 7 | """Specify the reason why the filling strategy is unable to process the data.""" 8 | self.reason = reason 9 | -------------------------------------------------------------------------------- /primap2/csg/_strategies/global_least_squares.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import xarray as xr 3 | from attrs import frozen 4 | from scipy.linalg import lstsq 5 | from scipy.optimize import least_squares 6 | 7 | import primap2 8 | 9 | from .exceptions import StrategyUnableToProcess 10 | 11 | 12 | @frozen 13 | class GlobalLSStrategy: 14 | """Fill missing data by global least squares matching. 15 | 16 | The NaNs in the first timeseries :math:`\\textrm{ts}(t)` are filled using harmonized data 17 | from the lower priority timeseries :math:`\\textrm{fill_ts}(t)`. For harmonization we use 18 | 19 | .. math:: 20 | 21 | \\textrm{fill_ts}_h(t) = \\textrm{fill_ts}(t) \\times a + b, 22 | 23 | where :math:`\\textrm{fill_ts}_h(t)` is the harmonized timeseries and :math:`a` and :math:`b` are 24 | determined by minimizing 25 | the least squares distance between :math:`\\textrm{ts}(t)` and :math:`\\textrm{fill_ts}_h(t)`. 26 | 27 | If the class is initialized with ``allow_shift = True`` the faster 28 | :py:func:`scipy.linalg.lstsq` function is used and :math:`b` can be arbitrary. 29 | For the case ``allow_shift = False`` (:math:`b = 0`) :py:func:`scipy.optimize.least_squares` 30 | is used. 31 | 32 | If there is no overlap in non-NaN data between :math:`\\textrm{ts}(t)` and 33 | :math:`\\textrm{fill_ts}(t)` a :py:class:`StrategyUnableToProcess` error will be raised. 34 | 35 | If ``allow_negative = False`` and the harmonized timeseries :math:`\\textrm{fill_ts}_h(t)` 36 | contains negative data a :py:class:`StrategyUnableToProcess` error will be raised.
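The harmonization described above can be illustrated independently of the class machinery. This is a simplified numpy/scipy sketch of the ``allow_shift = True`` case with made-up numbers, not the library's exact code path (which follows below):

import numpy as np
from scipy.linalg import lstsq

ts = np.array([np.nan, np.nan, 10.0, 12.0, 14.0])  # base timeseries with gaps
fill_ts = np.array([4.0, 5.0, 6.0, 7.0, 8.0])  # lower-priority timeseries

# fit a and b on the overlap, i.e. the timesteps where both series have data
overlap = ~np.isnan(ts) & ~np.isnan(fill_ts)
A = np.vstack((fill_ts[overlap], np.ones(overlap.sum()))).T
x, *_ = lstsq(A, ts[overlap])  # x = [a, b]

# harmonize the filling series and use it only where the base series is NaN
fill_ts_harmonized = fill_ts * x[0] + x[1]
filled = np.where(np.isnan(ts), fill_ts_harmonized, ts)
print(filled)  # [ 6.  8. 10. 12. 14.] -- the two leading NaNs were filled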
37 | 38 | Attributes 39 | ---------- 40 | allow_shift: bool, default True 41 | Allow the filling timeseries to shift up and down using the additive constant 42 | :math:`b \\neq 0`. 43 | allow_negative: bool, default False 44 | Allow the filling timeseries to contain negative data initially. 45 | """ 46 | 47 | allow_shift: bool = True 48 | allow_negative: bool = False 49 | type = "globalLS" 50 | 51 | def _factor_mult(self, a, e, e_ref): 52 | return a * e - e_ref 53 | 54 | def _jac(self, a, e, e_ref): 55 | J = np.empty((e.size, 1)) 56 | J[:, 0] = e 57 | return J 58 | 59 | def fill( 60 | self, 61 | *, 62 | ts: xr.DataArray, 63 | fill_ts: xr.DataArray, 64 | fill_ts_repr: str, 65 | ) -> tuple[xr.DataArray, list[primap2.ProcessingStepDescription]]: 66 | """Fill missing data by global least squares matching. 67 | 68 | For a description of the algorithm, see the documentation of this class. 69 | 70 | Parameters 71 | ---------- 72 | ts 73 | Base timeseries. Missing data (NaNs) in this timeseries will be filled. 74 | This function does not modify the data in ts. 75 | fill_ts 76 | Fill timeseries. Data from this timeseries will be used (possibly after 77 | modification) to fill missing data in the base timeseries. 78 | This function does not modify the data in fill_ts. 79 | fill_ts_repr 80 | String representation of fill_ts. Human-readable short representation of 81 | the fill_ts (e.g. the source). 82 | 83 | Returns 84 | ------- 85 | filled_ts, descriptions. 86 | filled_ts contains the result, where missing 87 | data in ts is (partly) filled using scaled data from fill_ts. 88 | descriptions contains information about which years were affected and 89 | filled how. 90 | """ 91 | filled_mask = ts.isnull() & ~fill_ts.isnull() 92 | time_filled = filled_mask["time"][filled_mask].to_numpy() 93 | 94 | if time_filled.any(): 95 | # check if we have overlap. If not, raise an error so users can define a fallback 96 | # strategy 97 | overlap = ts.notnull() & fill_ts.notnull() 98 | if overlap.any(): 99 | if self.allow_shift: 100 | e = fill_ts[overlap.data].data 101 | A = np.vstack((e, np.ones_like(e))).transpose() 102 | e_ref = ts[overlap.data].data 103 | x, res, rank, s = lstsq(A, e_ref) 104 | fill_ts_harmo = fill_ts * x[0] + x[1] 105 | if any(fill_ts_harmo < 0): 106 | # negative data after harmonization is excluded by configuration; raise so a fallback strategy can be used 107 | raise StrategyUnableToProcess( 108 | reason="Negative data after harmonization excluded by configuration" 109 | ) 110 | else: 111 | ts_aligned, fill_ts_aligned = xr.align(ts, fill_ts_harmo, join="exact") 112 | filled_ts = ts_aligned.fillna(fill_ts_aligned) 113 | 114 | descriptions = [ 115 | primap2.ProcessingStepDescription( 116 | time=time_filled, 117 | description=f"filled with least squares matched data from " 118 | f"{fill_ts_repr}. a*x+b with a={x[0]:0.3f}, " 119 | f"b={x[1]:0.3f}", 120 | function=self.type, 121 | source=fill_ts_repr, 122 | ) 123 | ] 124 | else: 125 | e = fill_ts[overlap.data].data 126 | e_ref = ts[overlap.data].data 127 | a0 = [1] # start with 1 as scaling factor 128 | res = least_squares(self._factor_mult, a0, jac=self._jac, args=(e, e_ref)) 129 | 130 | fill_ts_h = fill_ts * res["x"][0] 131 | 132 | ts_aligned, fill_ts_aligned = xr.align(ts, fill_ts_h, join="exact") 133 | filled_ts = ts_aligned.fillna(fill_ts_aligned) 134 | 135 | descriptions = [ 136 | primap2.ProcessingStepDescription( 137 | time=time_filled, 138 | description="filled with least squares matched data from " 139 | f"{fill_ts_repr}.
Factor={res['x'][0]:0.3f}", 140 | function=self.type, 141 | source=fill_ts_repr, 142 | ) 143 | ] 144 | else: 145 | raise StrategyUnableToProcess(reason="No overlap between timeseries, can't match") 146 | 147 | else: 148 | # if we don't have anything to fill, we don't need to calculate anything 149 | filled_ts = ts 150 | descriptions = [ 151 | primap2.ProcessingStepDescription( 152 | time=time_filled, 153 | description=f"no additional data in {fill_ts_repr}", 154 | function=self.type, 155 | source=fill_ts_repr, 156 | ) 157 | ] 158 | 159 | return filled_ts, descriptions 160 | -------------------------------------------------------------------------------- /primap2/csg/_strategies/local_least_squares.py: -------------------------------------------------------------------------------- 1 | # TODO 2 | # local matching with least squares instead of linear trends 3 | # optimization: adaptive area LS matching where areas with high overlap and a good fit 4 | # are identified and the matching is done using these areas, ignoring other areas 5 | -------------------------------------------------------------------------------- /primap2/csg/_strategies/substitution.py: -------------------------------------------------------------------------------- 1 | """Simple strategy which replaces NaNs by datapoints from a second timeseries.""" 2 | 3 | import attrs 4 | import xarray as xr 5 | 6 | import primap2 7 | 8 | 9 | @attrs.define(frozen=True) 10 | class SubstitutionStrategy: 11 | """Fill missing data in the result timeseries by copying. 12 | 13 | The NaNs in the result timeseries are substituted with data from the filling 14 | timeseries. 15 | """ 16 | 17 | type = "substitution" 18 | 19 | def fill( 20 | self, 21 | *, 22 | ts: xr.DataArray, 23 | fill_ts: xr.DataArray, 24 | fill_ts_repr: str, 25 | ) -> tuple[xr.DataArray, list[primap2.ProcessingStepDescription]]: 26 | """Fill gaps in ts using data from the fill_ts. 27 | 28 | Parameters 29 | ---------- 30 | ts 31 | Base timeseries. Missing data (NaNs) in this timeseries will be filled. 32 | This function does not modify the data in ts. 33 | fill_ts 34 | Fill timeseries. Data from this timeseries will be used (possibly after 35 | modification) to fill missing data in the base timeseries. 36 | This function does not modify the data in fill_ts. 37 | fill_ts_repr 38 | String representation of fill_ts. Human-readable short representation of 39 | the fill_ts (e.g. the source). 40 | 41 | Returns 42 | ------- 43 | filled_ts, descriptions. filled_ts contains the result, where missing 44 | data in ts is (partly) filled using unmodified data from fill_ts. 45 | descriptions contains information about which years were affected and 46 | filled how.
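A minimal usage sketch of the substitution strategy, with made-up timeseries values:

import numpy as np
import pandas as pd
import xarray as xr

from primap2.csg import SubstitutionStrategy

time = pd.date_range("2000", periods=4, freq="YS")
ts = xr.DataArray([1.0, np.nan, 3.0, np.nan], coords={"time": time}, dims=["time"])
fill_ts = xr.DataArray([9.0, 2.0, 9.0, 4.0], coords={"time": time}, dims=["time"])

filled, descriptions = SubstitutionStrategy().fill(
    ts=ts, fill_ts=fill_ts, fill_ts_repr="lower-priority source"
)
print(filled.values)  # [1. 2. 3. 4.] -- only the NaN timesteps were replaced
print(descriptions[0].description)  # records which values were substituted and from where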
47 | """ 48 | ts_aligned, fill_ts_aligned = xr.align(ts, fill_ts, join="exact") 49 | filled_ts = ts_aligned.fillna(fill_ts_aligned) 50 | filled_mask = ts.isnull() & ~fill_ts.isnull() 51 | time_filled = "all" if filled_mask.all() else filled_mask["time"][filled_mask].to_numpy() 52 | description = primap2.ProcessingStepDescription( 53 | time=time_filled, 54 | description=f"substituted with corresponding values from {fill_ts_repr}", 55 | function=self.type, 56 | source=fill_ts_repr, 57 | ) 58 | return filled_ts, [description] 59 | -------------------------------------------------------------------------------- /primap2/csg/_wrapper.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import tqdm 6 | import xarray as xr 7 | 8 | from ._compose import compose 9 | from ._models import PriorityDefinition, StrategyDefinition 10 | 11 | 12 | def set_priority_coords( 13 | ds: xr.Dataset, 14 | dims: dict[str, dict[str, str]], 15 | ) -> xr.Dataset: 16 | """Set values for priority coordinates in output dataset. 17 | 18 | Parameters 19 | ---------- 20 | ds 21 | Input dataset. 22 | dims 23 | Values to be set for priority coordinates. The format is 24 | {"name": {"value": value, "terminology": terminology}}, where the 25 | terminology is optional. 26 | Examples: 27 | {"source": {"value": "PRIMAP-hist"}} sets the "source" to "PRIMAP-hist". 28 | {"area": {"value": "WORLD", "terminology": "ISO3_primap"}} adds the dimension 29 | "area (ISO3_primap)" to "WORLD". 30 | """ 31 | for dim in dims: 32 | if "terminology" in dims[dim]: 33 | terminology = dims[dim]["terminology"] 34 | else: 35 | terminology = None 36 | ds = ds.pr.expand_dims(dim=dim, coord_value=dims[dim]["value"], terminology=terminology) 37 | 38 | return ds 39 | 40 | 41 | def create_composite_source( 42 | input_ds: xr.Dataset, 43 | priority_definition: PriorityDefinition, 44 | strategy_definition: StrategyDefinition, 45 | result_prio_coords: dict[str, dict[str, str]], 46 | limit_coords: dict[str, str | list[str]] | None = None, 47 | time_range: tuple[str | np.datetime64, str | np.datetime64] | pd.DatetimeIndex | None = None, 48 | metadata: dict[str, str] | None = None, 49 | progress_bar: type[tqdm.tqdm] | None = tqdm.tqdm, 50 | ) -> xr.Dataset: 51 | """Create a composite data source 52 | 53 | This is a wrapper around `primap2.csg.compose` that prepares the input data and sets result 54 | values for the priority coordinates. 55 | 56 | Parameters 57 | ---------- 58 | input_ds 59 | Dataset containing all input data 60 | priority_definition 61 | Defines the priorities to select timeseries from the input data. Priorities 62 | are formed by a list of selections and are used "from left to right", where the 63 | first matching selection has the highest priority. Each selection has to specify 64 | values for all priority dimensions (so that exactly one timeseries is selected 65 | from the input data), but can also specify other dimensions. That way it is, 66 | e.g., possible to define a different priority for a specific country by listing 67 | it early (i.e. with high priority) before the more general rules which should 68 | be applied for all other countries. 69 | You can also specify the "entity" or "variable" in the selection, which will 70 | limit the rule to a specific entity or variable, respectively. 
For each 71 | DataArray in the input_data Dataset, the variable is its name, the entity is 72 | the value of the key `entity` in its attrs. 73 | strategy_definition 74 | Defines the filling strategies to be used when filling timeseries with other 75 | timeseries. Again, the priority is defined by a list of selections and 76 | corresponding strategies which are used "from left to right". Selections can use 77 | any dimension and don't have to apply to only one timeseries. For example, to 78 | define a default strategy which should be used for all timeseries unless 79 | something else is configured, configure an empty selection as the last 80 | (rightmost) entry. 81 | You can also specify the "entity" or "variable" in the selection, which will 82 | limit the rule to a specific entity or variable, respectively. For each 83 | DataArray in the input_data Dataset, the variable is its name, the entity is 84 | the value of the key `entity` in its attrs. 85 | result_prio_coords 86 | Defines the values for the priority coordinates in the output dataset. As the 87 | priority coordinates differ for all input sources, there is no canonical value 88 | for the result and it has to be explicitly defined. 89 | limit_coords 90 | Optional parameter to remove data from the input data for coordinate values 91 | not needed for the composition. The time coordinate is treated separately. 92 | time_range 93 | Optional parameter to limit the time coverage of the input data. 94 | Can either be a pandas `DatetimeIndex` or a tuple of `str` or `np.datetime64` in 95 | the form (year_from, year_to) where both boundaries are included in the range. 96 | Only the overlap of the supplied index or index created from the tuple with 97 | the time coordinate of the input dataset will be used. 98 | metadata 99 | Set metadata values such as title and references. 100 | progress_bar 101 | By default, show progress bars using the tqdm package during the 102 | operation. If None, don't show any progress bars. You can supply a class 103 | compatible with tqdm.tqdm's protocol if you want to customize the progress bar.
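To make the priority and strategy plumbing concrete, here is a hedged configuration sketch. The source names are invented, and input_ds is assumed to be a valid primap2 dataset with a "source" dimension, so the final call is only indicated in a comment:

import primap2.csg as csg

# highest priority first: prefer source "A", fall back to source "B"
priority_definition = csg.PriorityDefinition(
    priority_dimensions=["source"],
    priorities=[{"source": "A"}, {"source": "B"}],
)

# an empty selection as the last (rightmost) entry acts as the default strategy
strategy_definition = csg.StrategyDefinition(strategies=[({}, csg.SubstitutionStrategy())])

# result = csg.create_composite_source(
#     input_ds,
#     priority_definition=priority_definition,
#     strategy_definition=strategy_definition,
#     result_prio_coords={"source": {"value": "COMPOSITE"}},
#     time_range=("1990", "2020"),
# )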
104 | 105 | Returns 106 | ------- 107 | xr.Dataset with composed data according to the given priority and strategy 108 | definitions 109 | """ 110 | # limit input data to these values 111 | if limit_coords is not None: 112 | if "variable" in limit_coords: 113 | variable = limit_coords.pop("variable") 114 | input_ds = input_ds[variable].pr.loc[limit_coords] 115 | else: 116 | input_ds = input_ds.pr.loc[limit_coords] 117 | 118 | # set time range according to input 119 | if time_range is not None: 120 | time_index = create_time_index(time_range) 121 | time_index = time_index.intersection(input_ds.coords["time"]) 122 | input_ds = input_ds.pr.loc[{"time": time_index}] 123 | 124 | # run compose 125 | result_ds = compose( 126 | input_data=input_ds, 127 | priority_definition=priority_definition, 128 | strategy_definition=strategy_definition, 129 | progress_bar=progress_bar, 130 | ) 131 | 132 | # set priority coordinates 133 | result_ds = set_priority_coords(result_ds, result_prio_coords) 134 | 135 | if metadata is not None: 136 | for key in metadata.keys(): 137 | result_ds.attrs[key] = metadata[key] 138 | 139 | result_ds.pr.ensure_valid() 140 | return result_ds 141 | 142 | 143 | def create_time_index( 144 | time_range: tuple[ 145 | str | np.datetime64 | datetime | pd.Timestamp, str | np.datetime64 | datetime | pd.Timestamp 146 | ] 147 | | pd.DatetimeIndex 148 | | None = None, 149 | ) -> pd.DatetimeIndex: 150 | """ 151 | Unify different input options for a time range to a `pd.DatetimeIndex`. 152 | 153 | Parameters 154 | ---------- 155 | time_range 156 | Can either be a pandas `DatetimeIndex` or a tuple of `str` or datetime-like in 157 | the form (year_from, year_to) where both boundaries are included in the range. 158 | Only the overlap of the supplied index or index created from the tuple with 159 | the time coordinate of the input dataset will be used. 160 | 161 | Returns 162 | ------- 163 | Pandas DatetimeIndex according to the time range input 164 | """ 165 | 166 | if isinstance(time_range, pd.DatetimeIndex): 167 | time_index = time_range 168 | elif isinstance(time_range, tuple): 169 | time_index = pd.date_range(time_range[0], time_range[1], freq="YS", inclusive="both") 170 | else: 171 | raise ValueError("time_range must be a datetime index or a tuple") 172 | 173 | return time_index 174 | -------------------------------------------------------------------------------- /primap2/pm2io/_GHG_inventory_reading.py: -------------------------------------------------------------------------------- 1 | """This file contains functions for reading country GHG inventories 2 | from National Inventory Reports (NIR), Biennial Update Reports (BUR), 3 | and other official country emissions inventories. 4 | Most of the functions in this file are exposed to the outside, yet they 5 | currently do not undergo the strict testing applied to the rest of PRIMAP2, as 6 | they are added during the process of reading and preparing data for the PRIMAP-hist 7 | update. Testing will be added in the future. 8 | """ 9 | 10 | import re 11 | 12 | import pandas as pd 13 | 14 | 15 | def nir_add_unit_information( 16 | df_nir: pd.DataFrame, 17 | *, 18 | unit_row: str | int, 19 | entity_row: str | int | None = None, 20 | regexp_entity: str, 21 | regexp_unit: str | None = None, 22 | manual_repl_unit: dict[str, str] | None = None, 23 | manual_repl_entity: dict[str, str] | None = None, 24 | default_unit: str, 25 | ) -> pd.DataFrame: 26 | """Add unit information to a National Inventory Report (NIR) style DataFrame.
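As a quick check of the time-range handling implemented in create_time_index above, a (start, end) tuple of year strings becomes a year-start index with both boundaries included:

import pandas as pd

from primap2.csg._wrapper import create_time_index

idx = create_time_index(("1990", "1992"))
print(list(idx))
# [Timestamp('1990-01-01'), Timestamp('1991-01-01'), Timestamp('1992-01-01')]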
27 | 28 | Add unit information to the header of an "entity-wide" file as 29 | present in the standard table format of National Inventory Reports (NIRs). The 30 | unit and entity information is extracted from combined unit and entity information 31 | in the row defined by `unit_row`. The parameters `regexp_unit` and `regexp_entity` 32 | determine how this is done via regular expressions for unit and entity. 33 | Additionally, manual mappings can be defined in the `manual_repl_unit` and 34 | `manual_repl_entity` dicts. For each column the routine tries to extract a unit 35 | using the regular expression. If this fails, it looks in the `manual_repl_unit` 36 | dict for the unit and in `manual_repl_entity` for the entity information. If there is no 37 | information, the default unit given in `default_unit` is used. In this case, the 38 | analyzed value is used unchanged as the entity. 39 | 40 | Parameters 41 | ---------- 42 | df_nir : pd.DataFrame 43 | Pandas DataFrame with the table to process 44 | unit_row : str or int 45 | String "header" to indicate that the column header should be used to derive the 46 | unit information or an integer specifying the row to use for unit information. 47 | If entity and unit information are given in the same row, use only unit_row. 48 | entity_row : str or int 49 | String "header" to indicate that the column header should be used to derive the 50 | entity information or an integer specifying the row to use for entity information. 51 | If entity and unit information are given in the same row, use only unit_row. 52 | regexp_entity : str 53 | regular expression that extracts the entity from the cell value 54 | regexp_unit : str (optional) 55 | regular expression that extracts the unit from the cell value 56 | manual_repl_unit : dict (optional) 57 | dict defining unit for given cell values 58 | manual_repl_entity : dict (optional) 59 | dict defining entity for given cell values 60 | default_unit : str 61 | unit to be used if no unit can be extracted and no unit is given 62 | 63 | Returns 64 | ------- 65 | pd.DataFrame 66 | DataFrame with explicit unit information (as column header) 67 | """ 68 | if manual_repl_unit is None: 69 | manual_repl_unit = {} 70 | 71 | if manual_repl_entity is None: 72 | manual_repl_entity = {} 73 | 74 | cols_to_drop = [] 75 | 76 | # get the data to extract the units and entities from 77 | # can be either the header row or a regular row 78 | if unit_row == "header": 79 | values_for_units = list(df_nir.columns) 80 | else: 81 | # unit_row must be an integer 82 | values_for_units = list(df_nir.iloc[unit_row]) 83 | cols_to_drop.append(unit_row) 84 | 85 | if entity_row is not None: 86 | if entity_row == "header": 87 | values_for_entities = list(df_nir.columns) 88 | else: 89 | values_for_entities = list(df_nir.iloc[entity_row]) 90 | if entity_row != unit_row: 91 | cols_to_drop.append(entity_row) 92 | else: 93 | values_for_entities = values_for_units 94 | 95 | if regexp_unit is not None: 96 | re_unit = re.compile(regexp_unit) 97 | re_entity = re.compile(regexp_entity) 98 | 99 | units = values_for_units.copy() 100 | entities = values_for_entities.copy() 101 | 102 | for idx, value in enumerate(values_for_units): 103 | if str(value) in manual_repl_unit: 104 | units[idx] = manual_repl_unit[str(value)] 105 | else: 106 | if regexp_unit is not None: 107 | unit = re_unit.findall(str(value)) 108 | else: 109 | unit = False 110 | 111 | if unit: 112 | units[idx] = unit[0] 113 | else: 114 | units[idx] = default_unit 115 | 116 | for idx, value in enumerate(values_for_entities):
117 | if str(value) in manual_repl_entity: 118 | entities[idx] = manual_repl_entity[str(value)] 119 | else: 120 | entity = re_entity.findall(str(value)) 121 | if entity: 122 | entities[idx] = entity[0] 123 | else: 124 | entities[idx] = value 125 | 126 | new_header = [entities, units] 127 | 128 | df_out = df_nir.copy() 129 | df_out.columns = new_header 130 | if cols_to_drop: 131 | df_out = df_out.drop(df_out.index[cols_to_drop]) 132 | 133 | return df_out 134 | 135 | 136 | def nir_convert_df_to_long( 137 | df_nir: pd.DataFrame, year: int, header_long: list[str] | None = None 138 | ) -> pd.DataFrame: 139 | """Convert an entity-wide NIR table for a single year to a long format 140 | DataFrame. 141 | 142 | The input DataFrame is required to have the following structure: 143 | * Columns for category, original category name, and data in this order, where 144 | category and original category name form a multiindex. 145 | * Column header as multiindex for entity and unit 146 | A column for the year is added during the conversion. 147 | 148 | Parameters 149 | ---------- 150 | df_nir: pd.DataFrame 151 | Pandas DataFrame with the NIR table to be converted 152 | year: int 153 | Year of the given data 154 | header_long: list, optional 155 | specify a non-standard column header, e.g. with only category code 156 | or orig_cat_name 157 | 158 | Returns 159 | ------- 160 | pd.DataFrame 161 | converted DataFrame 162 | """ 163 | if header_long is None: 164 | header_long = ["category", "orig_cat_name", "entity", "unit", "time", "data"] 165 | 166 | df_stacked = df_nir.stack([0, 1], future_stack=True).to_frame() 167 | df_stacked.insert(0, "year", str(year)) 168 | df_stacked = df_stacked.reset_index() 169 | df_stacked.columns = header_long 170 | return df_stacked 171 | -------------------------------------------------------------------------------- /primap2/pm2io/__init__.py: -------------------------------------------------------------------------------- 1 | """Data reading module of the PRIMAP2 climate policy analysis package.""" 2 | 3 | from ._data_reading import ( 4 | convert_long_dataframe_if, 5 | convert_wide_dataframe_if, 6 | read_long_csv_file_if, 7 | read_wide_csv_file_if, 8 | ) 9 | from ._GHG_inventory_reading import nir_add_unit_information, nir_convert_df_to_long 10 | from ._interchange_format import ( 11 | from_interchange_format, 12 | read_interchange_format, 13 | write_interchange_format, 14 | ) 15 | 16 | __all__ = [ 17 | "convert_long_dataframe_if", 18 | "convert_wide_dataframe_if", 19 | "from_interchange_format", 20 | "nir_add_unit_information", 21 | "nir_convert_df_to_long", 22 | "read_interchange_format", 23 | "read_long_csv_file_if", 24 | "read_wide_csv_file_if", 25 | "write_interchange_format", 26 | ] 27 | -------------------------------------------------------------------------------- /primap2/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit tests for primap2.""" 2 | 3 | from .examples import minimal_ds # noqa: F401 4 | -------------------------------------------------------------------------------- /primap2/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytest 4 | import xarray as xr 5 | from loguru import logger 6 | 7 | import primap2 # noqa: F401 8 | 9 | from . 
import examples 10 | 11 | 12 | # monkey-patch caplog to work with loguru 13 | # see https://loguru.readthedocs.io/en/stable/resources/migration.html#making-things-work-with-pytest-and-caplog 14 | @pytest.fixture 15 | def caplog(caplog): 16 | class PropagateHandler(logging.Handler): 17 | def emit(self, record): 18 | logging.getLogger(record.name).handle(record) 19 | 20 | handler_id = logger.add(PropagateHandler(), format="{message} {extra}") 21 | yield caplog 22 | logger.remove(handler_id) 23 | 24 | 25 | @pytest.fixture 26 | def minimal_ds() -> xr.Dataset: 27 | """A valid, minimal dataset.""" 28 | return examples._cached_minimal_ds.copy(deep=True) 29 | 30 | 31 | @pytest.fixture 32 | def opulent_ds() -> xr.Dataset: 33 | """A valid dataset using lots of features.""" 34 | return examples._cached_opulent_ds.copy(deep=True) 35 | 36 | 37 | @pytest.fixture 38 | def opulent_str_ds() -> xr.Dataset: 39 | """Like the opulent dataset, but additionally with a stringly typed data variable 40 | "method". 41 | """ 42 | return examples._cached_opulent_str_ds.copy(deep=True) 43 | 44 | 45 | @pytest.fixture 46 | def empty_ds() -> xr.Dataset: 47 | """An empty hull of a dataset with missing data.""" 48 | return examples._cached_empty_ds.copy(deep=True) 49 | 50 | 51 | @pytest.fixture 52 | def opulent_processing_ds() -> xr.Dataset: 53 | """Like the opulent dataset, but additionally with processing information.""" 54 | return examples._cached_opulent_processing_ds.copy(deep=True) 55 | 56 | 57 | @pytest.fixture(params=["opulent", "opulent_str", "opulent_processing", "minimal", "empty"]) 58 | def any_ds(request) -> xr.Dataset: 59 | """Test with all available valid example Datasets.""" 60 | if request.param == "opulent": 61 | return examples._cached_opulent_ds.copy(deep=True) 62 | elif request.param == "opulent_str": 63 | return examples._cached_opulent_str_ds.copy(deep=True) 64 | elif request.param == "opulent_processing": 65 | return examples._cached_opulent_processing_ds.copy(deep=True) 66 | elif request.param == "minimal": 67 | return examples._cached_minimal_ds.copy(deep=True) 68 | elif request.param == "empty": 69 | return examples._cached_empty_ds.copy(deep=True) 70 | -------------------------------------------------------------------------------- /primap2/tests/csg/test_wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Tests for csg/_wrapper.py""" 3 | 4 | from datetime import datetime 5 | from pathlib import Path 6 | 7 | import pandas as pd 8 | 9 | import primap2.csg 10 | from primap2.csg import create_composite_source 11 | from primap2.csg._wrapper import create_time_index, set_priority_coords 12 | from primap2.tests.utils import assert_ds_aligned_equal 13 | 14 | DATA_PATH = Path(__file__).parent.parent / "data" 15 | 16 | 17 | def test_set_priority_coords(minimal_ds): 18 | prio_coords = {"scenario": {"value": "HISTORY", "terminology": "PRIMAP"}} 19 | 20 | prio_coord_ds = set_priority_coords(minimal_ds, prio_coords) 21 | 22 | assert "scenario (PRIMAP)" in prio_coord_ds.coords 23 | assert prio_coord_ds.coords["scenario (PRIMAP)"].values == ["HISTORY"] 24 | 25 | 26 | def test_create_time_index(): 27 | start = "1990" 28 | end = "2000" 29 | start_dt = datetime.strptime(start, "%Y") 30 | end_dt = datetime.strptime(end, "%Y") 31 | start_ts = pd.Timestamp(start) 32 | end_ts = pd.Timestamp(end) 33 | expected = pd.date_range(start=start, end=end, freq="YS") 34 | 35 | # string tuple 36 | pd.testing.assert_index_equal(create_time_index((start,
end)), expected) 37 | 38 | # datetime tuple 39 | pd.testing.assert_index_equal(create_time_index((start_dt, end_dt)), expected) 40 | 41 | # timestamp tuple 42 | pd.testing.assert_index_equal(create_time_index((start_ts, end_ts)), expected) 43 | 44 | # mixed tuple 45 | pd.testing.assert_index_equal(create_time_index((start, end_dt)), expected) 46 | 47 | # DatetimeIndex returned unchanged 48 | pd.testing.assert_index_equal(create_time_index(expected), expected) 49 | 50 | 51 | def test_create_composite_source(): 52 | cat_terminology = "IPCC2006_PRIMAP" 53 | 54 | main_categories = ["1.A", "1.B.2", "2.A", "M.AG.ELV", "M.LULUCF", "4"] 55 | FGAS_categories = ["2"] 56 | 57 | native_entities = ["CO2", "CH4", "N2O", "SF6"] 58 | GWP_entities = ["HFCS"] 59 | GWPs = ["AR6GWP100"] 60 | GWP_variables = [f"{entity} ({GWP})" for entity in GWP_entities for GWP in GWPs] 61 | FGAS_entities = ["SF6", *GWP_entities] 62 | FGAS_variables = ["SF6", *GWP_variables] 63 | variables = native_entities + GWP_variables 64 | # priority 65 | priorities = [ 66 | {"source": "CRF 2023, 240108"}, 67 | {"source": "UNFCCC NAI, 240223"}, 68 | { 69 | "source": "CDIAC 2023, HISTORY", 70 | f"category ({cat_terminology})": ["1.A", "2.A"], 71 | "entity": "CO2", 72 | }, 73 | { 74 | "source": "Andrew cement, HISTORY", 75 | f"category ({cat_terminology})": ["2.A"], 76 | "entity": "CO2", 77 | }, 78 | { 79 | "source": "EI 2023, HISTORY", 80 | f"category ({cat_terminology})": ["1.A", "1.B.2"], 81 | "entity": "CO2", 82 | }, 83 | {"source": "Houghton, HISTORY", f"category ({cat_terminology})": "M.LULUCF"}, 84 | {"source": "FAOSTAT 2023, HISTORY", f"category ({cat_terminology})": ["M.AG.ELV"]}, 85 | {"source": "EDGAR 8.0, HISTORY", "entity": ["CO2", "CH4", "N2O"]}, 86 | { 87 | "source": "EDGAR 7.0, HISTORY", 88 | f"category ({cat_terminology})": FGAS_categories, 89 | "variable": FGAS_variables, 90 | }, 91 | ] 92 | 93 | used_sources = [prio["source"] for prio in priorities] 94 | FGAS_sources = [ 95 | "CRF 2023, 240108", 96 | "CRF 2022, 230510", 97 | "UNFCCC NAI, 240223", 98 | "EDGAR 7.0, HISTORY", 99 | ] 100 | 101 | result_prio_coords = { 102 | "source": {"value": "PRIMAP-test"}, 103 | "scenario": {"value": "HISTORY", "terminology": "PRIMAP"}, 104 | } 105 | 106 | metadata = {"references": "test-data", "contact": "test@example.xx"} 107 | 108 | input_data = primap2.open_dataset(DATA_PATH / "primap2_test_data_v2.5.1_final.nc") 109 | 110 | # we use source as the priority dimension, everything else is a fixed coordinate.
111 | # we have one country-specific exception for each country in the prioritization 112 | # that's likely a bit more than realistic, but let's aim high 113 | priority_definition = primap2.csg.PriorityDefinition( 114 | priority_dimensions=["source"], 115 | priorities=priorities, 116 | exclude_result=[ 117 | { 118 | "entity": ["CO2", "CH4", "N2O"], 119 | f"category ({cat_terminology})": FGAS_categories, 120 | }, 121 | { 122 | "entity": FGAS_entities, 123 | f"category ({cat_terminology})": main_categories, 124 | }, 125 | ], 126 | ) 127 | 128 | strategies_FGAS = [ 129 | ( 130 | { 131 | "source": FGAS_sources, 132 | "entity": FGAS_entities, 133 | f"category ({cat_terminology})": FGAS_categories, 134 | }, 135 | primap2.csg.GlobalLSStrategy(), 136 | ), 137 | ( 138 | { 139 | "source": FGAS_sources, 140 | "entity": FGAS_entities, 141 | f"category ({cat_terminology})": FGAS_categories, 142 | }, 143 | primap2.csg.SubstitutionStrategy(), 144 | ), 145 | ] 146 | 147 | strategies_CO2CH4N2O = [ 148 | ( 149 | { 150 | "source": used_sources, 151 | "entity": ["CO2", "CH4", "N2O"], 152 | f"category ({cat_terminology})": main_categories, 153 | }, 154 | primap2.csg.GlobalLSStrategy(), 155 | ), 156 | ( 157 | { 158 | "source": used_sources, 159 | "entity": ["CO2", "CH4", "N2O"], 160 | f"category ({cat_terminology})": main_categories, 161 | }, 162 | primap2.csg.SubstitutionStrategy(), 163 | ), 164 | ] 165 | 166 | strategy_definition = primap2.csg.StrategyDefinition( 167 | strategies=strategies_CO2CH4N2O + strategies_FGAS 168 | ) 169 | 170 | test_time_range = ("1962", "2022") # cut a few years to make sure that works 171 | # test_limit_coords = {'entity': ['CO2', 'CH4', 'N2O']} 172 | test_limit_coords = { 173 | "variable": variables, 174 | "category": main_categories + FGAS_categories, 175 | "source": used_sources, 176 | } 177 | 178 | result = create_composite_source( 179 | input_data, 180 | priority_definition=priority_definition, 181 | strategy_definition=strategy_definition, 182 | result_prio_coords=result_prio_coords, 183 | limit_coords=test_limit_coords, 184 | time_range=test_time_range, 185 | progress_bar=None, 186 | metadata=metadata, 187 | ) 188 | 189 | # remove processing info as following functions can't deal with it yet 190 | # in this case to_netcdf can't deal with the None values in processing info 191 | result = result.pr.remove_processing_info() 192 | 193 | # assert results 194 | # load comparison data 195 | comp_filename = "PRIMAP-csg-test.nc" 196 | file_to_load = DATA_PATH / comp_filename 197 | data_comp = primap2.open_dataset(file_to_load) 198 | 199 | assert_ds_aligned_equal(data_comp, result, equal_nan=True) 200 | -------------------------------------------------------------------------------- /primap2/tests/csg/utils.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Sequence 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import xarray as xr 6 | 7 | 8 | def get_single_ts( 9 | *, 10 | time: pd.DatetimeIndex | None = None, 11 | data: np.ndarray | None = None, 12 | dims: Sequence[str] | None = None, 13 | coords: dict[str, str | Sequence[str]] | None = None, 14 | entity: str = "CH4", 15 | gwp_context: str | None = None, 16 | ) -> xr.DataArray: 17 | if time is None: 18 | time = pd.date_range("1850-01-01", "2022-01-01", freq="YS") 19 | if dims is None: 20 | dims = [] 21 | if data is None: 22 | data = np.linspace(0.0, 1.0, len(time)) 23 | if coords is None: 24 | coords = {} 25 | if gwp_context is None: 26 | name = entity 27 | 
attrs = {"entity": entity} 28 | else: 29 | name = f"{entity} ({gwp_context})" 30 | attrs = {"entity": entity, "gwp_context": gwp_context} 31 | return xr.DataArray( 32 | data, 33 | dims=["time", *dims], 34 | coords={"time": time, **coords}, 35 | name=name, 36 | attrs=attrs, 37 | ) 38 | -------------------------------------------------------------------------------- /primap2/tests/data/BURDI_conversion.csv: -------------------------------------------------------------------------------- 1 | # references: non_annex1_data repo 2 | # last_update: 2024-10-14 3 | BURDI,IPCC2006_PRIMAP,comment 4 | 1,1 5 | 1.A,1.A 6 | 1.A.1,1.A.1 7 | 1.A.2,1.A.2 8 | 1.A.3,1.A.3 9 | 1.A.4,1.A.4 10 | 1.A.5,1.A.5 11 | 1.B,1.B 12 | 1.B.1,1.B.1 13 | 1.B.2,1.B.2 14 | 2 + 3,2 15 | 2.A,2.A 16 | 2.B + 2.E,2.B 17 | 2.C,2.C 18 | 2.F,2.F 19 | 2.G + 2.D, 2.H 20 | 2.G, 2.H.3 21 | 3,2.D 22 | 4,M.AG 23 | 4.A,3.A.1 24 | 4.B,3.A.2 25 | 4.C,3.C.7 26 | 4.D, M.3.C.45.AG 27 | 4.D + 4.C + 4.E + 4.F + 4.G,3.C 28 | 4.E,3.C.1.c 29 | 4.F,3.C.1.b 30 | 4.G,3.C.8 31 | 5,M.LULUCF 32 | 4+5,3 33 | 6,4 34 | 6.A,4.A 35 | 6.B,4.D 36 | 6.C,4.C 37 | 6.D,4.E 38 | 24540,0 39 | 15163,M.0.EL 40 | 14637,M.BK 41 | 14424,M.BK.A 42 | 14423,M.BK.M, 43 | 14638, M.BIO 44 | 7,5, 5.A-D ignored as not fitting 2006 cats 45 | -------------------------------------------------------------------------------- /primap2/tests/data/Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.csv: -------------------------------------------------------------------------------- 1 | source,scenario (PRIMAP),provenance,area (ISO3),entity,unit,category (IPCC1996),1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019 2 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,0,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93 3 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,2,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93 4 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,2.C,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93 5 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,2.C.3,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93 6 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,M.0.EL,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93 7 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,0,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63 8 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,2,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63 9 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / 
yr,2.C,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63 10 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,2.C.3,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63 11 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,M.0.EL,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63 12 | PRIMAP-crf,2021V1,measured,AUS,CH4,kt CH4 / yr,0,,,,,5697,5624,5519,5369,5237,5177,5176,5283,5239,5149,5367,5297,5228,4992,5004,5091,5092,5122,5032,4971,4860,4932,4879,4855,4768,4760,4716,4791,4753,4483 13 | PRIMAP-crf,2021V1,measured,AUS,CH4,kt CH4 / yr,1,,,,,1279,1266,1326,1291,1220,1270,1261,1367,1376,1267,1357,1341,1283,1241,1239,1296,1320,1400,1399,1372,1355,1310,1328,1324,1236,1366,1379,1358,1390,1316 14 | PRIMAP-crf,2021V1,measured,AUS,CH4,kt CH4 / yr,1.A,,,,, 15 | -------------------------------------------------------------------------------- /primap2/tests/data/Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/primap2/tests/data/Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.nc -------------------------------------------------------------------------------- /primap2/tests/data/Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.yaml: -------------------------------------------------------------------------------- 1 | attrs: 2 | references: 'doi: 10.5281/zenodo.4723476' 3 | rights: CC-BY 4.0 4 | contact: johannes.guetschow@pik-potsdam.de 5 | title: 'PRIMAP-crf: UNFCCC CRF data in IPCC categories (PRIMAP-crf-2021-v1)' 6 | comment: 'The dataset is described by the article: Jeffery et al., PRIMAP-crf: UNFCCC 7 | CRF data in IPCC 2006 categories, ESSD 10(3), doi: 10.5194/essd-10-1427-2018' 8 | institution: Potsdam Institute for Climate Impact Research 9 | area: area (ISO3) 10 | scen: scenario (PRIMAP) 11 | cat: category (IPCC1996) 12 | time_format: '%Y' 13 | dimensions: 14 | '*': 15 | - source 16 | - scenario (PRIMAP) 17 | - provenance 18 | - area (ISO3) 19 | - entity 20 | - unit 21 | - category (IPCC1996) 22 | data_file: Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.csv 23 | -------------------------------------------------------------------------------- /primap2/tests/data/PRIMAP-csg-test.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/primap2/tests/data/PRIMAP-csg-test.nc -------------------------------------------------------------------------------- /primap2/tests/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/primap2/tests/data/__init__.py -------------------------------------------------------------------------------- /primap2/tests/data/long.csv: -------------------------------------------------------------------------------- 1 | country,category,gas,unit,year,emissions,other 2 | AUS,IPC1,CO2,Gg,1991,4.1,a 3 | AUS,IPC1,CO2,Gg,2000,5,b 4 | AUS,IPC1,CO2,Gg,2010,6,c 5 | 
ZAM,IPC2,CH4,Mt,1991,7,d 6 | ZAM,IPC2,CH4,Mt,2000,8,e 7 | ZAM,IPC2,CH4,Mt,2010,9,f 8 | -------------------------------------------------------------------------------- /primap2/tests/data/long_no_time.csv: -------------------------------------------------------------------------------- 1 | country,category,gas,unit,emissions,other 2 | AUS,IPC1,CO2,Gg,4.1,a 3 | ZAM,IPC2,CH4,Mt,7,d 4 | -------------------------------------------------------------------------------- /primap2/tests/data/primap2_test_data_v2.5.1_final.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/primap2/tests/data/primap2_test_data_v2.5.1_final.nc -------------------------------------------------------------------------------- /primap2/tests/data/simple_categorisation_a.yaml: -------------------------------------------------------------------------------- 1 | name: A 2 | title: Simple Categorization 3 | comment: A simple example categorization without relationships between categories 4 | references: doi:00000/00000 5 | institution: PIK 6 | last_update: 2021-02-23 7 | hierarchical: no 8 | version: 1 9 | categories: 10 | 1: 11 | title: Category 1 12 | comment: The first category 13 | alternative_codes: 14 | - A 15 | - CatA 16 | info: 17 | important_data: 18 | - A 19 | - B 20 | - C 21 | other_important_thing: ABC 22 | 2: 23 | title: Category 2 24 | comment: The second category 25 | alternative_codes: 26 | - B 27 | - CatB 28 | 3: 29 | title: Category 3 30 | comment: The third category 31 | alternative_codes: 32 | - C 33 | - CatC 34 | unnumbered: 35 | title: The unnumbered category 36 | -------------------------------------------------------------------------------- /primap2/tests/data/simple_categorisation_b.yaml: -------------------------------------------------------------------------------- 1 | name: B 2 | title: Simple Categorization 3 | comment: A simple example categorization without relationships between categories 4 | references: doi:00000/00000 5 | institution: PIK 6 | last_update: 2021-02-23 7 | hierarchical: no 8 | version: 1 9 | categories: 10 | 1: 11 | title: Category 1 12 | comment: The first category 13 | alternative_codes: 14 | - A 15 | - CatA 16 | info: 17 | important_data: 18 | - A 19 | - B 20 | - C 21 | other_important_thing: ABC 22 | 2: 23 | title: Category 2 24 | comment: The second category 25 | alternative_codes: 26 | - B 27 | - CatB 28 | 3: 29 | title: Category 3 30 | comment: The third category 31 | 4: 32 | title: Category 4 33 | comment: The fourth category 34 | 5: 35 | title: Category 5 36 | comment: The fifth category 37 | -------------------------------------------------------------------------------- /primap2/tests/data/simple_conversion.csv: -------------------------------------------------------------------------------- 1 | # references: test 2 | # last_update: 2024-10-14 3 | A,B,comment 4 | 1,1, no comment 5 | 2+3,2 6 | -------------------------------------------------------------------------------- /primap2/tests/data/test_create_category_name_conversion.csv: -------------------------------------------------------------------------------- 1 | # references: test 2 | # last_update: 2024-10-14 3 | A,B,comment 4 | 1,1+2, no comment 5 | 2,-3+4 6 | 3,5-1 7 | -------------------------------------------------------------------------------- /primap2/tests/data/test_csv_data.csv: -------------------------------------------------------------------------------- 1 | 
country,category,gas,unit,1991,2000,2010 2 | AUS,IPC1,CO2,Gg,4.1,5,6 3 | ZAM,IPC2,CH4,Mt,7,8,9 4 | -------------------------------------------------------------------------------- /primap2/tests/data/test_csv_data_category_name.csv: -------------------------------------------------------------------------------- 1 | country,category,category_name,gas,unit,1991,2000,2010 2 | AUS,IPC1,Energy,CO2,Gg,4.1,5,6 3 | ZAM,IPC2,IPPU,CH4,Mt,7,8,9 4 | -------------------------------------------------------------------------------- /primap2/tests/data/test_csv_data_category_name_fill_cat_code.csv: -------------------------------------------------------------------------------- 1 | country,category,category_name,gas,unit,1991,2000,2010 2 | AUS,XX,Energy,CO2,Gg,4.1,5,6 3 | ZAM,XX,IPPU,CH4,Mt,7,8,9 4 | -------------------------------------------------------------------------------- /primap2/tests/data/test_csv_data_category_name_long.csv: -------------------------------------------------------------------------------- 1 | country,category,category_name,gas,unit,year,emissions 2 | AUS,IPC1,Energy,CO2,Gg,1991,4.1 3 | ZAM,IPC2,IPPU,CH4,Mt,1991,7 4 | AUS,IPC1,Energy,CO2,Gg,2000,5 5 | ZAM,IPC2,IPPU,CH4,Mt,2000,8 6 | AUS,IPC1,Energy,CO2,Gg,2010,6 7 | ZAM,IPC2,IPPU,CH4,Mt,2010,9 8 | -------------------------------------------------------------------------------- /primap2/tests/data/test_csv_data_sec_cat.csv: -------------------------------------------------------------------------------- 1 | country,category,classification,gas,unit,1991,2000,2010 2 | AUS,IPC1,TOTAL,CO2,Mt,4,5,6 3 | AUS,IPC0,TOTAL,KYOTOGHG,MtCO2eq,8,9,10 4 | ZAM,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40 5 | ZAM,IPC2,TOTAL,CH4,Gg,7,8,9 6 | ZAM,IPC2,TOTAL,CO2,Gg,12,13,14 7 | USA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,300,200,400 8 | USA,IPC2,TOTAL,CH4,Gg,70,80,90 9 | USA,IPC3,TOTAL,CO2,Gg,120,130,140 10 | FRA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40 11 | FRA,IPC2,TOTAL,CH4,Gg,7,8,9 12 | FRA,IPC2,TOTAL,CO2,Gg,12,13,14 13 | -------------------------------------------------------------------------------- /primap2/tests/data/test_csv_data_sec_cat_strings.csv: -------------------------------------------------------------------------------- 1 | country,category,classification,gas,unit,1991,2000,2010 2 | AUS,IPC1,TOTAL,CO2,Mt,4,5,6 3 | AUS,IPC0,TOTAL,KYOTOGHG,MtCO2eq,8,9,10 4 | ZAM,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40 5 | ZAM,IPC2,TOTAL,CH4,Gg,7,8,9 6 | ZAM,IPC2,TOTAL,CO2,Gg,12,13,14 7 | USA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,300,200,100 8 | USA,IPC2,TOTAL,CH4,Gg,70,N/A,90 9 | USA,IPC3,TOTAL,CO2,Gg,120,TEST,140 10 | FRA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,IE,20,40 11 | FRA,IPC2,TOTAL,CH4,Gg,7, ,9 12 | FRA,IPC2,TOTAL,CO2,Gg,12,13,14 13 | -------------------------------------------------------------------------------- /primap2/tests/data/test_csv_data_unit_harmonization.csv: -------------------------------------------------------------------------------- 1 | country,category,gas,unit,1991,2000,2010 2 | AUS,IPC1,CH4,Gg,4.1,5,6 3 | AUS,IPC2,CH4 (SARGWP100),GgCO2eq,147,168,189 4 | DEU,IPC1,SF6,Gg,4.1,5,6 5 | DEU,IPC2,SF6,t,1000,2000,3000 6 | -------------------------------------------------------------------------------- /primap2/tests/data/test_empty_ds_if.csv: -------------------------------------------------------------------------------- 1 | "source","area (ISO3)","entity","unit","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020" 2 | "RAND2020","ARG","CH4","CH4 * gigagram / 
year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 3 | "RAND2020","ARG","CO2","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 4 | "RAND2020","ARG","KYOTOGHG (AR4GWP100)","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 5 | "RAND2020","ARG","SF6","SF6 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 6 | "RAND2020","BOL","CH4","CH4 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 7 | "RAND2020","BOL","CO2","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 8 | "RAND2020","BOL","KYOTOGHG (AR4GWP100)","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 9 | "RAND2020","BOL","SF6","SF6 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 10 | "RAND2020","COL","CH4","CH4 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 11 | "RAND2020","COL","CO2","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 12 | "RAND2020","COL","KYOTOGHG (AR4GWP100)","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 13 | "RAND2020","COL","SF6","SF6 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 14 | "RAND2020","MEX","CH4","CH4 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 15 | "RAND2020","MEX","CO2","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 16 | "RAND2020","MEX","KYOTOGHG (AR4GWP100)","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 17 | "RAND2020","MEX","SF6","SF6 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 18 | -------------------------------------------------------------------------------- /primap2/tests/data/test_empty_ds_if.yaml: -------------------------------------------------------------------------------- 1 | attrs: 2 | area: area (ISO3) 3 | comment: This needs to be sorted alphabetically. 
4 | contact: Someone 5 | title: Test Dataset 6 | data_file: test_empty_ds_if.csv 7 | dimensions: 8 | '*': 9 | - area (ISO3) 10 | - entity 11 | - source 12 | - time 13 | - unit 14 | time_format: '%Y' 15 | -------------------------------------------------------------------------------- /primap2/tests/data/test_from_interchange_format_output.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/primap2/tests/data/test_from_interchange_format_output.nc -------------------------------------------------------------------------------- /primap2/tests/data/test_read_wide_csv_file_no_sec_cats.csv: -------------------------------------------------------------------------------- 1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),1991,2000,2010 2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,4.1,5.0,6.0 3 | 1,TESTcsv2021,HISTORY,ZAM,CH4,Mt CH4 / yr,2,7.0,8.0,9.0 4 | -------------------------------------------------------------------------------- /primap2/tests/data/test_read_wide_csv_file_no_sec_cats_cat_name.csv: -------------------------------------------------------------------------------- 1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),category_name,1991,2000,2010 2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,Energy,4.1,5.0,6.0 3 | 1,TESTcsv2021,HISTORY,ZAM,CH4,Mt CH4 / yr,2,IPPU,7.0,8.0,9.0 4 | -------------------------------------------------------------------------------- /primap2/tests/data/test_read_wide_csv_file_output.csv: -------------------------------------------------------------------------------- 1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),Class (class),Type (type),1991,2000,2010 2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,TOTAL,fugitive,4000.0,5000.0,6000.0 3 | 1,TESTcsv2021,HISTORY,AUS,KYOTOGHG (SARGWP100),Mt CO2 / yr,0,TOTAL,fugitive,8.0,9.0,10.0 4 | 2,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,2,TOTAL,fugitive,12.0,13.0,14.0 5 | 3,TESTcsv2021,HISTORY,ZAM,KYOTOGHG (SARGWP100),Mt CO2 / yr,0,TOTAL,fugitive,0.03,0.02,0.04 6 | -------------------------------------------------------------------------------- /primap2/tests/data/test_read_wide_csv_file_output_entity_def.csv: -------------------------------------------------------------------------------- 1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),Class (class),Type (type),1991,2000,2010 2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,0,TOTAL,fugitive,8000.0,9000.0,10000.0 3 | 1,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,TOTAL,fugitive,4000.0,5000.0,6000.0 4 | 2,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,0,TOTAL,fugitive,30.0,20.0,40.0 5 | 3,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,2,TOTAL,fugitive,7.0,8.0,9.0 6 | 4,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,2,TOTAL,fugitive,12.0,13.0,14.0 7 | -------------------------------------------------------------------------------- /primap2/tests/data/test_read_wide_csv_file_output_unit_def.csv: -------------------------------------------------------------------------------- 1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),Class (class),Type (type),1991,2000,2010 2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,TOTAL,fugitive,4.0,5.0,6.0 3 | 1,TESTcsv2021,HISTORY,ZAM,CH4,Gg CH4 / yr,2,TOTAL,fugitive,7.0,8.0,9.0 4 | 2,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,2,TOTAL,fugitive,12.0,13.0,14.0 5 | 
-------------------------------------------------------------------------------- /primap2/tests/data/test_read_wide_csv_file_output_unit_harm.csv: -------------------------------------------------------------------------------- 1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),1991,2000,2010 2 | 0,TESTcsv2021,HISTORY,AUS,CH4,Gg CH4 / yr,1,4.1,5.0,6.0 3 | 1,TESTcsv2021,HISTORY,AUS,CH4,Gg CH4 / yr,2,7,8,9 4 | 2,TESTcsv2021,HISTORY,DEU,SF6,Gg SF6 / yr,1,4.1,5.0,6.0 5 | 3,TESTcsv2021,HISTORY,DEU,SF6,Gg SF6 / yr,2,1,2,3 6 | -------------------------------------------------------------------------------- /primap2/tests/data/test_sum_skip_allna_inhomogeneous_result.nc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/primap-community/primap2/71fc2ac4771e2652e7a9fbde88c250e98024df44/primap2/tests/data/test_sum_skip_allna_inhomogeneous_result.nc -------------------------------------------------------------------------------- /primap2/tests/test_conversion.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import primap2.pm2io as pm2io 4 | import primap2.pm2io._conversion # noqa: F401 5 | 6 | 7 | class TestIPCCCodePrimapToPrimap2: 8 | @pytest.mark.parametrize( 9 | "code_in, expected_code_out", 10 | [ 11 | ("IPC1A", "1.A"), 12 | ("CATM0EL", "M.0.EL"), 13 | ("IPC1A1B23", "1.A.1.b.ii.3"), 14 | ("1A1Bii3", "1.A.1.b.ii.3"), 15 | ("IPC_1.A.1.B.ii.3", "1.A.1.b.ii.3"), 16 | ("IPCM1B1C", "M.1.B.1.c"), 17 | ("M.1.B.1.C", "M.1.B.1.c"), 18 | ("M.1.B.1.C.", "M.1.B.1.c"), 19 | ("M1B1C", "M.1.B.1.c"), 20 | ], 21 | ) 22 | def test_working(self, code_in, expected_code_out): 23 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2(code_in) == expected_code_out 24 | 25 | def test_too_short(self, caplog): 26 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC") == "error_IPC" 27 | assert "WARNING" in caplog.text 28 | assert "Too short to be a PRIMAP IPCC code after removal of prefix." in caplog.text 29 | 30 | def test_wrong_format(self, caplog): 31 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPD1A") == "error_IPD1A" 32 | assert "WARNING" in caplog.text 33 | # assert ( 34 | # "Prefix is missing or unknown, known codes are 'IPC' and 'CAT'. " 35 | # "Assuming no code is present." in caplog.text 36 | # ) 37 | assert "No digit found on first level." in caplog.text 38 | 39 | def test_end_after_m(self, caplog): 40 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPCM") == "error_IPCM" 41 | assert "WARNING" in caplog.text 42 | assert "Nothing follows the 'M' for an 'M'-code." in caplog.text 43 | 44 | def test_first_lvl(self, caplog): 45 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPCA1") == "error_IPCA1" 46 | assert "WARNING" in caplog.text 47 | assert "No digit found on first level." in caplog.text 48 | 49 | def test_second_lvl(self, caplog): 50 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC123") == "error_IPC123" 51 | assert "WARNING" in caplog.text 52 | assert "No letter found on second level." in caplog.text 53 | 54 | def test_third_lvl(self, caplog): 55 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1AC") == "error_IPC1AC" 56 | assert "WARNING" in caplog.text 57 | assert "No number found on third level." 
in caplog.text 58 | 59 | def test_fourth_lvl(self, caplog): 60 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1A2_") == "error_IPC1A2_" 61 | assert "WARNING" in caplog.text 62 | assert "No letter found on fourth level." in caplog.text 63 | 64 | def test_fifth_lvl(self, caplog): 65 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1A2BB") == "error_IPC1A2BB" 66 | assert "WARNING" in caplog.text 67 | assert "No digit or roman numeral found on fifth level." in caplog.text 68 | 69 | def test_sixth_lvl(self, caplog): 70 | assert ( 71 | pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1A2B3X") == "error_IPC1A2B3X" 72 | ) 73 | assert "WARNING" in caplog.text 74 | assert "No number found on sixth level." in caplog.text 75 | 76 | def test_after_sixth_lvl(self, caplog): 77 | assert ( 78 | pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1A2B33A") 79 | == "error_IPC1A2B33A" 80 | ) 81 | assert "WARNING" in caplog.text 82 | assert "Chars left after sixth level." in caplog.text 83 | 84 | 85 | class TestUnitPrimapToPrimap2: 86 | @pytest.mark.parametrize( 87 | "unit_in, entity_in, expected_unit_out", 88 | [ 89 | ("GgCO2eq", "KYOTOGHG", "Gg CO2 / yr"), 90 | ("MtC", "CO", "Mt C / yr"), 91 | ("GgN2ON", "N2O", "Gg N / yr"), 92 | ("t", "CH4", "t CH4 / yr"), 93 | ], 94 | ) 95 | def test_working(self, unit_in, entity_in, expected_unit_out): 96 | assert pm2io._conversion.convert_unit_to_primap2(unit_in, entity_in) == expected_unit_out 97 | 98 | def test_no_prefix(self, caplog): 99 | assert pm2io._conversion.convert_unit_to_primap2("CO2eq", "FGASES") == "error_CO2eq_FGASES" 100 | assert "WARNING" in caplog.text 101 | assert "No unit prefix matched for unit." in caplog.text 102 | 103 | def test_unit_empty(self, caplog): 104 | assert pm2io._conversion.convert_unit_to_primap2("", "FGASES") == "error__FGASES" 105 | assert "WARNING" in caplog.text 106 | assert "Input unit is empty. Nothing converted." in caplog.text 107 | 108 | def test_entity_empty(self, caplog): 109 | assert pm2io._conversion.convert_unit_to_primap2("GgCO2eq", "") == "error_GgCO2eq_" 110 | assert "WARNING" in caplog.text 111 | assert "Input entity is empty. Nothing converted." in caplog.text 112 | 113 | 114 | @pytest.mark.parametrize( 115 | "entity_pm1, entity_pm2", 116 | [ 117 | ("CO2", "CO2"), 118 | ("KYOTOGHG", "KYOTOGHG (SARGWP100)"), 119 | ("KYOTOGHGAR4", "KYOTOGHG (AR4GWP100)"), 120 | ], 121 | ) 122 | def test_convert_entity_gwp_primap_to_primap2(entity_pm1, entity_pm2): 123 | assert pm2io._conversion.convert_entity_gwp_primap_to_primap2(entity_pm1) == entity_pm2 124 | -------------------------------------------------------------------------------- /primap2/tests/test_fill_combine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Tests for _fill_combine.py 3 | 4 | We only test the features regarding the buggy treatment of additional (non-indexed) 5 | coordinates here. 
All core functionality is assumed to be sufficiently tested in xarray 6 | """ 7 | 8 | import numpy as np 9 | 10 | 11 | def test_fillna_ds_coord_present(minimal_ds): 12 | # add additional coordinate 13 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"] 14 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names)) 15 | 16 | sel = {"area (ISO3)": ["COL", "MEX"]} 17 | sel_ds = full_ds.pr.loc[sel] 18 | nan_ds = full_ds.copy() 19 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = ( 20 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan 21 | ) 22 | 23 | result_ds = nan_ds.pr.fillna(sel_ds) 24 | 25 | # assert_ds_aligned_equal(result_ds, full_ds) 26 | # above fails because data type of country_names differs 27 | 28 | # check that the additional coord is present in result 29 | assert "country_name" in list(result_ds.coords) 30 | # check that the mapping of country names to country codes is intact 31 | # (meaning that the additional coordinate is aligned correctly) 32 | for country in full_ds.coords["area (ISO3)"].values: 33 | assert ( 34 | result_ds.coords["country_name"].loc[{"area (ISO3)": country}] 35 | == full_ds.coords["country_name"].loc[{"area (ISO3)": country}] 36 | ) 37 | 38 | 39 | def test_fillna_da_coord_present(minimal_ds): 40 | # add additional coordinate 41 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"] 42 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names)) 43 | 44 | sel = {"area (ISO3)": ["COL", "MEX"]} 45 | sel_ds = full_ds.pr.loc[sel] 46 | nan_ds = full_ds.copy() 47 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = ( 48 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan 49 | ) 50 | 51 | result_da = nan_ds["CO2"].pr.fillna(sel_ds["CO2"]) 52 | 53 | # check that the additional coord is present in result 54 | assert "country_name" in list(result_da.coords) 55 | # check that the mapping of country names to country codes is intact 56 | # (meaning that the additional coordinate is aligned correctly) 57 | for country in full_ds.coords["area (ISO3)"].values: 58 | assert ( 59 | result_da.coords["country_name"].loc[{"area (ISO3)": country}] 60 | == full_ds.coords["country_name"].loc[{"area (ISO3)": country}] 61 | ) 62 | 63 | 64 | def test_combine_first_ds_coord_present(minimal_ds): 65 | # add additional coordinate 66 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"] 67 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names)) 68 | 69 | sel = {"area (ISO3)": ["COL", "MEX"]} 70 | sel_ds = full_ds.pr.loc[sel] 71 | nan_ds = full_ds.pr.loc[{"area (ISO3)": ["ARG", "COL"]}] 72 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = ( 73 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan 74 | ) 75 | 76 | result_ds = nan_ds.pr.combine_first(sel_ds) 77 | compare_ds = full_ds.pr.loc[{"area (ISO3)": ["ARG", "COL", "MEX"]}] 78 | 79 | # check that the additional coord is present in result 80 | assert "country_name" in list(result_ds.coords) 81 | # check that the mapping of country names to country codes is intact 82 | # (meaning that the additional coordinate is aligned correctly) 83 | for country in compare_ds.coords["area (ISO3)"].values: 84 | assert ( 85 | result_ds.coords["country_name"].loc[{"area (ISO3)": country}] 86 | == compare_ds.coords["country_name"].loc[{"area (ISO3)": country}] 87 | ) 88 | 89 | 90 | def test_combine_first_da_coord_present(minimal_ds): 91 | # add additional coordinate 92 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"] 93 | full_ds = 
minimal_ds.assign_coords(country_name=("area (ISO3)", country_names)) 94 | 95 | sel = {"area (ISO3)": ["COL", "MEX"]} 96 | sel_ds = full_ds.pr.loc[sel] 97 | nan_ds = full_ds.pr.loc[{"area (ISO3)": ["ARG", "COL"]}] 98 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = ( 99 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan 100 | ) 101 | 102 | result_da = nan_ds["CO2"].pr.combine_first(sel_ds["CO2"]) 103 | compare_da = full_ds["CO2"].pr.loc[{"area (ISO3)": ["ARG", "COL", "MEX"]}] 104 | 105 | # check that the additional coord is present in result 106 | assert "country_name" in list(result_da.coords) 107 | # check that the mapping of country names to country codes is intact 108 | # (meaning that the additional coordinate is aligned correctly) 109 | for country in compare_da.coords["area (ISO3)"].values: 110 | assert ( 111 | result_da.coords["country_name"].loc[{"area (ISO3)": country}] 112 | == compare_da.coords["country_name"].loc[{"area (ISO3)": country}] 113 | ) 114 | 115 | 116 | # tests to check if xarray bug persists 117 | def test_fillna_ds_xr_fail(minimal_ds): 118 | # add additional coordinate 119 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"] 120 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names)) 121 | 122 | sel = {"area (ISO3)": ["COL", "MEX"]} 123 | sel_ds = full_ds.pr.loc[sel] 124 | nan_ds = full_ds.copy() 125 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = ( 126 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan 127 | ) 128 | 129 | result_ds = nan_ds.fillna(sel_ds) 130 | 131 | assert "country_name" not in list(result_ds.coords) 132 | 133 | 134 | def test_combine_first_ds_xr_fail(minimal_ds): 135 | # add additional coordinate 136 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"] 137 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names)) 138 | 139 | sel = {"area (ISO3)": ["COL", "MEX"]} 140 | sel_ds = full_ds.pr.loc[sel] 141 | nan_ds = full_ds.pr.loc[{"area (ISO3)": ["ARG", "COL"]}] 142 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = ( 143 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan 144 | ) 145 | 146 | result_ds = nan_ds.combine_first(sel_ds) 147 | 148 | assert "country_name" not in list(result_ds.coords) 149 | -------------------------------------------------------------------------------- /primap2/tests/test_interchange_format.py: -------------------------------------------------------------------------------- 1 | """Tests for the interchange format.""" 2 | 3 | import csv 4 | import importlib 5 | import importlib.resources 6 | 7 | import pandas as pd 8 | import pytest 9 | import xarray as xr 10 | 11 | import primap2 12 | from primap2 import pm2io 13 | 14 | from . 
import utils 15 | 16 | 17 | def test_round_trip(any_ds: xr.Dataset, tmp_path): 18 | path = tmp_path / "if" 19 | pm2io.write_interchange_format(path, any_ds.pr.to_interchange_format()) 20 | with path.with_suffix(".yaml").open() as fd: 21 | print(fd.read()) 22 | actual = pm2io.from_interchange_format(pm2io.read_interchange_format(path)) 23 | # we expect that Processing information is lost here 24 | expected = any_ds 25 | to_remove = [] 26 | for var in expected: 27 | if ( 28 | isinstance(var, str) 29 | and var.startswith("Processing of ") 30 | and "described_variable" in expected[var].attrs 31 | ): 32 | to_remove.append(var) 33 | for var in to_remove: 34 | del expected[var] 35 | utils.assert_ds_aligned_equal(any_ds, actual) 36 | 37 | 38 | def test_missing_file(minimal_ds, tmp_path): 39 | path = tmp_path / "if" 40 | pm2io.write_interchange_format(path, minimal_ds.pr.to_interchange_format()) 41 | with path.with_suffix(".yaml").open() as fd: 42 | content = fd.readlines() 43 | with path.with_suffix(".yaml").open("w") as fd: 44 | for line in content: 45 | if "data_file" in line: 46 | continue 47 | fd.write(line) 48 | 49 | # first test automatic discovery 50 | actual = pm2io.from_interchange_format(pm2io.read_interchange_format(path)) 51 | utils.assert_ds_aligned_equal(minimal_ds, actual) 52 | 53 | # now test without csv file 54 | path.with_suffix(".csv").unlink() 55 | with pytest.raises(FileNotFoundError, match="Data file not found at"): 56 | pm2io.read_interchange_format(path) 57 | 58 | 59 | def test_inharmonic_units(minimal_ds, tmp_path): 60 | path = tmp_path / "if" 61 | pm2io.write_interchange_format(path, minimal_ds.pr.to_interchange_format()) 62 | df = pd.read_csv(path.with_suffix(".csv")) 63 | df.loc[3, "unit"] = "m" 64 | df.to_csv(path.with_suffix(".csv"), index=False, quoting=csv.QUOTE_NONNUMERIC) 65 | 66 | with pytest.raises(ValueError, match="More than one unit"): 67 | pm2io.from_interchange_format(pm2io.read_interchange_format(path)) 68 | 69 | 70 | def test_stable_sorting(empty_ds, tmp_path): 71 | path = tmp_path / "test_empty_ds_if" 72 | ds = empty_ds.copy() 73 | # add some unsorted metadata 74 | ds.pr.contact = "Someone" 75 | ds.pr.comment = "This needs to be sorted alphabetically." 76 | ds.pr.title = "Test Dataset" 77 | # mess up the sorting of the data 78 | ds_if = ds.pr.to_interchange_format() 79 | ds_if = ds_if.sort_values("entity") 80 | pm2io.write_interchange_format(path, ds_if) 81 | result_csv = path.with_suffix(".csv").read_bytes() 82 | result_yaml = path.with_suffix(".yaml").read_bytes() 83 | test_data_dir = importlib.resources.files(primap2).joinpath("tests").joinpath("data") 84 | expected_csv = test_data_dir.joinpath("test_empty_ds_if.csv").read_bytes() 85 | expected_yaml = test_data_dir.joinpath("test_empty_ds_if.yaml").read_bytes() 86 | 87 | assert result_csv == expected_csv 88 | assert result_yaml == expected_yaml 89 | -------------------------------------------------------------------------------- /primap2/tests/test_metadata.py: -------------------------------------------------------------------------------- 1 | """Tests for _metadata.py""" 2 | 3 | import datetime 4 | 5 | 6 | def test_metadata_properties(opulent_ds): 7 | ds = opulent_ds 8 | assert ds.pr.references == "doi:10.1012" 9 | assert ds.pr.rights == "Use however you want." 10 | assert ds.pr.contact == "lol_no_one_will_answer@example.com" 11 | assert ds.pr.title == "Completely invented GHG inventory data" 12 | assert ds.pr.comment == "GHG inventory data ..." 
13 | assert ds.pr.institution == "PIK" 14 | assert ds.pr.entity_terminology == "primap2" 15 | assert ds.pr.publication_date == datetime.date(2099, 12, 31) 16 | 17 | ds.pr.references = "references" 18 | assert ds.pr.references == "references" 19 | ds.pr.rights = "rights" 20 | assert ds.pr.rights == "rights" 21 | ds.pr.contact = "contact" 22 | assert ds.pr.contact == "contact" 23 | ds.pr.title = "title" 24 | assert ds.pr.title == "title" 25 | ds.pr.comment = "comment" 26 | assert ds.pr.comment == "comment" 27 | ds.pr.institution = "institution" 28 | assert ds.pr.institution == "institution" 29 | ds.pr.entity_terminology = "entity_terminology" 30 | assert ds.pr.entity_terminology == "entity_terminology" 31 | today = datetime.date.today() 32 | ds.pr.publication_date = today 33 | assert ds.pr.publication_date == today 34 | -------------------------------------------------------------------------------- /primap2/tests/test_overview.py: -------------------------------------------------------------------------------- 1 | """Tests for _overview.py""" 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import pytest 6 | import xarray as xr 7 | 8 | from primap2 import ureg 9 | 10 | 11 | def test_to_df(): 12 | data = np.array([[1, 2], [3, 4]], dtype=np.int64) 13 | a = ["a1", "a2"] 14 | b = ["b1", "b2"] 15 | da = xr.DataArray(data, coords=[("a", a), ("b", b)], name="name") 16 | actual = da.pr.to_df() 17 | 18 | expected = pd.DataFrame(data, index=a, columns=b) 19 | expected.index.name = "a" 20 | expected.columns.name = "b" 21 | 22 | pd.testing.assert_frame_equal(actual, expected) 23 | 24 | 25 | def test_to_df_1d(): 26 | data = np.array([1, 2], dtype=np.int64) 27 | a = ["a1", "a2"] 28 | da = xr.DataArray(data, coords=[("a", a)], name="name") 29 | actual = da.pr.to_df() 30 | 31 | expected = pd.Series(data, index=a, name="name") 32 | expected.index.name = "a" 33 | 34 | pd.testing.assert_series_equal(actual, expected) 35 | 36 | 37 | def test_to_df_set(): 38 | data = np.array([1, 2], dtype=np.int64) 39 | a = ["a1", "a2"] 40 | da = xr.DataArray(data, coords=[("a", a)], name="name") 41 | ds = xr.Dataset({"b": da}) 42 | actual = ds.pr.to_df("name") 43 | 44 | expected = pd.DataFrame(data, index=a, columns=["b"]) 45 | expected.index.name = "a" 46 | expected.columns.name = "name" 47 | 48 | pd.testing.assert_frame_equal(actual, expected) 49 | 50 | 51 | def test_array_empty(empty_ds): 52 | with pytest.raises(ValueError, match="Specify at least one dimension"): 53 | empty_ds.pr.coverage() 54 | with pytest.raises(ValueError, match="Specify at least one dimension"): 55 | empty_ds["CO2"].pr.coverage() 56 | 57 | 58 | def test_array_coverage(empty_ds): 59 | da = empty_ds["CO2"] 60 | da.pint.magnitude[:] = np.nan 61 | da.name = None 62 | 63 | da.pr.loc[{"time": "2001", "area": "COL"}] = 12.0 * ureg("Gg CO2 / year") 64 | da.pr.loc[{"time": "2002", "area": "COL"}] = 13.0 * ureg("Gg CO2 / year") 65 | 66 | expected = pd.DataFrame( 67 | index=da["area (ISO3)"].values, 68 | columns=da["time"].to_index(), 69 | data=np.zeros((len(da["area (ISO3)"]), len(da["time"])), dtype=np.int32), 70 | ) 71 | expected.loc["COL", "2001"] = 1 72 | expected.loc["COL", "2002"] = 1 73 | expected.index.name = "area (ISO3)" 74 | expected.columns.name = "time" 75 | 76 | pd.testing.assert_frame_equal( 77 | expected.astype(np.int32), da.pr.coverage("area", "time").astype(np.int32) 78 | ) 79 | pd.testing.assert_frame_equal( 80 | expected.T.astype(np.int32), 81 | da.pr.coverage("time", "area (ISO3)").astype(np.int32), 82 | ) 83 | 84 | 85 | def 
test_array_coverage_multidim(opulent_ds): 86 | da = opulent_ds["CO2"] 87 | 88 | da.pr.loc[{"product": "milk"}].pint.magnitude[:] = np.nan 89 | 90 | expected = pd.DataFrame( 91 | index=da.pr["animal"].values, 92 | columns=da.pr["product"].values, 93 | data=np.zeros((len(da.pr["animal"]), len(da.pr["product"])), dtype=np.int32), 94 | ) 95 | expected[:] = np.prod(da.shape) // np.prod(expected.shape) 96 | expected.loc[:, "milk"] = 0 97 | expected.index.name = "animal (FAOSTAT)" 98 | expected.columns.name = "product (FAOSTAT)" 99 | 100 | pd.testing.assert_frame_equal( 101 | expected.astype(np.int32), da.pr.coverage("animal", "product").astype(np.int32) 102 | ) 103 | pd.testing.assert_frame_equal( 104 | expected.T.astype(np.int32), 105 | da.pr.coverage("product", "animal").astype(np.int32), 106 | ) 107 | 108 | 109 | def test_array_coverage_error(opulent_ds): 110 | da = opulent_ds["CO2"] 111 | 112 | with pytest.raises(ValueError, match="Dimension 'non-existing' does not exist."): 113 | da.pr.coverage("animal", "non-existing") 114 | 115 | 116 | def test_set_coverage(opulent_ds): 117 | ds = opulent_ds 118 | ds["CO2"].pr.loc[{"product": "milk"}].pint.magnitude[:] = np.nan 119 | 120 | expected = pd.DataFrame( 121 | index=ds.pr["product"].values, 122 | columns=ds.pr["animal"].values, 123 | data=np.zeros((len(ds.pr["product"]), len(ds.pr["animal"])), dtype=int), 124 | ) 125 | expected[:] = np.prod(ds["CO2"].shape) // np.prod(expected.shape) * 4 126 | expected.loc["milk", :] = np.prod(ds["CO2"].shape) // np.prod(expected.shape) * 3 127 | expected.index.name = "product (FAOSTAT)" 128 | expected.columns.name = "animal (FAOSTAT)" 129 | expected.name = "coverage" 130 | 131 | pd.testing.assert_frame_equal(expected, ds.pr.coverage("product", "animal")) 132 | pd.testing.assert_frame_equal(expected.T, ds.pr.coverage("animal", "product")) 133 | 134 | 135 | def test_set_coverage_entity(opulent_ds): 136 | ds = opulent_ds 137 | ds["CO2"].pr.loc[{"product": "milk"}].pint.magnitude[:] = np.nan 138 | 139 | expected = pd.DataFrame( 140 | index=list(ds.keys()), 141 | columns=ds.pr["area"].values, 142 | data=np.zeros((len(ds), len(ds.pr["area"].values)), dtype=int), 143 | ) 144 | expected[:] = np.prod(ds["CO2"].shape) 145 | expected.loc["population", :] = np.prod(ds["population"].shape) 146 | expected.loc["CO2", :] = np.prod(ds["CO2"].shape) - np.prod( 147 | ds["CO2"].pr.loc[{"product": "milk"}].shape 148 | ) 149 | expected = expected // len(ds.pr["area"].values) 150 | expected.name = "coverage" 151 | expected.index.name = "entity" 152 | expected.columns.name = "area (ISO3)" 153 | 154 | pd.testing.assert_frame_equal(expected, ds.pr.coverage("entity", "area")) 155 | 156 | 157 | def test_set_coverage_boolean(opulent_ds): 158 | actual = opulent_ds.notnull().any("time").pr.coverage("entity", "area") 159 | expected = opulent_ds.pr.coverage("entity", "area") // len(opulent_ds["time"]) 160 | 161 | pd.testing.assert_frame_equal(actual, expected) 162 | 163 | 164 | def test_set_coverage_entity_other_dim_not_existing(opulent_ds): 165 | ds = opulent_ds 166 | 167 | ds["CO2"].pr.loc[{"product": "milk"}].pint.magnitude[:] = np.nan 168 | 169 | entites_expected = [x for x in ds.keys() if x != "population"] 170 | 171 | expected = pd.DataFrame( 172 | index=ds.pr["product"].values, 173 | columns=entites_expected, 174 | data=np.zeros((len(ds.pr["product"]), len(entites_expected)), dtype=int), 175 | ) 176 | expected[:] = np.prod(ds["CO2"].shape) // len(ds.pr["product"]) 177 | expected.loc["milk", "CO2"] = 0 178 | expected.index.name = 
"product (FAOSTAT)" 179 | expected.columns.name = "entity" 180 | 181 | pd.testing.assert_frame_equal(expected, ds.pr.coverage("product", "entity")) 182 | pd.testing.assert_frame_equal(expected.T, ds.pr.coverage("entity", "product")) 183 | 184 | 185 | def test_set_coverage_error(opulent_ds): 186 | ds = opulent_ds["CO2"] 187 | 188 | with pytest.raises(ValueError, match="Dimension 'non-existing' does not exist."): 189 | ds.pr.coverage("animal", "non-existing") 190 | -------------------------------------------------------------------------------- /primap2/tests/test_selection.py: -------------------------------------------------------------------------------- 1 | """Tests for _alias_selection.py""" 2 | 3 | import pytest 4 | import xarray as xr 5 | import xarray.testing 6 | 7 | import primap2 8 | 9 | 10 | @pytest.mark.parametrize( 11 | ["alias", "full_name"], 12 | [ 13 | ("time", "time"), 14 | ("area", "area (ISO3)"), 15 | ("category", "category (IPCC 2006)"), 16 | ("cat", "category (IPCC 2006)"), 17 | ("animal", "animal (FAOSTAT)"), 18 | ("product", "product (FAOSTAT)"), 19 | ("scenario", "scenario (FAOSTAT)"), 20 | ("scen", "scenario (FAOSTAT)"), 21 | ("provenance", "provenance"), 22 | ("model", "model"), 23 | ("source", "source"), 24 | ("CO2", "CO2"), 25 | ("population", "population"), 26 | ], 27 | ) 28 | def test_pr_getitem(opulent_ds, alias, full_name): 29 | da = opulent_ds.pr[alias] 30 | assert da.name == full_name 31 | 32 | 33 | @pytest.mark.parametrize( 34 | ["alias", "full_name"], 35 | [ 36 | ("time", "time"), 37 | ("area", "area (ISO3)"), 38 | ("category", "category (IPCC 2006)"), 39 | ("cat", "category (IPCC 2006)"), 40 | ("animal", "animal (FAOSTAT)"), 41 | ("product", "product (FAOSTAT)"), 42 | ("scenario", "scenario (FAOSTAT)"), 43 | ("scen", "scenario (FAOSTAT)"), 44 | ("provenance", "provenance"), 45 | ("model", "model"), 46 | ("source", "source"), 47 | ("CO2", "CO2"), 48 | ("population", "population"), 49 | ], 50 | ) 51 | def test_pr_getitem_no_attrs(opulent_ds, alias, full_name): 52 | da = opulent_ds.notnull().pr[alias] 53 | assert da.name == full_name 54 | 55 | 56 | @pytest.mark.parametrize( 57 | ["alias", "full_name"], 58 | [ 59 | ("time", "time"), 60 | ("area", "area (ISO3)"), 61 | ("category", "category (IPCC 2006)"), 62 | ("cat", "category (IPCC 2006)"), 63 | ("animal", "animal (FAOSTAT)"), 64 | ("product", "product (FAOSTAT)"), 65 | ("scenario", "scenario (FAOSTAT)"), 66 | ("scen", "scenario (FAOSTAT)"), 67 | ("provenance", "provenance"), 68 | ("model", "model"), 69 | ("source", "source"), 70 | ], 71 | ) 72 | def test_pr_alias_array(opulent_ds, alias, full_name): 73 | da = opulent_ds.pr["CO2"] 74 | actual = da.pr.sum(dim=alias) 75 | expected = da.sum(dim=full_name, keep_attrs=True) 76 | xr.testing.assert_identical(actual, expected) 77 | 78 | 79 | def test_pr_loc_select(opulent_ds): 80 | sel_pr = opulent_ds.pr.loc[ 81 | { 82 | "time": slice("2002", "2005"), 83 | "area": ["COL", "ARG"], 84 | "animal": "cow", 85 | } 86 | ] 87 | sel = opulent_ds.loc[ 88 | { 89 | "time": slice("2002", "2005"), 90 | "area (ISO3)": ["COL", "ARG"], 91 | "animal (FAOSTAT)": "cow", 92 | } 93 | ] 94 | xr.testing.assert_identical(sel_pr, sel) 95 | 96 | 97 | def test_pr_loc_select_da(opulent_ds): 98 | da = opulent_ds["CO2"] 99 | sel_pr = da.pr.loc[ 100 | { 101 | "time": slice("2002", "2005"), 102 | "area": ["COL", "ARG"], 103 | "animal": "cow", 104 | } 105 | ] 106 | sel = da.loc[ 107 | { 108 | "time": slice("2002", "2005"), 109 | "area (ISO3)": ["COL", "ARG"], 110 | "animal (FAOSTAT)": "cow", 111 | } 
112 | ] 113 | xr.testing.assert_identical(sel_pr, sel) 114 | 115 | 116 | def test_pr_loc_select_not(opulent_ds): 117 | sel_pr = opulent_ds.pr.loc[ 118 | { 119 | "time": slice("2002", "2005"), 120 | "area": ["COL", "ARG"], 121 | "animal": primap2.Not("cow"), 122 | "category": primap2.Not(["0", "1"]), 123 | } 124 | ] 125 | sel = opulent_ds.loc[ 126 | { 127 | "time": slice("2002", "2005"), 128 | "area (ISO3)": ["COL", "ARG"], 129 | "animal (FAOSTAT)": ["swine", "goat"], 130 | "category (IPCC 2006)": ["2", "3", "4", "5", "1.A", "1.B"], 131 | } 132 | ] 133 | xr.testing.assert_identical(sel_pr, sel) 134 | 135 | 136 | def test_pr_loc_select_da_not(opulent_ds): 137 | da = opulent_ds["CO2"] 138 | sel_pr = da.pr.loc[ 139 | { 140 | "time": slice("2002", "2005"), 141 | "area": ["COL", "ARG"], 142 | "animal": primap2.Not("cow"), 143 | "category": primap2.Not(["0", "1"]), 144 | } 145 | ] 146 | sel = da.loc[ 147 | { 148 | "time": slice("2002", "2005"), 149 | "area (ISO3)": ["COL", "ARG"], 150 | "animal (FAOSTAT)": ["swine", "goat"], 151 | "category (IPCC 2006)": ["2", "3", "4", "5", "1.A", "1.B"], 152 | } 153 | ] 154 | xr.testing.assert_identical(sel_pr, sel) 155 | 156 | 157 | def test_resolve_not(opulent_ds): 158 | result = primap2._selection.resolve_not( 159 | input_selector={ 160 | "a": "1", 161 | "b": ["1", "2"], 162 | "animal (FAOSTAT)": primap2.Not("cow"), 163 | "area (ISO3)": primap2.Not(["MEX", "COL"]), 164 | }, 165 | xarray_obj=opulent_ds, 166 | ) 167 | assert len(result) == 4 168 | assert result["a"] == "1" 169 | assert result["b"] == ["1", "2"] 170 | assert len(result["animal (FAOSTAT)"]) == 2 171 | assert "swine" in result["animal (FAOSTAT)"] 172 | assert "goat" in result["animal (FAOSTAT)"] 173 | assert len(result["area (ISO3)"]) == 2 174 | assert "ARG" in result["area (ISO3)"] 175 | assert "BOL" in result["area (ISO3)"] 176 | 177 | 178 | def test_resolve_not_da(opulent_ds): 179 | result = primap2._selection.resolve_not( 180 | input_selector={ 181 | "a": "1", 182 | "b": ["1", "2"], 183 | "animal (FAOSTAT)": primap2.Not("cow"), 184 | "area (ISO3)": primap2.Not(["MEX", "COL"]), 185 | }, 186 | xarray_obj=opulent_ds["CO2"], 187 | ) 188 | assert len(result) == 4 189 | assert result["a"] == "1" 190 | assert result["b"] == ["1", "2"] 191 | assert len(result["animal (FAOSTAT)"]) == 2 192 | assert "swine" in result["animal (FAOSTAT)"] 193 | assert "goat" in result["animal (FAOSTAT)"] 194 | assert len(result["area (ISO3)"]) == 2 195 | assert "ARG" in result["area (ISO3)"] 196 | assert "BOL" in result["area (ISO3)"] 197 | 198 | 199 | def test_alias_special_cases(): 200 | assert primap2._selection.alias(1, {"a": "b"}, [1, 2, 3]) == 1 201 | with pytest.raises(primap2._selection.DimensionNotExistingError): 202 | primap2._selection.alias(1, {"a": "b"}, ["b", "c"]) 203 | -------------------------------------------------------------------------------- /primap2/tests/test_units.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Tests for _units.py""" 3 | 4 | import numpy as np 5 | import pytest 6 | import xarray as xr 7 | import xarray.testing 8 | 9 | from .utils import allclose, assert_equal 10 | 11 | 12 | def test_roundtrip_quantify(opulent_ds: xr.Dataset): 13 | roundtrip = opulent_ds.pr.dequantify().pr.quantify() 14 | xarray.testing.assert_identical(roundtrip, opulent_ds) 15 | 16 | 17 | def test_roundtrip_quantify_da(opulent_ds: xr.Dataset): 18 | da: xr.DataArray = opulent_ds["SF6 (SARGWP100)"] 19 | roundtrip = 
da.pr.dequantify().pr.quantify() 20 | assert_equal(roundtrip, da) 21 | 22 | 23 | def test_convert_to_gwp(opulent_ds: xr.Dataset): 24 | da: xr.DataArray = opulent_ds["SF6"] 25 | da_converted = da.pr.convert_to_gwp("SARGWP100", "CO2 Gg / year") 26 | da_expected = opulent_ds["SF6 (SARGWP100)"] 27 | assert_equal(da_converted, da_expected) 28 | 29 | da_converted_like = da.pr.convert_to_gwp_like(da_expected) 30 | assert_equal(da_converted_like, da_expected) 31 | 32 | 33 | def test_convert_to_gwp_like_missing(opulent_ds: xr.Dataset): 34 | da: xr.DataArray = opulent_ds["SF6"] 35 | da_gwp = da.pr.convert_to_gwp("SARGWP100", "CO2 Gg / year") 36 | 37 | del da_gwp.attrs["gwp_context"] 38 | with pytest.raises(ValueError, match="reference array has no gwp_context"): 39 | da.pr.convert_to_gwp_like(da_gwp) 40 | 41 | da_gwp = xr.full_like(da_gwp, np.nan) 42 | da_gwp.attrs["gwp_context"] = "SARGWP100" 43 | with pytest.raises(ValueError, match="reference array has no units attached"): 44 | da.pr.convert_to_gwp_like(da_gwp) 45 | 46 | 47 | def test_convert_to_gwp_incompatible(opulent_ds: xr.Dataset): 48 | da: xr.DataArray = opulent_ds["SF6 (SARGWP100)"] 49 | with pytest.raises(ValueError, match="Incompatible gwp conversions"): 50 | da.pr.convert_to_gwp("AR5GWP", "CO2 Gg / year") 51 | 52 | 53 | def test_convert_to_mass(opulent_ds: xr.Dataset): 54 | da: xr.DataArray = opulent_ds["SF6 (SARGWP100)"] 55 | da_converted = da.pr.convert_to_mass() 56 | da_expected = opulent_ds["SF6"] 57 | assert_equal(da_converted, da_expected) 58 | 59 | 60 | def test_convert_round_trip(opulent_ds: xr.Dataset): 61 | da: xr.DataArray = opulent_ds["SF6"] 62 | assert da.attrs["entity"] == "SF6" 63 | da_gwp = da.pr.convert_to_gwp(gwp_context="AR4GWP100", units="Gg CO2 / year") 64 | da_rt = da_gwp.pr.convert_to_mass() 65 | assert_equal(da, da_rt) 66 | assert da_rt.attrs["entity"] == "SF6" 67 | assert isinstance(da_rt.attrs["entity"], str) 68 | 69 | 70 | def test_convert_to_mass_missing_info(opulent_ds: xr.Dataset): 71 | da: xr.DataArray = opulent_ds["SF6"] 72 | with pytest.raises( 73 | ValueError, 74 | match="No gwp_context given and no gwp_context available in the attrs", 75 | ): 76 | da.pr.convert_to_mass() 77 | 78 | da = opulent_ds["SF6 (SARGWP100)"] 79 | del da.attrs["entity"] 80 | with pytest.raises(ValueError, match="No entity given and no entity available in the attrs"): 81 | da.pr.convert_to_mass() 82 | 83 | 84 | def test_context(opulent_ds: xr.Dataset): 85 | da: xr.DataArray = opulent_ds["SF6 (SARGWP100)"] 86 | with da.pr.gwp_context: 87 | da_converted = opulent_ds["SF6"].pint.to(da.pint.units) 88 | assert allclose(da, da_converted) 89 | -------------------------------------------------------------------------------- /primap2/tests/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pint 3 | import xarray as xr 4 | 5 | 6 | def allclose(a: xr.DataArray, b: xr.DataArray, *args, **kwargs) -> bool: 7 | """Like np.allclose, but converts a to b's units before comparing.""" 8 | try: 9 | a = a.pint.to(b.pint.units) 10 | except pint.DimensionalityError: 11 | return False 12 | if a.dtype == float: # need to use "allclose" to compare floats 13 | return np.allclose(a.pint.magnitude, b.pint.magnitude, *args, **kwargs) 14 | else: 15 | return (a.pint.magnitude == b.pint.magnitude).all() 16 | 17 | 18 | def assert_equal(a: xr.DataArray, b: xr.DataArray, *args, **kwargs): 19 | """Asserts that contents are allclose(), and the name and attrs are also equal.""" 20 | assert 
allclose(a, b, *args, **kwargs) 21 | assert a.attrs == b.attrs, (a.attrs, b.attrs) 22 | assert a.name == b.name, (a.name, b.name) 23 | 24 | 25 | def assert_align(a: xr.DataArray, b: xr.DataArray) -> tuple[xr.DataArray, xr.DataArray]: 26 | """Asserts that a and b have the same shape and returns a and b with axes and 27 | dimensions aligned and sorted equally so that naive comparisons can be done. 28 | """ 29 | assert set(a.dims) == set(b.dims), (a.dims, b.dims) 30 | aa, ba = xr.align(a, b, join="outer") 31 | aa = aa.transpose(*ba.dims) 32 | size_unchanged = sorted(aa.shape) == sorted(a.shape) and ba.shape == b.shape 33 | assert size_unchanged, (a.shape, b.shape) 34 | return aa, ba 35 | 36 | 37 | def assert_aligned_equal(a: xr.DataArray, b: xr.DataArray, *args, **kwargs): 38 | """Assert that a and b are equal after alignment of their dimensions.""" 39 | a, b = assert_align(a, b) 40 | assert_equal(a, b, *args, **kwargs) 41 | 42 | 43 | def assert_ds_aligned_equal(a: xr.Dataset, b: xr.Dataset, *args, **kwargs): 44 | """Assert that a and b are equal after alignment of their dimensions.""" 45 | assert set(a.keys()) == set(b.keys()) 46 | for key in a.keys(): 47 | assert_aligned_equal(a[key], b[key], *args, **kwargs) 48 | assert a.attrs == b.attrs, (a.attrs, b.attrs) 49 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=66", 4 | "wheel>=0.42", 5 | "build>=1.0.0", 6 | "setuptools_scm[toml]==8.1" 7 | ] 8 | build-backend = "setuptools.build_meta" 9 | 10 | [tool.ruff] 11 | extend-include = ["*.ipynb"] 12 | extend-exclude = [ 13 | "climate_categories/data/*.py", 14 | "climate_categories/tests/data/*.py", 15 | "docs-old/", 16 | "docs/*" 17 | ] 18 | line-length = 100 19 | lint.extend-select = [ "E", "W", "I", "UP", "B", "YTT", "A", "NPY", "RUF", "FURB", "FLY", "PYI" ] 20 | target-version = "py310" 21 | 22 | [tool.ruff.format] 23 | docstring-code-format = true 24 | 25 | [tool.ruff.lint.isort] 26 | known-first-party = ["primap2"] 27 | 28 | [tool.ruff.lint.pydocstyle] 29 | convention = "numpy" 30 | 31 | [tool.setuptools_scm] 32 | 33 | [tool.pytest.ini_options] 34 | # actually, importlib.resources.open_text will be undeprecated again in python 3.13 35 | filterwarnings = "ignore:.*open_text is deprecated.*:DeprecationWarning" 36 | 37 | [tool.towncrier] 38 | package = "primap2" 39 | package_dir = "primap2" 40 | filename = "docs/source/changelog.md" 41 | directory = "changelog/" 42 | title_format = "## primap2 {version} ({project_date})" 43 | underlines = ["", "", ""] 44 | issue_format = "[#{issue}](https://github.com/primap-community/primap2/pull/{issue})" 45 | 46 | [[tool.towncrier.type]] 47 | directory = "breaking" 48 | name = "Breaking Changes" 49 | showcontent = true 50 | 51 | [[tool.towncrier.type]] 52 | directory = "deprecation" 53 | name = "Deprecations" 54 | showcontent = true 55 | 56 | [[tool.towncrier.type]] 57 | directory = "feature" 58 | name = "Features" 59 | showcontent = true 60 | 61 | [[tool.towncrier.type]] 62 | directory = "improvement" 63 | name = "Improvements" 64 | showcontent = true 65 | 66 | [[tool.towncrier.type]] 67 | directory = "fix" 68 | name = "Bug Fixes" 69 | showcontent = true 70 | 71 | [[tool.towncrier.type]] 72 | directory = "docs" 73 | name = "Improved Documentation" 74 | showcontent = true 75 | 76 | [[tool.towncrier.type]] 77 | directory = "trivial" 78 | name = "Trivial/Internal Changes" 79 | 
showcontent = false 80 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | . 2 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | .[dev] 2 | -------------------------------------------------------------------------------- /requirements_upstream_dev.txt: -------------------------------------------------------------------------------- 1 | # install development versions of central upstream libraries 2 | pint@git+https://github.com/hgrecco/pint 3 | pint_xarray@git+https://github.com/xarray-contrib/pint-xarray 4 | xarray@git+https://github.com/pydata/xarray 5 | pandas@git+https://github.com/pandas-dev/pandas 6 | openscm_units@git+https://github.com/openscm/openscm-units 7 | # numpy dev currently not supported 8 | #numpy@git+https://github.com/numpy/numpy 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = primap2 3 | version = 0.12.2 4 | author = Mika Pflüger 5 | author_email = mika.pflueger@climate-resource.com 6 | description = The next generation of the PRIMAP climate policy analysis suite. 7 | long_description = file: README.md, changelog.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/primap-community/primap2 10 | project_urls = 11 | Documentation = https://primap2.readthedocs.io/ 12 | classifiers = 13 | Development Status :: 3 - Alpha 14 | Intended Audience :: Science/Research 15 | Topic :: Scientific/Engineering :: Atmospheric Science 16 | License :: OSI Approved :: Apache Software License 17 | Natural Language :: English 18 | Programming Language :: Python :: 3 19 | Programming Language :: Python :: 3.10 20 | Programming Language :: Python :: 3.11 21 | Programming Language :: Python :: 3.12 22 | license = Apache Software License 2.0 23 | license_files = LICENSE 24 | 25 | [options] 26 | packages = 27 | primap2 28 | primap2.pm2io 29 | primap2.csg 30 | primap2.csg._strategies 31 | primap2.tests 32 | primap2.tests.csg 33 | primap2.tests.data 34 | python_requires = >=3.10, <3.13 35 | setup_requires = 36 | setuptools_scm==8.1 37 | install_requires = 38 | attrs>=23 39 | xarray>=2024.10.1 40 | numbagg>=0.8.1 41 | pint>=0.24.4 42 | pint_xarray>=0.4 43 | numpy>=1.26,<2 44 | pandas>=2.2.2 45 | openscm_units>=0.6 46 | loguru>=0.7 47 | scipy>=1.13.0 48 | h5netcdf>=1 49 | h5py>=3.10.0 50 | matplotlib>=3.7 51 | ruamel.yaml>=0.18 52 | strictyaml>=1.7.3 53 | openpyxl>=3.1 54 | tqdm>=4.66 55 | msgpack>=1 56 | climate_categories>=0.10.2 57 | 58 | [options.extras_require] 59 | test = 60 | pytest>=8 61 | pytest-cov>=4 62 | xdoctest>=1.2 63 | 64 | dev = 65 | tbump>=6.11 66 | wheel>=0.42 67 | coverage>=7.4 68 | Sphinx>=6 69 | myst-nb>=1 70 | sphinx-book-theme>=1.1 71 | sphinx-favicon>=1.0 72 | jupytext>=1.16 73 | sphinx-copybutton>=0.5.2 74 | sphinx-autosummary-accessors>=2023.4 75 | sphinx-tippy>=0.4.3 76 | numpydoc>=1.6 77 | twine>=5 78 | build>=1.2.2 79 | pre-commit>=3.6 80 | pytest>=8 81 | pytest-cov>=4 82 | xdoctest>=1.2 83 | setuptools>=66 84 | towncrier>=23.6.0 85 | ipykernel>=6.27.1 86 | jupyter>=1.1 87 | mypy>=1.11 88 | tox>=4.11 89 | tox-uv>=1.11.3 90 | ruff>=0.6.3 91 | ruff-lsp>=0.0.50 92 | datalad = 93 | datalad>=1.1 94 | 95 |
[options.package_data] 96 | * = 97 | *.csv 98 | *.nc 99 | 100 | [doc8] 101 | max-line-length = 88 102 | ignore-path-errors = docs/data_format_details.rst;D001,docs/interchange_format_details.rst;D001 103 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import setuptools 4 | 5 | setuptools.setup() 6 | -------------------------------------------------------------------------------- /tbump.toml: -------------------------------------------------------------------------------- 1 | # Uncomment this if your project is hosted on GitHub: 2 | github_url = "https://github.com/primap-community/primap2/" 3 | 4 | [version] 5 | current = "0.12.2" 6 | 7 | # Example of a semver regexp. 8 | # Make sure this matches current_version before 9 | # using tbump 10 | regex = ''' 11 | (?P<major>\d+) 12 | \. 13 | (?P<minor>\d+) 14 | \. 15 | (?P<patch>\d+) 16 | ''' 17 | 18 | [git] 19 | message_template = "Release version {new_version}" 20 | tag_template = "v{new_version}" 21 | 22 | # For each file to patch, add a [[file]] config 23 | # section containing the path of the file, relative to the 24 | # tbump.toml location. 25 | [[file]] 26 | src = "setup.cfg" 27 | search = 'version = {current_version}' 28 | 29 | [[file]] 30 | src = "primap2/__init__.py" 31 | search = '__version__ = "{current_version}"' 32 | 33 | [[file]] 34 | src = "README.md" 35 | search = 'primap-community/primap2: PRIMAP2 Version {current_version}.' 36 | 37 | [[file]] 38 | src = "Makefile" 39 | search = '\tSETUPTOOLS_SCM_PRETEND_VERSION={current_version}' 40 | 41 | # You can specify a list of commands to 42 | # run after the files have been patched 43 | # and before the git commit is made 44 | 45 | [[before_commit]] 46 | name = "lint" 47 | cmd = "make lint" 48 | 49 | [[before_commit]] 50 | name = "update github release notes" 51 | cmd = "rm .github_release_notes_file.md; venv/bin/towncrier build --version {new_version} --draft --config towncrier_github_release_notes.toml > .github_release_notes_file.md" 52 | 53 | [[before_commit]] 54 | name = "update changelog" 55 | cmd = "venv/bin/towncrier build --version {new_version} --yes" 56 | 57 | # Or run some commands after the git tag and the branch 58 | # have been pushed: 59 | 60 | [[after_push]] 61 | name = "release on github" 62 | cmd = "gh release create --repo primap-community/primap2 v{new_version} --title 'PRIMAP2 Version {new_version}' --notes-file .github_release_notes_file.md" 63 | -------------------------------------------------------------------------------- /towncrier_github_release_notes.toml: -------------------------------------------------------------------------------- 1 | [tool.towncrier] 2 | package = "primap2" 3 | package_dir = "primap2" 4 | filename = ".changelog_latest_version.md" 5 | template = "towncrier_github_release_notes_template.md" 6 | directory = "changelog/" 7 | title_format = "# primap2 {version} ({project_date})" 8 | underlines = ["", "", ""] 9 | issue_format = "[#{issue}](https://github.com/primap-community/primap2/pull/{issue})" 10 | 11 | [[tool.towncrier.type]] 12 | directory = "breaking" 13 | name = "Breaking Changes" 14 | showcontent = true 15 | 16 | [[tool.towncrier.type]] 17 | directory = "deprecation" 18 | name = "Deprecations" 19 | showcontent = true 20 | 21 | [[tool.towncrier.type]] 22 | directory = "feature" 23 | name = "Features" 24 | showcontent = true 25 | 26 | [[tool.towncrier.type]] 27 | directory = "improvement" 28 |
name = "Improvements" 29 | showcontent = true 30 | 31 | [[tool.towncrier.type]] 32 | directory = "fix" 33 | name = "Bug Fixes" 34 | showcontent = true 35 | 36 | [[tool.towncrier.type]] 37 | directory = "docs" 38 | name = "Improved Documentation" 39 | showcontent = true 40 | 41 | [[tool.towncrier.type]] 42 | directory = "trivial" 43 | name = "Trivial/Internal Changes" 44 | showcontent = false 45 | -------------------------------------------------------------------------------- /towncrier_github_release_notes_template.md: -------------------------------------------------------------------------------- 1 | 2 | primap2 is a library for compiling and analyzing climate policy datasets. 3 | 4 | ## Changes 5 | {% for section, _ in sections.items() %} 6 | 7 | {% if sections[section] %} 8 | {% for category, val in definitions.items() if category in sections[section] %} 9 | ### {{ definitions[category]['name'] }} 10 | 11 | {% for text, values in sections[section][category].items() %} 12 | - {{ text }} 13 | {%- if values %} 14 | {% if "\n - " in text or '\n * ' in text %} 15 | 16 | 17 | ( 18 | {%- else %} 19 | {% if text %} ({% endif %} 20 | {%- endif -%} 21 | {%- for issue in values %} 22 | {{ issue.split(": ", 1)[0] }}{% if not loop.last %}, {% endif %} 23 | {%- endfor %} 24 | {% if text %}){% endif %} 25 | 26 | {% else %} 27 | 28 | {% endif %} 29 | {% endfor %} 30 | 31 | {% if issues_by_category[section][category] and "]: " in issues_by_category[section][category][0] %} 32 | {% for issue in issues_by_category[section][category] %} 33 | {{ issue }} 34 | {% endfor %} 35 | 36 | {% endif %} 37 | {% if sections[section][category]|length == 0 %} 38 | No significant changes. 39 | 40 | {% else %} 41 | {% endif %} 42 | {% endfor %} 43 | {% else %} 44 | No significant changes. 45 | 46 | {% endif %} 47 | {% endfor +%} 48 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # tox (https://tox.readthedocs.io/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 5 | 6 | [tox] 7 | envlist = py{310,311,312}-{highest,lowest-direct} 8 | 9 | [testenv:py{310,311,312}-highest] 10 | deps = 11 | pytest 12 | xdoctest 13 | commands = 14 | uv pip install -e . 15 | pytest --xdoc -rx 16 | 17 | [testenv:py{310,311,312}-lowest-direct] 18 | deps = 19 | pytest 20 | xdoctest 21 | commands = 22 | uv pip install --resolution lowest-direct -e . 23 | pytest --xdoc -rx 24 | -------------------------------------------------------------------------------- /update_citation_info.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | resp = requests.get("https://zenodo.org/api/records/4535902").json() 4 | 5 | new_link = resp["links"]["doi"] 6 | new_doi = resp["metadata"]["doi"] 7 | new_date = resp["metadata"]["publication_date"] 8 | new_title = resp["metadata"]["title"] 9 | 10 | citation = f"""## Citation 11 | 12 | If you use this library and want to cite it, please cite it as: 13 | 14 | Mika Pflüger and Johannes Gütschow. ({new_date}). 15 | {new_title}. 16 | Zenodo. 
{new_link} 17 | """ 18 | 19 | with open("README.md") as fd: 20 | old_content = fd.read().splitlines(keepends=True) 21 | 22 | with open("README.md", "w") as fd: 23 | skip_to_next_section = False 24 | i = 0 25 | while True: 26 | try: 27 | line = old_content[i] 28 | except IndexError: 29 | break 30 | if line == "## Citation\n": 31 | fd.write(citation) 32 | skip_to_next_section = True 33 | elif skip_to_next_section: 34 | if line.startswith("#"): 35 | fd.write("\n") 36 | fd.write(line) 37 | skip_to_next_section = False 38 | else: 39 | fd.write(line) 40 | i += 1 41 | 42 | fd.truncate() 43 | --------------------------------------------------------------------------------
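Note on usage: the round trip exercised in primap2/tests/test_interchange_format.py above doubles as the basic usage pattern for the pm2io interchange-format API. A minimal sketch, assuming `ds` is a dataset in the PRIMAP2 data format; only the pm2io functions and the `.pr` accessor are taken from the test code, while the path and variable names are illustrative:

from pathlib import Path

import primap2.pm2io as pm2io

path = Path("outdir") / "my_data"  # illustrative output location (hypothetical)
# serialize to the on-disk interchange format (a CSV data file plus a YAML metadata file)
pm2io.write_interchange_format(path, ds.pr.to_interchange_format())
# read the interchange format back and convert it into an xarray dataset again
ds_roundtrip = pm2io.from_interchange_format(pm2io.read_interchange_format(path))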