├── requirements.txt
├── docs
│   ├── source
│   │   ├── _static
│   │   │   ├── .gitkeep
│   │   │   ├── favicon.ico
│   │   │   ├── favicon-48x48.png
│   │   │   ├── apple-touch-icon.png
│   │   │   ├── primap_logo_transparent.png
│   │   │   ├── web-app-manifest-192x192.png
│   │   │   ├── web-app-manifest-512x512.png
│   │   │   ├── primap_logo_transparent_dark.png
│   │   │   └── site.webmanifest
│   │   ├── _templates
│   │   │   └── .gitkeep
│   │   ├── .gitignore
│   │   ├── api
│   │   │   ├── .gitignore
│   │   │   ├── generate_api_docs.py
│   │   │   └── index.rst
│   │   ├── data_reading
│   │   │   ├── .gitignore
│   │   │   ├── test_csv_data_long.csv
│   │   │   ├── test_csv_data_sec_cat_if.yaml
│   │   │   ├── test_csv_data_sec_cat.csv
│   │   │   ├── test_csv_data_sec_cat_if.csv
│   │   │   ├── index.md
│   │   │   ├── test_data_long.md
│   │   │   ├── test_data_wide.md
│   │   │   └── old-PRIMAP-hist.md
│   │   ├── jupytext.toml
│   │   ├── minimal_ds.nc
│   │   ├── opulent_ds.nc
│   │   ├── pyproject.toml
│   │   ├── credits.md
│   │   ├── data_format
│   │   │   ├── index.md
│   │   │   └── interchange_format_details.md
│   │   ├── index.md
│   │   ├── usage
│   │   │   ├── index.md
│   │   │   ├── logging.md
│   │   │   ├── downscaling.md
│   │   │   ├── merge.md
│   │   │   ├── store_and_load.md
│   │   │   ├── gas_baskets.md
│   │   │   ├── add_and_overwrite.md
│   │   │   └── skipna.md
│   │   ├── installation.md
│   │   ├── datalad.md
│   │   └── conf.py
│   ├── requirements.txt
│   ├── Makefile
│   └── make.bat
├── primap2
│   ├── tests
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── long_no_time.csv
│   │   │   ├── PRIMAP-csg-test.nc
│   │   │   ├── test_csv_data.csv
│   │   │   ├── simple_conversion.csv
│   │   │   ├── downscale_test_original.nc
│   │   │   ├── test_downscale_reference.nc
│   │   │   ├── primap2_test_data_v2.5.1_final.nc
│   │   │   ├── test_create_category_name_conversion.csv
│   │   │   ├── test_csv_data_category_name.csv
│   │   │   ├── test_from_interchange_format_output.nc
│   │   │   ├── Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.nc
│   │   │   ├── test_csv_data_category_name_fill_cat_code.csv
│   │   │   ├── test_sum_skip_allna_inhomogeneous_result.nc
│   │   │   ├── test_csv_data_unit_harmonization.csv
│   │   │   ├── long.csv
│   │   │   ├── test_read_wide_csv_file_no_sec_cats.csv
│   │   │   ├── test_read_wide_csv_file_no_sec_cats_cat_name.csv
│   │   │   ├── test_csv_data_category_name_long.csv
│   │   │   ├── test_empty_ds_if.yaml
│   │   │   ├── test_read_wide_csv_file_output_unit_harm.csv
│   │   │   ├── test_read_wide_csv_file_output_unit_def.csv
│   │   │   ├── test_read_wide_csv_file_output.csv
│   │   │   ├── test_csv_data_sec_cat.csv
│   │   │   ├── test_csv_data_sec_cat_strings.csv
│   │   │   ├── test_read_wide_csv_file_output_entity_def.csv
│   │   │   ├── BURDI_conversion.csv
│   │   │   ├── Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.yaml
│   │   │   ├── simple_categorisation_a.yaml
│   │   │   ├── simple_categorisation_b.yaml
│   │   │   ├── test_empty_ds_if.csv
│   │   │   └── Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.csv
│   │   ├── __init__.py
│   │   ├── csg
│   │   │   ├── utils.py
│   │   │   └── test_wrapper.py
│   │   ├── test_metadata.py
│   │   ├── utils.py
│   │   ├── conftest.py
│   │   ├── test_units.py
│   │   ├── test_interchange_format.py
│   │   ├── test_conversion.py
│   │   ├── test_fill_combine.py
│   │   ├── test_selection.py
│   │   └── test_overview.py
│   ├── csg
│   │   ├── _strategies
│   │   │   ├── __init__.py
│   │   │   ├── local_least_squares.py
│   │   │   ├── exceptions.py
│   │   │   ├── substitution.py
│   │   │   └── global_least_squares.py
│   │   ├── __init__.py
│   │   └── _wrapper.py
│   ├── _dim_names.py
│   ├── _accessor_base.py
│   ├── _types.py
│   ├── pm2io
│   │   ├── __init__.py
│   │   └── _GHG_inventory_reading.py
│   ├── __init__.py
│   ├── accessors.py
│   ├── _metadata.py
│   └── _overview.py
├── requirements_dev.txt
├── changelog.md
├── mypy.ini
├── setup.py
├── downscale_test_reference.nc
├── codecov.yml
├── requirements_upstream_dev.txt
├── .github
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── ISSUE_TEMPLATE
│   │   ├── feature_request.md
│   │   └── bug_report.md
│   └── workflows
│       ├── ci-upstream-dev.yml
│       └── ci.yml
├── .readthedocs.yml
├── licenses
│   ├── xarray_license
│   └── pint_xarray_license
├── tox.ini
├── .pre-commit-config.yaml
├── .check_python_version.py
├── update_citation_info.py
├── towncrier_github_release_notes_template.md
├── towncrier_github_release_notes.toml
├── CONTRIBUTING.md
├── primap-stubs.patch
├── tbump.toml
├── .gitignore
├── pyproject.toml
├── README.md
├── changelog
│   └── README.md
├── setup.cfg
└── Makefile
/requirements.txt:
--------------------------------------------------------------------------------
1 | .
2 |
--------------------------------------------------------------------------------
/docs/source/_static/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/source/_templates/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/primap2/tests/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements_dev.txt:
--------------------------------------------------------------------------------
1 | .[dev]
2 |
--------------------------------------------------------------------------------
/changelog.md:
--------------------------------------------------------------------------------
1 | docs/source/changelog.md
--------------------------------------------------------------------------------
/docs/source/.gitignore:
--------------------------------------------------------------------------------
1 | *.ipynb
2 |
--------------------------------------------------------------------------------
/primap2/csg/_strategies/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/source/api/.gitignore:
--------------------------------------------------------------------------------
1 | generated
2 | generated_*
3 |
--------------------------------------------------------------------------------
/docs/source/data_reading/.gitignore:
--------------------------------------------------------------------------------
1 | PRIMAPHIST22__19-Jan-2021.csv
2 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | warn_unused_configs = True
3 | ignore_missing_imports = True
4 |
--------------------------------------------------------------------------------
/docs/source/jupytext.toml:
--------------------------------------------------------------------------------
1 | formats = "ipynb,myst"
2 | cell_metadata_filter = "-pycharm"
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import setuptools
4 |
5 | setuptools.setup()
6 |
--------------------------------------------------------------------------------
/docs/source/minimal_ds.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/minimal_ds.nc
--------------------------------------------------------------------------------
/docs/source/opulent_ds.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/opulent_ds.nc
--------------------------------------------------------------------------------
/downscale_test_reference.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/downscale_test_reference.nc
--------------------------------------------------------------------------------
/primap2/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Unit tests for primap2."""
2 |
3 | from .examples import minimal_ds # noqa: F401
4 |
--------------------------------------------------------------------------------
/docs/source/_static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/_static/favicon.ico
--------------------------------------------------------------------------------
/primap2/tests/data/long_no_time.csv:
--------------------------------------------------------------------------------
1 | country,category,gas,unit,emissions,other
2 | AUS,IPC1,CO2,Gg,4.1,a
3 | ZAM,IPC2,CH4,Mt,7,d
4 |
--------------------------------------------------------------------------------
/docs/source/_static/favicon-48x48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/_static/favicon-48x48.png
--------------------------------------------------------------------------------
/primap2/tests/data/PRIMAP-csg-test.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/primap2/tests/data/PRIMAP-csg-test.nc
--------------------------------------------------------------------------------
/primap2/tests/data/test_csv_data.csv:
--------------------------------------------------------------------------------
1 | country,category,gas,unit,1991,2000,2010
2 | AUS,IPC1,CO2,Gg,4.1,5,6
3 | ZAM,IPC2,CH4,Mt,7,8,9
4 |
--------------------------------------------------------------------------------
/docs/source/_static/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/_static/apple-touch-icon.png
--------------------------------------------------------------------------------
/primap2/tests/data/simple_conversion.csv:
--------------------------------------------------------------------------------
1 | # references: test
2 | # last_update: 2024-10-14
3 | A,B,comment
4 | 1,1, no comment
5 | 2+3,2
6 |
--------------------------------------------------------------------------------
/primap2/tests/data/downscale_test_original.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/primap2/tests/data/downscale_test_original.nc
--------------------------------------------------------------------------------
/primap2/tests/data/test_downscale_reference.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/primap2/tests/data/test_downscale_reference.nc
--------------------------------------------------------------------------------
/docs/source/_static/primap_logo_transparent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/_static/primap_logo_transparent.png
--------------------------------------------------------------------------------
/docs/source/_static/web-app-manifest-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/_static/web-app-manifest-192x192.png
--------------------------------------------------------------------------------
/docs/source/_static/web-app-manifest-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/_static/web-app-manifest-512x512.png
--------------------------------------------------------------------------------
/docs/source/_static/primap_logo_transparent_dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/docs/source/_static/primap_logo_transparent_dark.png
--------------------------------------------------------------------------------
/primap2/tests/data/primap2_test_data_v2.5.1_final.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/primap2/tests/data/primap2_test_data_v2.5.1_final.nc
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 | status:
3 | project:
4 | default:
5 | informational: true
6 | patch:
7 | default:
8 | informational: true
9 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_create_category_name_conversion.csv:
--------------------------------------------------------------------------------
1 | # references: test
2 | # last_update: 2024-10-14
3 | A,B,comment
4 | 1,1+2, no comment
5 | 2,-3+4
6 | 3,5-1
7 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_csv_data_category_name.csv:
--------------------------------------------------------------------------------
1 | country,category,category_name,gas,unit,1991,2000,2010
2 | AUS,IPC1,Energy,CO2,Gg,4.1,5,6
3 | ZAM,IPC2,IPPU,CH4,Mt,7,8,9
4 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_from_interchange_format_output.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/primap2/tests/data/test_from_interchange_format_output.nc
--------------------------------------------------------------------------------
/primap2/tests/data/Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/primap2/tests/data/Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.nc
--------------------------------------------------------------------------------
/primap2/tests/data/test_csv_data_category_name_fill_cat_code.csv:
--------------------------------------------------------------------------------
1 | country,category,category_name,gas,unit,1991,2000,2010
2 | AUS,XX,Energy,CO2,Gg,4.1,5,6
3 | ZAM,XX,IPPU,CH4,Mt,7,8,9
4 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_sum_skip_allna_inhomogeneous_result.nc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/primap-community/primap2/HEAD/primap2/tests/data/test_sum_skip_allna_inhomogeneous_result.nc
--------------------------------------------------------------------------------
/primap2/tests/data/test_csv_data_unit_harmonization.csv:
--------------------------------------------------------------------------------
1 | country,category,gas,unit,1991,2000,2010
2 | AUS,IPC1,CH4,Gg,4.1,5,6
3 | AUS,IPC2,CH4 (SARGWP100),GgCO2eq,147,168,189
4 | DEU,IPC1,SF6,Gg,4.1,5,6
5 | DEU,IPC2,SF6,t,1000,2000,3000
6 |
--------------------------------------------------------------------------------
/primap2/tests/data/long.csv:
--------------------------------------------------------------------------------
1 | country,category,gas,unit,year,emissions,other
2 | AUS,IPC1,CO2,Gg,1991,4.1,a
3 | AUS,IPC1,CO2,Gg,2000,5,b
4 | AUS,IPC1,CO2,Gg,2010,6,c
5 | ZAM,IPC2,CH4,Mt,1991,7,d
6 | ZAM,IPC2,CH4,Mt,2000,8,e
7 | ZAM,IPC2,CH4,Mt,2010,9,f
8 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 | Sphinx>=4.2,<8.1.3
3 | myst-nb>=1
4 | sphinx-book-theme>=1.1
5 | numpydoc>=1.6
6 | jupytext>=1.16
7 | sphinx-copybutton>=0.5.2
8 | sphinx-autosummary-accessors>=2023.4
9 | sphinx-tippy>=0.4.3
10 | sphinx-favicon>=1.0
11 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_read_wide_csv_file_no_sec_cats.csv:
--------------------------------------------------------------------------------
1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),1991,2000,2010
2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,4.1,5.0,6.0
3 | 1,TESTcsv2021,HISTORY,ZAM,CH4,Mt CH4 / yr,2,7.0,8.0,9.0
4 |
--------------------------------------------------------------------------------
/docs/source/data_reading/test_csv_data_long.csv:
--------------------------------------------------------------------------------
1 | country,category,gas,unit,year,emissions,other
2 | AUS,IPC1,CO2,Gg,1991,4.1,a
3 | AUS,IPC1,CO2,Gg,2000,5,b
4 | AUS,IPC1,CO2,Gg,2010,6,c
5 | ZAM,IPC2,CH4,Mt,1991,7,d
6 | ZAM,IPC2,CH4,Mt,2000,8,e
7 | ZAM,IPC2,CH4,Mt,2010,9,f
8 |
--------------------------------------------------------------------------------
/primap2/_dim_names.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | from primap2._types import DatasetOrDataArray
4 |
5 |
6 | def dim_names(obj: DatasetOrDataArray) -> tuple[typing.Hashable, ...]:
7 |     """Extract the names of the dimensions in a way compatible with all xarray versions."""
8 |     return tuple(obj.sizes.keys())
9 |
--------------------------------------------------------------------------------
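A minimal sketch of how this helper behaves; the import path is a private module, so treat it as illustration only:

```python
import xarray as xr

from primap2._dim_names import dim_names  # internal helper, path may change

# Two-dimensional toy dataset; dim_names returns the dimension names
# as reported by obj.sizes, uniformly across xarray versions.
ds = xr.Dataset({"emissions": (("time", "area"), [[1.0, 2.0]])})
print(dim_names(ds))  # e.g. ("time", "area"); ordering follows obj.sizes
```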
/primap2/csg/_strategies/local_least_squares.py:
--------------------------------------------------------------------------------
1 | # TODO
2 | # local matching with least squares instead of linear trends
3 | # optimization: adaptive area LS matching where areas with high overlap and a good fit
4 | # are identified and the matching is done using these areas ignoring other areas
5 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_read_wide_csv_file_no_sec_cats_cat_name.csv:
--------------------------------------------------------------------------------
1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),category_name,1991,2000,2010
2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,Energy,4.1,5.0,6.0
3 | 1,TESTcsv2021,HISTORY,ZAM,CH4,Mt CH4 / yr,2,IPPU,7.0,8.0,9.0
4 |
--------------------------------------------------------------------------------
/docs/source/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.ruff]
2 | # Extend the general settings
3 | extend = "../../pyproject.toml"
4 |
5 | [tool.ruff.lint]
6 | ignore = [
7 | "B018", # Useless expression - common and fine in notebooks.
8 | "E402", # Module level import not at top of file - fine in notebooks
9 | ]
10 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_csv_data_category_name_long.csv:
--------------------------------------------------------------------------------
1 | country,category,category_name,gas,unit,year,emissions
2 | AUS,IPC1,Energy,CO2,Gg,1991,4.1
3 | ZAM,IPC2,IPPU,CH4,Mt,1991,7
4 | AUS,IPC1,Energy,CO2,Gg,2000,5
5 | ZAM,IPC2,IPPU,CH4,Mt,2000,8
6 | AUS,IPC1,Energy,CO2,Gg,2010,6
7 | ZAM,IPC2,IPPU,CH4,Mt,2010,9
8 |
--------------------------------------------------------------------------------
/docs/source/credits.md:
--------------------------------------------------------------------------------
1 | # Credits
2 |
3 | ## Developers
4 |
5 | * [Johannes Gütschow](https://orcid.org/0000-0001-9944-3685)
6 | * [Mika Pflüger](https://orcid.org/0000-0002-7814-8916)
7 |
8 | ## Former Developers
9 |
10 | Many thanks to previous developers:
11 | * [Annika Günther](https://www.pik-potsdam.de/members/annikag)
12 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_empty_ds_if.yaml:
--------------------------------------------------------------------------------
1 | attrs:
2 | area: area (ISO3)
3 | comment: This needs to be sorted alphabetically.
4 | contact: Someone
5 | title: Test Dataset
6 | data_file: test_empty_ds_if.csv
7 | dimensions:
8 | '*':
9 | - area (ISO3)
10 | - entity
11 | - source
12 | - time
13 | - unit
14 | time_format: '%Y'
15 |
--------------------------------------------------------------------------------
/primap2/csg/_strategies/exceptions.py:
--------------------------------------------------------------------------------
1 | class StrategyUnableToProcess(Exception):
2 | """The filling strategy is unable to process the given timeseries, possibly due
3 | to missing data.
4 | """
5 |
6 | def __init__(self, reason: str):
7 | """Specify the reason why the filling strategy is unable to process the data."""
8 | self.reason = reason
9 |
--------------------------------------------------------------------------------
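In use, a filling strategy raises this exception when it cannot contribute data, and the caller can inspect the reason. A small sketch, using the re-export from `primap2.csg` (the `fill_timeseries` function is hypothetical):

```python
from primap2.csg import StrategyUnableToProcess

def fill_timeseries(ts):
    # A strategy signals that it cannot contribute here, e.g. because
    # the fill source holds no usable data for this timeseries.
    raise StrategyUnableToProcess(reason="fill source contains only NaN values")

try:
    fill_timeseries(None)
except StrategyUnableToProcess as error:
    print(f"strategy skipped: {error.reason}")
```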
/primap2/tests/data/test_read_wide_csv_file_output_unit_harm.csv:
--------------------------------------------------------------------------------
1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),1991,2000,2010
2 | 0,TESTcsv2021,HISTORY,AUS,CH4,Gg CH4 / yr,1,4.1,5.0,6.0
3 | 1,TESTcsv2021,HISTORY,AUS,CH4,Gg CH4 / yr,2,7,8,9
4 | 2,TESTcsv2021,HISTORY,DEU,SF6,Gg SF6 / yr,1,4.1,5.0,6.0
5 | 3,TESTcsv2021,HISTORY,DEU,SF6,Gg SF6 / yr,2,1,2,3
6 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_read_wide_csv_file_output_unit_def.csv:
--------------------------------------------------------------------------------
1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),Class (class),Type (type),1991,2000,2010
2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,TOTAL,fugitive,4.0,5.0,6.0
3 | 1,TESTcsv2021,HISTORY,ZAM,CH4,Gg CH4 / yr,2,TOTAL,fugitive,7.0,8.0,9.0
4 | 2,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,2,TOTAL,fugitive,12.0,13.0,14.0
5 |
--------------------------------------------------------------------------------
/docs/source/data_reading/test_csv_data_sec_cat_if.yaml:
--------------------------------------------------------------------------------
1 | attrs:
2 | area: area (ISO3)
3 | cat: category (IPCC2006)
4 | scen: scenario (general)
5 | data_file: test_csv_data_sec_cat_if.csv
6 | dimensions:
7 | '*':
8 | - Class (class)
9 | - Type (type)
10 | - area (ISO3)
11 | - category (IPCC2006)
12 | - entity
13 | - scenario (general)
14 | - source
15 | - unit
16 | time_format: '%Y'
17 |
--------------------------------------------------------------------------------
/primap2/_accessor_base.py:
--------------------------------------------------------------------------------
1 | """Base classes for accessor mix-in classes."""
2 |
3 | import typing
4 |
5 | import xarray as xr
6 |
7 | XrObj = typing.TypeVar("XrObj", xr.Dataset, xr.DataArray)
8 |
9 |
10 | class BaseDataArrayAccessor:
11 | def __init__(self, da: xr.DataArray):
12 | self._da = da
13 |
14 |
15 | class BaseDatasetAccessor:
16 | def __init__(self, ds: xr.Dataset):
17 | self._ds = ds
18 |
--------------------------------------------------------------------------------
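Accessor mix-ins in the package derive from these bases to get a handle on the wrapped object. A minimal sketch of a hypothetical subclass:

```python
import xarray as xr

from primap2._accessor_base import BaseDatasetAccessor

class ExampleDatasetAccessor(BaseDatasetAccessor):
    """Hypothetical mix-in; self._ds holds the wrapped Dataset."""

    def data_var_count(self) -> int:
        return len(self._ds.data_vars)

ds = xr.Dataset({"emissions": ("time", [1.0, 2.0])})
print(ExampleDatasetAccessor(ds).data_var_count())  # 1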
/requirements_upstream_dev.txt:
--------------------------------------------------------------------------------
1 | # install development versions of central upstream libraries
2 | pint@git+https://github.com/hgrecco/pint
3 | pint_xarray@git+https://github.com/xarray-contrib/pint-xarray
4 | xarray@git+https://github.com/pydata/xarray
5 | pandas@git+https://github.com/pandas-dev/pandas
6 | openscm_units@git+https://github.com/openscm/openscm-units
7 | # numpy dev currently not supported
8 | #numpy@git+https://github.com/numpy/numpy
9 |
--------------------------------------------------------------------------------
/docs/source/data_format/index.md:
--------------------------------------------------------------------------------
1 | # Data Format
2 |
3 | In this section, we will dive deeper into the specifics of the primap2 data format,
4 | and we will also show the interchange format, an additional format for ingesting and
5 | exporting data.
6 |
7 |
8 | ```{toctree}
9 | :caption: primap2 data formats
10 | :maxdepth: 2
11 |
12 | data_format_examples
13 | data_format_details
14 | interchange_format_examples
15 | interchange_format_details
16 | ```
17 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | # Pull request
2 |
3 | Please confirm that this pull request has done the following:
4 |
5 | - [ ] Tests added
6 | - [ ] Documentation added (where applicable)
7 | - [ ] Description in a `{pr}.thing.md` file in the directory `changelog` added - see [changelog/README.md](https://github.com/pik-primap/primap2/blob/main/changelog/README.md) for details
8 |
9 | ## Description
10 |
11 | Please provide a short description what your pull request does.
12 |
--------------------------------------------------------------------------------
/primap2/_types.py:
--------------------------------------------------------------------------------
1 | import typing
2 |
3 | import xarray as xr
4 |
5 | DatasetOrDataArray = typing.TypeVar("DatasetOrDataArray", xr.Dataset, xr.DataArray)
6 | KeyT = typing.TypeVar("KeyT", str, typing.Mapping[typing.Hashable, typing.Any])
7 | DimOrDimsT = typing.TypeVar(
8 | "DimOrDimsT",
9 | str,
10 | typing.Hashable,
11 | typing.Iterable[str],
12 | typing.Iterable[typing.Hashable],
13 | )
14 | FunctionT = typing.TypeVar("FunctionT", bound=typing.Callable[..., typing.Any])
15 |
--------------------------------------------------------------------------------
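Because `DatasetOrDataArray` is a constrained TypeVar rather than a plain Union, annotating a function with it tells type checkers that the return type matches the input type. A short sketch with a hypothetical function:

```python
import xarray as xr

from primap2._types import DatasetOrDataArray

def rename_time(obj: DatasetOrDataArray, new_name: str) -> DatasetOrDataArray:
    # A constrained TypeVar: a Dataset comes back as a Dataset,
    # a DataArray as a DataArray, which a plain Union cannot express.
    return obj.rename({"time": new_name})

da = xr.DataArray([1.0, 2.0], dims=["time"])
renamed = rename_time(da, "year")  # type checkers infer xr.DataArray here
```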
/primap2/tests/data/test_read_wide_csv_file_output.csv:
--------------------------------------------------------------------------------
1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),Class (class),Type (type),1991,2000,2010
2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,TOTAL,fugitive,4000.0,5000.0,6000.0
3 | 1,TESTcsv2021,HISTORY,AUS,KYOTOGHG (SARGWP100),Mt CO2 / yr,0,TOTAL,fugitive,8.0,9.0,10.0
4 | 2,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,2,TOTAL,fugitive,12.0,13.0,14.0
5 | 3,TESTcsv2021,HISTORY,ZAM,KYOTOGHG (SARGWP100),Mt CO2 / yr,0,TOTAL,fugitive,0.03,0.02,0.04
6 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | # Required
5 | version: 2
6 |
7 | # Set the version of Python and other tools you might need
8 | build:
9 | os: ubuntu-22.04
10 | tools:
11 | python: "3.12"
12 |
13 | # Build documentation in the docs/source/ directory with Sphinx
14 | sphinx:
15 | configuration: docs/source/conf.py
16 |
17 | python:
18 | install:
19 | - requirements: docs/requirements.txt
20 |
--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../README.md
2 | ```
3 | ## Documentation
4 |
5 | We have divided the documentation into chapters, most of them focusing on using the
6 | library.
7 | We also have a chapter for developers working on the library itself.
8 |
9 | ```{toctree}
10 | :caption: Contents
11 | :maxdepth: 2
12 |
13 | installation
14 | usage/index
15 | data_format/index
16 | data_reading/index
17 | datalad
18 | development
19 | credits
20 | changelog
21 | api/index
22 | ideas_for_sparse_data
23 | ```
24 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_csv_data_sec_cat.csv:
--------------------------------------------------------------------------------
1 | country,category,classification,gas,unit,1991,2000,2010
2 | AUS,IPC1,TOTAL,CO2,Mt,4,5,6
3 | AUS,IPC0,TOTAL,KYOTOGHG,MtCO2eq,8,9,10
4 | ZAM,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40
5 | ZAM,IPC2,TOTAL,CH4,Gg,7,8,9
6 | ZAM,IPC2,TOTAL,CO2,Gg,12,13,14
7 | USA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,300,200,400
8 | USA,IPC2,TOTAL,CH4,Gg,70,80,90
9 | USA,IPC3,TOTAL,CO2,Gg,120,130,140
10 | FRA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40
11 | FRA,IPC2,TOTAL,CH4,Gg,7,8,9
12 | FRA,IPC2,TOTAL,CO2,Gg,12,13,14
13 |
--------------------------------------------------------------------------------
/docs/source/data_reading/test_csv_data_sec_cat.csv:
--------------------------------------------------------------------------------
1 | country,category,classification,gas,unit,1991,2000,2010
2 | AUS,IPC1,TOTAL,CO2,Mt,4,5,6
3 | AUS,IPC0,TOTAL,KYOTOGHG,MtCO2eq,8,9,10
4 | ZAM,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40
5 | ZAM,IPC2,TOTAL,CH4,Gg,7,8,9
6 | ZAM,IPC2,TOTAL,CO2,Gg,12,13,14
7 | USA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,300,200,400
8 | USA,IPC2,TOTAL,CH4,Gg,70,80,90
9 | USA,IPC3,TOTAL,CO2,Gg,120,130,140
10 | FRA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40
11 | FRA,IPC2,TOTAL,CH4,Gg,7,8,9
12 | FRA,IPC2,TOTAL,CO2,Gg,12,13,14
13 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_csv_data_sec_cat_strings.csv:
--------------------------------------------------------------------------------
1 | country,category,classification,gas,unit,1991,2000,2010
2 | AUS,IPC1,TOTAL,CO2,Mt,4,5,6
3 | AUS,IPC0,TOTAL,KYOTOGHG,MtCO2eq,8,9,10
4 | ZAM,IPC0,TOTAL,KYOTOGHG,GgCO2eq,30,20,40
5 | ZAM,IPC2,TOTAL,CH4,Gg,7,8,9
6 | ZAM,IPC2,TOTAL,CO2,Gg,12,13,14
7 | USA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,300,200,100
8 | USA,IPC2,TOTAL,CH4,Gg,70,N/A,90
9 | USA,IPC3,TOTAL,CO2,Gg,120,TEST,140
10 | FRA,IPC0,TOTAL,KYOTOGHG,GgCO2eq,IE,20,40
11 | FRA,IPC2,TOTAL,CH4,Gg,7, ,9
12 | FRA,IPC2,TOTAL,CO2,Gg,12,13,14
13 |
--------------------------------------------------------------------------------
/docs/source/usage/index.md:
--------------------------------------------------------------------------------
1 | # Usage
2 | Because PRIMAP2 builds on xarray, all xarray functionality is available
3 | right away.
4 | Additional functionality is provided in the `primap2` package and
5 | in the `pr` namespace on xarray objects.
6 | In this section, we will show the fundamentals of how to work with primap2 data.
7 |
8 |
9 | ```{toctree}
10 | :caption: Usage documentation
11 | :maxdepth: 2
12 |
13 | select_and_view
14 | store_and_load
15 | add_and_overwrite
16 | logging
17 | merge
18 | skipna
19 | downscaling
20 | gas_baskets
21 | csg
22 | ```
23 |
--------------------------------------------------------------------------------
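A minimal sketch of the `pr` namespace in action, using the `minimal_ds.nc` example file shipped with the docs; the entity and area labels are assumptions about that file's contents:

```python
import primap2

ds = primap2.open_dataset("docs/source/minimal_ds.nc")

# Plain xarray functionality works directly on the dataset ...
print(ds.dims)

# ... while PRIMAP2 additions live in the `pr` namespace, e.g.
# label-based selection using bare dimension names:
co2 = ds["CO2"].pr.loc[{"area": "COL"}]
```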
/primap2/tests/data/test_read_wide_csv_file_output_entity_def.csv:
--------------------------------------------------------------------------------
1 | ,source,scenario (general),area (ISO3),entity,unit,category (IPCC2006),Class (class),Type (type),1991,2000,2010
2 | 0,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,0,TOTAL,fugitive,8000.0,9000.0,10000.0
3 | 1,TESTcsv2021,HISTORY,AUS,CO2,Gg CO2 / yr,1,TOTAL,fugitive,4000.0,5000.0,6000.0
4 | 2,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,0,TOTAL,fugitive,30.0,20.0,40.0
5 | 3,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,2,TOTAL,fugitive,7.0,8.0,9.0
6 | 4,TESTcsv2021,HISTORY,ZAM,CO2,Gg CO2 / yr,2,TOTAL,fugitive,12.0,13.0,14.0
7 |
--------------------------------------------------------------------------------
/docs/source/_static/site.webmanifest:
--------------------------------------------------------------------------------
1 | {
2 | "name": "primap2 documentation",
3 | "short_name": "primap2",
4 | "icons": [
5 | {
6 | "src": "/web-app-manifest-192x192.png",
7 | "sizes": "192x192",
8 | "type": "image/png",
9 | "purpose": "maskable"
10 | },
11 | {
12 | "src": "/web-app-manifest-512x512.png",
13 | "sizes": "512x512",
14 | "type": "image/png",
15 | "purpose": "maskable"
16 | }
17 | ],
18 | "theme_color": "#ffffff",
19 | "background_color": "#ffffff",
20 | "display": "standalone"
21 | }
22 |
--------------------------------------------------------------------------------
/licenses/xarray_license:
--------------------------------------------------------------------------------
1 | Copyright 2014-2019, xarray Developers
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
4 |
5 | https://www.apache.org/licenses/LICENSE-2.0
6 |
7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
8 |
--------------------------------------------------------------------------------
/licenses/pint_xarray_license:
--------------------------------------------------------------------------------
1 | Copyright 2020, pint-xarray developers
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
4 |
5 | https://www.apache.org/licenses/LICENSE-2.0
6 |
7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
8 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | # tox (https://tox.readthedocs.io/) is a tool for running tests
2 | # in multiple virtualenvs. This configuration file will run the
3 | # test suite on all supported python versions. To use it, "pip install tox"
4 | # and then run "tox" from this directory.
5 |
6 | [tox]
7 | envlist = py{311,312}-{highest,lowest-direct}
8 |
9 | [testenv:py{311,312}-highest]
10 | deps =
11 | pytest
12 | xdoctest
13 | commands =
14 | uv pip install -e .
15 | pytest --xdoc -rx
16 |
17 | [testenv:py{311,312}-lowest-direct]
18 | deps =
19 | pytest
20 | xdoctest
21 | commands =
22 | uv pip install --resolution lowest-direct -e .
23 | pytest --xdoc -rx
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature Request
3 | about: Suggest an idea for this project
4 |
5 | ---
6 |
7 | **Is your feature request related to a problem? Please describe.**
8 |
9 | A clear and concise description of what the problem is. Ex. It's annoying that I always have to [...]
10 |
11 | **Describe the solution you'd like**
12 |
13 | A description of the solution you would like to see.
14 |
15 | **Describe alternatives you've considered**
16 |
17 | A description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 |
21 | Add any other context or screenshots about the feature request here.
22 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | repos:
4 | - repo: https://github.com/pre-commit/pre-commit-hooks
5 | rev: 'v6.0.0'
6 | hooks:
7 | - id: trailing-whitespace
8 | - id: end-of-file-fixer
9 | - id: check-yaml
10 | - id: check-added-large-files
11 | - id: check-ast
12 | - id: fix-byte-order-marker
13 | - id: check-case-conflict
14 | - id: check-merge-conflict
15 | - id: detect-private-key
16 | - id: mixed-line-ending
17 | - repo: https://github.com/astral-sh/ruff-pre-commit
18 | rev: 'v0.14.9'
19 | hooks:
20 | - id: ruff
21 | args: [ --fix, --exit-non-zero-on-fix ]
22 | - id: ruff-format
23 |
--------------------------------------------------------------------------------
/primap2/tests/data/BURDI_conversion.csv:
--------------------------------------------------------------------------------
1 | # references: non_annex1_data repo
2 | # last_update: 2024-10-14
3 | BURDI,IPCC2006_PRIMAP,comment
4 | 1,1
5 | 1.A,1.A
6 | 1.A.1,1.A.1
7 | 1.A.2,1.A.2
8 | 1.A.3,1.A.3
9 | 1.A.4,1.A.4
10 | 1.A.5,1.A.5
11 | 1.B,1.B
12 | 1.B.1,1.B.1
13 | 1.B.2,1.B.2
14 | 2 + 3,2
15 | 2.A,2.A
16 | 2.B + 2.E,2.B
17 | 2.C,2.C
18 | 2.F,2.F
19 | 2.G + 2.D, 2.H
20 | 2.G, 2.H.3
21 | 3,2.D
22 | 4,M.AG
23 | 4.A,3.A.1
24 | 4.B,3.A.2
25 | 4.C,3.C.7
26 | 4.D, M.3.C.45.AG
27 | 4.D + 4.C + 4.E + 4.F + 4.G,3.C
28 | 4.E,3.C.1.c
29 | 4.F,3.C.1.b
30 | 4.G,3.C.8
31 | 5,M.LULUCF
32 | 4+5,3
33 | 6,4
34 | 6.A,4.A
35 | 6.B,4.D
36 | 6.C,4.C
37 | 6.D,4.E
38 | 24540,0
39 | 15163,M.0.EL
40 | 14637,M.BK
41 | 14424,M.BK.A
42 | 14423,M.BK.M,
43 | 14638, M.BIO
44 | 7,5, 5.A-D ignored as not fitting 2006 cats
45 |
--------------------------------------------------------------------------------
/.check_python_version.py:
--------------------------------------------------------------------------------
1 | """Check if the used version of Python is good enough for us."""
2 |
3 | import itertools
4 | import sys
5 |
6 | SUPPORTED_MAJOR_VERSIONS = (3,)
7 | SUPPORTED_MINOR_VERSIONS = (10, 11, 12)
8 |
9 | if (
10 | sys.version_info.major not in SUPPORTED_MAJOR_VERSIONS
11 | or sys.version_info.minor not in SUPPORTED_MINOR_VERSIONS
12 | ):
13 | supported_versions = itertools.product(SUPPORTED_MAJOR_VERSIONS, SUPPORTED_MINOR_VERSIONS)
14 | supported_versions_human_readable = ", ".join(
15 | ".".join(str(x) for x in version) for version in supported_versions
16 | )
17 | print(
18 | f"Python version {sys.version_info} not supported, please install Python"
19 | f" in one of the supported versions: {supported_versions_human_readable}."
20 | )
21 | sys.exit(1)
22 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug Report
3 | about: Write a report to help us improve
4 |
5 | ---
6 |
7 | **Describe the bug**
8 |
9 | A clear and concise description of what the bug is.
10 |
11 | **Failing Test**
12 |
13 | Please put the code (ideally in the form of a unit
14 | test) which fails below.
15 |
16 | **Expected behavior**
17 |
18 | A clear and concise description of what you expected to happen.
19 |
20 | **Screenshots**
21 |
22 | If applicable, add screenshots to help explain your problem.
23 |
24 | **System (please complete the following information):**
25 |
26 | - OS: [e.g. Windows, Linux, macOS]
27 | - Python version [e.g. Python 3.5] and output of `conda list --export` and `pip freeze` as applicable
28 |
29 | **Additional context**
30 |
31 | Add any other context about the problem here.
32 |
--------------------------------------------------------------------------------
/primap2/tests/data/Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.yaml:
--------------------------------------------------------------------------------
1 | attrs:
2 | references: 'doi: 10.5281/zenodo.4723476'
3 | rights: CC-BY 4.0
4 | contact: johannes.guetschow@pik-potsdam.de
5 | title: 'PRIMAP-crf: UNFCCC CRF data in IPCC categories (PRIMAP-crf-2021-v1)'
6 | comment: 'The dataset is described by the article: Jeffery et al., PRIMAP-crf: UNFCCC
7 | CRF data in IPCC 2006 categories, ESSD 10(3), doi: 10.5194/essd-10-1427-2018'
8 | institution: Potsdam Institute for Climate Impact Research
9 | area: area (ISO3)
10 | scen: scenario (PRIMAP)
11 | cat: category (IPCC1996)
12 | time_format: '%Y'
13 | dimensions:
14 | '*':
15 | - source
16 | - scenario (PRIMAP)
17 | - provenance
18 | - area (ISO3)
19 | - entity
20 | - unit
21 | - category (IPCC1996)
22 | data_file: Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.csv
23 |
--------------------------------------------------------------------------------
/primap2/pm2io/__init__.py:
--------------------------------------------------------------------------------
1 | """Data reading module of the PRIMAP2 climate policy analysis package."""
2 |
3 | from ._data_reading import (
4 | convert_long_dataframe_if,
5 | convert_wide_dataframe_if,
6 | read_long_csv_file_if,
7 | read_wide_csv_file_if,
8 | )
9 | from ._GHG_inventory_reading import nir_add_unit_information, nir_convert_df_to_long
10 | from ._interchange_format import (
11 | from_interchange_format,
12 | read_interchange_format,
13 | write_interchange_format,
14 | )
15 |
16 | __all__ = [
17 | "convert_long_dataframe_if",
18 | "convert_wide_dataframe_if",
19 | "from_interchange_format",
20 | "nir_add_unit_information",
21 | "nir_convert_df_to_long",
22 | "read_interchange_format",
23 | "read_long_csv_file_if",
24 | "read_wide_csv_file_if",
25 | "write_interchange_format",
26 | ]
27 |
--------------------------------------------------------------------------------
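A hedged sketch of the wide-CSV reader applied to the `test_csv_data.csv` fixture above; the parameter names follow the data-reading docs, but treat the exact mapping values as assumptions:

```python
import primap2.pm2io as pm2io

# Map the columns of primap2/tests/data/test_csv_data.csv onto
# interchange-format coordinates; the remaining columns are years.
df_if = pm2io.read_wide_csv_file_if(
    "primap2/tests/data/test_csv_data.csv",
    coords_cols={"area": "country", "category": "category", "entity": "gas", "unit": "unit"},
    coords_defaults={"source": "TESTcsv2021", "scenario": "HISTORY"},
    coords_terminologies={"area": "ISO3", "category": "IPCC2006", "scenario": "general"},
)

# Convert the interchange-format DataFrame into a PRIMAP2 xarray dataset.
ds = pm2io.from_interchange_format(df_if)
```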
/primap2/__init__.py:
--------------------------------------------------------------------------------
1 | """The PRIMAP2 climate policy analysis package."""
2 |
3 | __author__ = """Mika Pflüger and Johannes Gütschow"""
4 | __email__ = "mika.pflueger@climate-resource.com"
5 | __version__ = "0.13.0"
6 |
7 | import sys
8 |
9 | from loguru import logger
10 |
11 | from . import accessors, pm2io
12 | from ._data_format import (
13 | ProcessingStepDescription,
14 | TimeseriesProcessingDescription,
15 | open_dataset,
16 | )
17 | from ._selection import Not
18 | from ._units import ureg
19 |
20 | logger.remove()
21 | logger.add(
22 | sys.stderr,
23 | format="{time} {level} {message}",
24 | level="INFO",
25 | colorize=True,
26 | )
27 |
28 | __all__ = [
29 | "Not",
30 | "ProcessingStepDescription",
31 | "TimeseriesProcessingDescription",
32 | "accessors",
33 | "open_dataset",
34 | "pm2io",
35 | "ureg",
36 | ]
37 |
--------------------------------------------------------------------------------
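Since the package installs a loguru sink at INFO level on import, downstream code can reconfigure logging the usual loguru way (see also `usage/logging.md`); a small sketch:

```python
import sys

from loguru import logger

import primap2  # importing installs the default INFO-level stderr sink

# Replace the default handler, e.g. to also see DEBUG messages:
logger.remove()
logger.add(sys.stderr, format="{time} {level} {message}", level="DEBUG")
```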
/primap2/tests/data/simple_categorisation_a.yaml:
--------------------------------------------------------------------------------
1 | name: A
2 | title: Simple Categorization
3 | comment: A simple example categorization without relationships between categories
4 | references: doi:00000/00000
5 | institution: PIK
6 | last_update: 2021-02-23
7 | hierarchical: no
8 | version: 1
9 | categories:
10 | 1:
11 | title: Category 1
12 | comment: The first category
13 | alternative_codes:
14 | - A
15 | - CatA
16 | info:
17 | important_data:
18 | - A
19 | - B
20 | - C
21 | other_important_thing: ABC
22 | 2:
23 | title: Category 2
24 | comment: The second category
25 | alternative_codes:
26 | - B
27 | - CatB
28 | 3:
29 | title: Category 3
30 | comment: The third category
31 | alternative_codes:
32 | - C
33 | - CatC
34 | unnumbered:
35 | title: The unnumbered category
36 |
--------------------------------------------------------------------------------
/primap2/tests/data/simple_categorisation_b.yaml:
--------------------------------------------------------------------------------
1 | name: B
2 | title: Simple Categorization
3 | comment: A simple example categorization without relationships between categories
4 | references: doi:00000/00000
5 | institution: PIK
6 | last_update: 2021-02-23
7 | hierarchical: no
8 | version: 1
9 | categories:
10 | 1:
11 | title: Category 1
12 | comment: The first category
13 | alternative_codes:
14 | - A
15 | - CatA
16 | info:
17 | important_data:
18 | - A
19 | - B
20 | - C
21 | other_important_thing: ABC
22 | 2:
23 | title: Category 2
24 | comment: The second category
25 | alternative_codes:
26 | - B
27 | - CatB
28 | 3:
29 | title: Category 3
30 | comment: The third category
31 | 4:
32 | title: Category 4
33 | comment: The fourth category
34 | 5:
35 | title: Category 5
36 | comment: The fifth category
37 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | echo.
16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | echo.installed, then set the SPHINXBUILD environment variable to point
18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | echo.may add the Sphinx directory to PATH.
20 | echo.
21 | echo.If you don't have Sphinx installed, grab it from
22 | echo.https://www.sphinx-doc.org/
23 | exit /b 1
24 | )
25 |
26 | if "%1" == "" goto help
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/source/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | ## Releases
4 |
5 | To install PRIMAP2, run this command in your terminal:
6 |
7 | ```shell
8 | $ pip install primap2
9 | ```
10 |
11 | This is the preferred method to install PRIMAP2, as it will always install the
12 | most recent release.
13 |
14 | If you don't have [pip] installed, this [Python installation guide] can guide
15 | you through the process.
16 |
17 | ## From sources
18 |
19 | The sources for PRIMAP2 can be downloaded from the
20 | [Github repo](https://github.com/primap-community/primap2).
21 |
22 | Simply clone the public repository using git:
23 |
24 | ```shell
25 | $ git clone https://github.com/primap-community/primap2.git
26 | ```
27 |
28 | Once you have a copy of the source, you can install it with:
29 |
30 | ```shell
31 | $ python setup.py install
32 | ```
33 |
34 | [pip]: https://pip.pypa.io
35 | [python installation guide]: http://docs.python-guide.org/en/latest/starting/installation/
36 |
--------------------------------------------------------------------------------
/primap2/csg/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Composite Source Generator
3 |
4 | Generate a composite harmonized dataset from multiple sources according to defined
5 | source priorities and matching algorithms.
6 | """
7 |
8 | from ._compose import compose
9 | from ._models import (
10 | PriorityDefinition,
11 | StrategyDefinition,
12 | )
13 | from ._strategies.exceptions import StrategyUnableToProcess
14 | from ._strategies.gaps import FitParameters
15 | from ._strategies.global_least_squares import GlobalLSStrategy
16 | from ._strategies.local_trends import LocalTrendsStrategy
17 | from ._strategies.substitution import SubstitutionStrategy
18 | from ._wrapper import create_composite_source
19 |
20 | __all__ = [
21 | "FitParameters",
22 | "GlobalLSStrategy",
23 | "LocalTrendsStrategy",
24 | "PriorityDefinition",
25 | "StrategyDefinition",
26 | "StrategyUnableToProcess",
27 | "SubstitutionStrategy",
28 | "compose",
29 | "create_composite_source",
30 | ]
31 |
--------------------------------------------------------------------------------
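A heavily hedged sketch of how the exported pieces fit together, fed with the `PRIMAP-csg-test.nc` fixture from the test suite; the constructor and keyword argument names follow the csg documentation but are assumptions here, not verified signatures:

```python
import primap2
import primap2.csg

input_ds = primap2.open_dataset("primap2/tests/data/PRIMAP-csg-test.nc")

# Prefer data from source "A"; fall back to source "B".
priorities = primap2.csg.PriorityDefinition(
    priority_dimensions=["source"],
    priorities=[{"source": "A"}, {"source": "B"}],
)

# Fill gaps left by "A" with values from "B" by plain substitution.
strategies = primap2.csg.StrategyDefinition(
    strategies=[({"source": "B"}, primap2.csg.SubstitutionStrategy())]
)

result = primap2.csg.compose(
    input_data=input_ds,
    priority_definition=priorities,
    strategy_definition=strategies,
)
```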
/primap2/tests/csg/utils.py:
--------------------------------------------------------------------------------
1 | from collections.abc import Sequence
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import xarray as xr
6 |
7 |
8 | def get_single_ts(
9 | *,
10 | time: pd.DatetimeIndex | None = None,
11 | data: np.ndarray | None = None,
12 | dims: Sequence[str] | None = None,
13 | coords: dict[str, str | Sequence[str]] | None = None,
14 | entity: str = "CH4",
15 | gwp_context: str | None = None,
16 | ) -> xr.DataArray:
17 | if time is None:
18 | time = pd.date_range("1850-01-01", "2022-01-01", freq="YS")
19 | if dims is None:
20 | dims = []
21 | if data is None:
22 | data = np.linspace(0.0, 1.0, len(time))
23 | if coords is None:
24 | coords = {}
25 | if gwp_context is None:
26 | name = entity
27 | attrs = {"entity": entity}
28 | else:
29 | name = f"{entity} ({gwp_context})"
30 | attrs = {"entity": entity, "gwp_context": gwp_context}
31 | return xr.DataArray(
32 | data,
33 | dims=["time", *dims],
34 | coords={"time": time, **coords},
35 | name=name,
36 | attrs=attrs,
37 | )
38 |
--------------------------------------------------------------------------------
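Two typical calls of the helper above, assuming it is imported from the test utilities module:

```python
from primap2.tests.csg.utils import get_single_ts

# Bare CH4 timeseries on the default annual time axis (1850-2022):
da = get_single_ts()

# The same entity in a global-warming-potential context; the DataArray
# name then carries the context, e.g. "CH4 (SARGWP100)".
da_gwp = get_single_ts(entity="CH4", gwp_context="SARGWP100")
```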
/update_citation_info.py:
--------------------------------------------------------------------------------
1 | import requests
2 |
3 | resp = requests.get("https://zenodo.org/api/records/4535902").json()
4 |
5 | new_link = resp["links"]["doi"]
6 | new_doi = resp["metadata"]["doi"]
7 | new_date = resp["metadata"]["publication_date"]
8 | new_title = resp["metadata"]["title"]
9 |
10 | citation = f"""## Citation
11 |
12 | If you use this library and want to cite it, please cite it as:
13 |
14 | Mika Pflüger and Johannes Gütschow. ({new_date}).
15 | {new_title}.
16 | Zenodo. {new_link}
17 | """
18 |
19 | with open("README.md") as fd:
20 | old_content = fd.read().splitlines(keepends=True)
21 |
22 | with open("README.md", "w") as fd:
23 | skip_to_next_section = False
24 | i = 0
25 | while True:
26 | try:
27 | line = old_content[i]
28 | except IndexError:
29 | break
30 | if line == "## Citation\n":
31 | fd.write(citation)
32 | skip_to_next_section = True
33 | elif skip_to_next_section:
34 | if line.startswith("#"):
35 | fd.write("\n")
36 | fd.write(line)
37 | skip_to_next_section = False
38 | else:
39 | fd.write(line)
40 | i += 1
41 |
42 | fd.truncate()
43 |
--------------------------------------------------------------------------------
/towncrier_github_release_notes_template.md:
--------------------------------------------------------------------------------
1 |
2 | primap2 is a library for compiling and analyzing climate policy datasets.
3 |
4 | ## Changes
5 | {% for section, _ in sections.items() %}
6 |
7 | {% if sections[section] %}
8 | {% for category, val in definitions.items() if category in sections[section] %}
9 | ### {{ definitions[category]['name'] }}
10 |
11 | {% for text, values in sections[section][category].items() %}
12 | - {{ text }}
13 | {%- if values %}
14 | {% if "\n - " in text or '\n * ' in text %}
15 |
16 |
17 | (
18 | {%- else %}
19 | {% if text %} ({% endif %}
20 | {%- endif -%}
21 | {%- for issue in values %}
22 | {{ issue.split(": ", 1)[0] }}{% if not loop.last %}, {% endif %}
23 | {%- endfor %}
24 | {% if text %}){% endif %}
25 |
26 | {% else %}
27 |
28 | {% endif %}
29 | {% endfor %}
30 |
31 | {% if issues_by_category[section][category] and "]: " in issues_by_category[section][category][0] %}
32 | {% for issue in issues_by_category[section][category] %}
33 | {{ issue }}
34 | {% endfor %}
35 |
36 | {% endif %}
37 | {% if sections[section][category]|length == 0 %}
38 | No significant changes.
39 |
40 | {% else %}
41 | {% endif %}
42 | {% endfor %}
43 | {% else %}
44 | No significant changes.
45 |
46 | {% endif %}
47 | {% endfor +%}
48 |
--------------------------------------------------------------------------------
/towncrier_github_release_notes.toml:
--------------------------------------------------------------------------------
1 | [tool.towncrier]
2 | package = "primap2"
3 | package_dir = "primap2"
4 | filename = ".changelog_latest_version.md"
5 | template = "towncrier_github_release_notes_template.md"
6 | directory = "changelog/"
7 | title_format = "# primap2 {version} ({project_date})"
8 | underlines = ["", "", ""]
9 | issue_format = "[#{issue}](https://github.com/primap-community/primap2/pull/{issue})"
10 |
11 | [[tool.towncrier.type]]
12 | directory = "breaking"
13 | name = "Breaking Changes"
14 | showcontent = true
15 |
16 | [[tool.towncrier.type]]
17 | directory = "deprecation"
18 | name = "Deprecations"
19 | showcontent = true
20 |
21 | [[tool.towncrier.type]]
22 | directory = "feature"
23 | name = "Features"
24 | showcontent = true
25 |
26 | [[tool.towncrier.type]]
27 | directory = "improvement"
28 | name = "Improvements"
29 | showcontent = true
30 |
31 | [[tool.towncrier.type]]
32 | directory = "fix"
33 | name = "Bug Fixes"
34 | showcontent = true
35 |
36 | [[tool.towncrier.type]]
37 | directory = "docs"
38 | name = "Improved Documentation"
39 | showcontent = true
40 |
41 | [[tool.towncrier.type]]
42 | directory = "trivial"
43 | name = "Trivial/Internal Changes"
44 | showcontent = false
45 |
--------------------------------------------------------------------------------
/docs/source/data_reading/test_csv_data_sec_cat_if.csv:
--------------------------------------------------------------------------------
1 | "source","scenario (general)","area (ISO3)","entity","unit","category (IPCC2006)","Class (class)","Type (type)","1991","2000","2010"
2 | "TESTcsv2021","HISTORY","AUS","CO2","Gg CO2 / yr","1","TOTAL","fugitive",4000.0000000000005,5000.000000000001,6000.000000000001
3 | "TESTcsv2021","HISTORY","AUS","KYOTOGHG (SARGWP100)","Mt CO2 / yr","0","TOTAL","fugitive",8.0,9.0,10.0
4 | "TESTcsv2021","HISTORY","FRA","CH4","Gg CH4 / yr","2","TOTAL","fugitive",7.0,8.0,9.0
5 | "TESTcsv2021","HISTORY","FRA","CO2","Gg CO2 / yr","2","TOTAL","fugitive",12.0,13.0,14.0
6 | "TESTcsv2021","HISTORY","FRA","KYOTOGHG (SARGWP100)","Mt CO2 / yr","0","TOTAL","fugitive",0.03,0.02,0.04
7 | "TESTcsv2021","HISTORY","USA","CH4","Gg CH4 / yr","2","TOTAL","fugitive",70.0,80.0,90.0
8 | "TESTcsv2021","HISTORY","USA","CO2","Gg CO2 / yr","3","TOTAL","fugitive",120.0,130.0,140.0
9 | "TESTcsv2021","HISTORY","USA","KYOTOGHG (SARGWP100)","Mt CO2 / yr","0","TOTAL","fugitive",0.3,0.2,0.4
10 | "TESTcsv2021","HISTORY","ZAM","CH4","Gg CH4 / yr","2","TOTAL","fugitive",7.0,8.0,9.0
11 | "TESTcsv2021","HISTORY","ZAM","CO2","Gg CO2 / yr","2","TOTAL","fugitive",12.0,13.0,14.0
12 | "TESTcsv2021","HISTORY","ZAM","KYOTOGHG (SARGWP100)","Mt CO2 / yr","0","TOTAL","fugitive",0.03,0.02,0.04
13 |
--------------------------------------------------------------------------------
/primap2/tests/test_metadata.py:
--------------------------------------------------------------------------------
1 | """Tests for _metadata.py"""
2 |
3 | import datetime
4 |
5 |
6 | def test_metadata_properties(opulent_ds):
7 | ds = opulent_ds
8 | assert ds.pr.references == "doi:10.1012"
9 | assert ds.pr.rights == "Use however you want."
10 | assert ds.pr.contact == "lol_no_one_will_answer@example.com"
11 | assert ds.pr.title == "Completely invented GHG inventory data"
12 | assert ds.pr.comment == "GHG inventory data ..."
13 | assert ds.pr.institution == "PIK"
14 | assert ds.pr.entity_terminology == "primap2"
15 | assert ds.pr.publication_date == datetime.date(2099, 12, 31)
16 |
17 | ds.pr.references = "references"
18 | assert ds.pr.references == "references"
19 | ds.pr.rights = "rights"
20 | assert ds.pr.rights == "rights"
21 | ds.pr.contact = "contact"
22 | assert ds.pr.contact == "contact"
23 | ds.pr.title = "title"
24 | assert ds.pr.title == "title"
25 | ds.pr.comment = "comment"
26 | assert ds.pr.comment == "comment"
27 | ds.pr.institution = "institution"
28 | assert ds.pr.institution == "institution"
29 | ds.pr.entity_terminology = "entity_terminology"
30 | assert ds.pr.entity_terminology == "entity_terminology"
31 | today = datetime.date.today()
32 | ds.pr.publication_date = today
33 | assert ds.pr.publication_date == today
34 |
--------------------------------------------------------------------------------
/.github/workflows/ci-upstream-dev.yml:
--------------------------------------------------------------------------------
1 | name: CI upstream development versions
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches: [main]
7 | workflow_dispatch: # allows you to trigger the workflow run manually
8 | schedule:
9 | - cron: "5 2 * * tue" # Tuesdays at 02:05 UTC.
10 |
11 | jobs:
12 | test:
13 | runs-on: ${{ matrix.os }}
14 | strategy:
15 | matrix:
16 | os: [ ubuntu-latest, windows-latest ]
17 | python-version: [ "3.12" ]
18 | steps:
19 | - uses: actions/checkout@v4
20 |
21 | - name: Set up uv and caching
22 | uses: astral-sh/setup-uv@v2
23 | with:
24 | enable-cache: true
25 | cache-suffix: "${{ matrix.os }}-${{ matrix.python-version }}-upstream-dev"
26 | cache-dependency-glob: "setup.cfg"
27 | version: "0.5.18"
28 |
29 | - name: Create venv
30 | run: |
31 | uv venv --seed --python ${{ matrix.python-version }}
32 |
33 | - name: Install highest dependencies
34 | run: |
35 | uv pip install --resolution highest --prerelease allow .[test]
36 | uv pip install --resolution highest --prerelease allow --requirements requirements_upstream_dev.txt
37 |
38 | - name: Test with pytest (linux)
39 | if: startsWith(matrix.os, 'ubuntu')
40 | run: |
41 | source .venv/bin/activate
42 | pytest --xdoc
43 |
44 | - name: Test with pytest (windows)
45 | if: startsWith(matrix.os, 'windows')
46 | run: |
47 | .venv\Scripts\activate
48 | pytest --xdoc
49 |
--------------------------------------------------------------------------------
/docs/source/datalad.md:
--------------------------------------------------------------------------------
1 | # Data Packages
2 |
3 | Individual PRIMAP2 datasets are stored in netcdf files, which preserve all
4 | metadata and the structure of the data.
5 | One or multiple datasets are stored in data packages together with the input data
6 | and python scripts needed to generate them; the data packages are managed with
7 | [datalad](https://www.datalad.org/).
8 | Documentation about datalad can be found in
9 | [its handbook](https://handbook.datalad.org).
10 |
11 | ## Installing datalad
12 |
13 | Datalad depends on multiple components (python, git, and git-annex) and therefore the
14 | installation differs for each platform.
15 | Please refer to the
16 | [datalad handbook](http://handbook.datalad.org/en/latest/intro/installation.html)
17 | for detailed installation instructions.
18 |
19 | ## Creating a data package
20 |
21 | Detailed information on creating datasets can be found in the
22 | [corresponding section](http://handbook.datalad.org/en/latest/basics/101-101-create.html)
23 | in the datalad handbook.
24 | Here, we will show the commands needed to create a dataset for use with PRIMAP2.
25 | To create an empty dataset use the `datalad create` command:
26 |
27 | ```shell
28 | $ datalad create -c text2git
29 | ```
30 |
31 | This will create a new folder and populate it with configuration for git, git-annex,
32 | and datalad.
33 | Additionally, it will add configuration so that all text files (such as python code)
34 | are stored in git with full change tracking, while all binary files (such as netcdf
35 | files) are added to the annex and only transferred on demand.
36 |
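37 | The same steps can also be scripted from python.
38 | Below is a minimal sketch using datalad's python API; the package name
39 | `my-data-package` and the commit message are made up for illustration.
40 |
41 | ```python
42 | import datalad.api as dl
43 |
44 | # create a new data package with the text2git configuration,
45 | # equivalent to `datalad create -c text2git` on the command line
46 | dl.create(path="my-data-package", cfg_proc="text2git")
47 |
48 | # after adding netcdf files and scripts to the folder, record them
49 | dl.save(dataset="my-data-package", message="Add input data and scripts")
50 | ```
51 |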
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | Contributions are welcome, and they are greatly appreciated! Every little bit
4 | helps, and credit will always be given.
5 |
6 | You can contribute in many ways:
7 |
8 | ## Types of Contributions
9 |
10 | ### Report Bugs
11 |
12 | Report bugs at https://github.com/primap-community/primap2/issues.
13 |
14 | If you are reporting a bug, please include:
15 |
16 | * Your operating system name and version.
17 | * Any details about your local setup that might be helpful in troubleshooting.
18 | * Detailed steps to reproduce the bug.
19 |
20 | ### Fix Bugs
21 |
22 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
23 | wanted" is open to whoever wants to implement it.
24 |
25 | ### Implement Features
26 |
27 | Look through the GitHub issues for features. Anything tagged with "enhancement"
28 | and "help wanted" is open to whoever wants to implement it.
29 |
30 | ### Write Documentation
31 |
32 | PRIMAP2 could always use more documentation, whether as part of the
33 | official PRIMAP2 docs, in docstrings, or even on the web in blog posts,
34 | articles, and such.
35 |
36 | ### Submit Feedback
37 |
38 | The best way to send feedback is to file an issue at https://github.com/primap-community/primap2/issues.
39 |
40 | If you are proposing a feature:
41 |
42 | * Explain in detail how it would work.
43 | * Keep the scope as narrow as possible, to make it easier to implement.
44 | * Remember that our time is limited, and that contributions
45 | are welcome :)
46 |
47 | ## Get Started!
48 |
49 | To get started with PRIMAP2 development, check out our development
50 | documentation at https://primap2.readthedocs.io/en/main/development.html.
51 |
--------------------------------------------------------------------------------
/primap-stubs.patch:
--------------------------------------------------------------------------------
1 | diff '--color=auto' -ru xarray.orig/core/dataarray.pyi xarray/core/dataarray.pyi
2 | --- xarray.orig/core/dataarray.pyi 2024-04-17 16:39:22.871662707 +0200
3 | +++ xarray/core/dataarray.pyi 2024-04-17 16:38:30.763522838 +0200
4 | @@ -1,6 +1,7 @@
5 | import datetime
6 | import numpy as np
7 | import pandas as pd
8 | +import primap2
9 | from _typeshed import Incomplete
10 | from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence
11 | from dask.dataframe import DataFrame as DaskDataFrame
12 | @@ -56,6 +57,8 @@
13 | @name.setter
14 | def name(self, value: Hashable | None) -> None: ...
15 | + @property
16 | + def pr(self) -> primap2.accessors.PRIMAP2DataArrayAccessor: ...
17 | @property
18 | def variable(self) -> Variable: ...
19 | @property
20 | def dtype(self) -> np.dtype: ...
21 | diff '--color=auto' -ru xarray.orig/core/dataset.pyi xarray/core/dataset.pyi
22 | --- xarray.orig/core/dataset.pyi 2024-04-17 16:39:22.871662707 +0200
23 | +++ xarray/core/dataset.pyi 2024-04-17 16:39:00.667603556 +0200
24 | @@ -1,6 +1,7 @@
25 | import datetime
26 | import numpy as np
27 | import pandas as pd
28 | +import primap2
29 | from _typeshed import Incomplete
30 | from collections.abc import Collection, Hashable, Iterable, Iterator, Mapping, MutableMapping, Sequence
31 | from dask.dataframe import DataFrame as DaskDataFrame
32 | @@ -69,6 +70,8 @@
33 | @attrs.setter
34 | def attrs(self, value: Mapping[Any, Any]) -> None: ...
35 | + @property
36 | + def pr(self) -> primap2.accessors.PRIMAP2DatasetAccessor: ...
37 | @property
38 | def encoding(self) -> dict[Any, Any]: ...
39 | @encoding.setter
40 | def encoding(self, value: Mapping[Any, Any]) -> None: ...
41 |
--------------------------------------------------------------------------------
/tbump.toml:
--------------------------------------------------------------------------------
1 | # Uncomment this if your project is hosted on GitHub:
2 | github_url = "https://github.com/primap-community/primap2/"
3 |
4 | [version]
5 | current = "0.13.0"
6 |
7 | # Example of a semver regexp.
8 | # Make sure this matches current_version before
9 | # using tbump
10 | regex = '''
11 | (?P<major>\d+)
12 | \.
13 | (?P<minor>\d+)
14 | \.
15 | (?P<patch>\d+)
16 | '''
17 |
18 | [git]
19 | message_template = "Release version {new_version}"
20 | tag_template = "v{new_version}"
21 |
22 | # For each file to patch, add a [[file]] config
23 | # section containing the path of the file, relative to the
24 | # tbump.toml location.
25 | [[file]]
26 | src = "setup.cfg"
27 | search = 'version = {current_version}'
28 |
29 | [[file]]
30 | src = "primap2/__init__.py"
31 | search = '__version__ = "{current_version}"'
32 |
33 | [[file]]
34 | src = "README.md"
35 | search = 'primap-community/primap2: PRIMAP2 Version {current_version}.'
36 |
37 | [[file]]
38 | src = "Makefile"
39 | search = '\tSETUPTOOLS_SCM_PRETEND_VERSION={current_version}'
40 |
41 | # You can specify a list of commands to
42 | # run after the files have been patched
43 | # and before the git commit is made
44 |
45 | [[before_commit]]
46 | name = "lint"
47 | cmd = "make lint"
48 |
49 | [[before_commit]]
50 | name = "update github release notes"
51 | cmd = "rm .github_release_notes_file.md; venv/bin/towncrier build --version {new_version} --draft --config towncrier_github_release_notes.toml > .github_release_notes_file.md"
52 |
53 | [[before_commit]]
54 | name = "update changelog"
55 | cmd = "venv/bin/towncrier build --version {new_version} --yes"
56 |
57 | # Or run some commands after the git tag and the branch
58 | # have been pushed:
59 |
60 | [[after_push]]
61 | name = "release on github"
62 | cmd = "gh release create --repo primap-community/primap2 v{new_version} --title 'PRIMAP2 Version {new_version}' --notes-file .github_release_notes_file.md"
63 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | pull_request:
5 | push:
6 | branches: [main]
7 | workflow_dispatch: # allows you to trigger the workflow run manually
8 |
9 | jobs:
10 | test:
11 | runs-on: ${{ matrix.os }}
12 | strategy:
13 | matrix:
14 | os: [ ubuntu-latest, windows-latest ]
15 | python-version: [ "3.11", "3.12" ]
16 | resolution: [ "highest", "lowest-direct" ]
17 | steps:
18 | - uses: actions/checkout@v4
19 |
20 | - name: Set up uv and caching
21 | uses: astral-sh/setup-uv@v2
22 | with:
23 | enable-cache: true
24 | cache-suffix: "${{ matrix.os }}-${{ matrix.python-version }}-${{ matrix.resolution }}"
25 | cache-dependency-glob: "setup.cfg"
26 | version: "0.4.9"
27 |
28 | - name: Create venv
29 | run: |
30 | uv venv --seed --python ${{ matrix.python-version }}
31 |
32 | - name: Install ${{ matrix.resolution }} dependencies
33 | run: |
34 | uv pip install --resolution ${{ matrix.resolution }} .[test]
35 |
36 | - name: Test with pytest (linux)
37 | if: startsWith(matrix.os, 'ubuntu')
38 | run: |
39 | source .venv/bin/activate
40 | pytest --xdoc --cov=primap2 --cov-report=xml --junitxml=junit.xml -o junit_family=legacy
41 |
42 | - name: Test with pytest (windows)
43 | if: startsWith(matrix.os, 'windows')
44 | run: |
45 | .venv\Scripts\activate
46 | pytest --xdoc --cov=primap2 --cov-report=xml --junitxml=junit.xml -o junit_family=legacy
47 |
48 | - name: Upload coverage to Codecov
49 | uses: codecov/codecov-action@v4
50 | with:
51 | files: ./coverage.xml
52 | token: ${{ secrets.CODECOV_TOKEN }}
53 |
54 | - name: Upload test results to Codecov
55 | if: ${{ !cancelled() }}
56 | uses: codecov/test-results-action@v1
57 | with:
58 | token: ${{ secrets.CODECOV_TOKEN }}
59 |
--------------------------------------------------------------------------------
/primap2/accessors.py:
--------------------------------------------------------------------------------
1 | """xarray extension accessors providing an API under the 'pr' namespace."""
2 |
3 | import xarray as xr
4 |
5 | from ._aggregate import DataArrayAggregationAccessor, DatasetAggregationAccessor
6 | from ._convert import DataArrayConversionAccessor
7 | from ._data_format import DatasetDataFormatAccessor
8 | from ._downscale import DataArrayDownscalingAccessor, DatasetDownscalingAccessor
9 | from ._fill_combine import DataArrayFillAccessor, DatasetFillAccessor
10 | from ._merge import DataArrayMergeAccessor, DatasetMergeAccessor
11 | from ._metadata import DatasetMetadataAccessor
12 | from ._overview import DataArrayOverviewAccessor, DatasetOverviewAccessor
13 | from ._selection import (
14 | DataArrayAliasSelectionAccessor,
15 | DatasetAliasSelectionAccessor,
16 | )
17 | from ._setters import DataArraySettersAccessor, DatasetSettersAccessor
18 | from ._units import DataArrayUnitAccessor, DatasetUnitAccessor
19 |
20 |
21 | @xr.register_dataset_accessor("pr")
22 | class PRIMAP2DatasetAccessor(
23 | DatasetAggregationAccessor,
24 | DatasetAliasSelectionAccessor,
25 | DatasetDataFormatAccessor,
26 | DatasetDownscalingAccessor,
27 | DatasetMergeAccessor,
28 | DatasetMetadataAccessor,
29 | DatasetOverviewAccessor,
30 | DatasetSettersAccessor,
31 | DatasetUnitAccessor,
32 | DatasetFillAccessor,
33 | ):
34 | """Collection of methods useful for climate policy analysis."""
35 |
36 |
37 | @xr.register_dataarray_accessor("pr")
38 | class PRIMAP2DataArrayAccessor(
39 | DataArrayAggregationAccessor,
40 | DataArrayAliasSelectionAccessor,
41 | DataArrayConversionAccessor,
42 | DataArrayDownscalingAccessor,
43 | DataArrayMergeAccessor,
44 | DataArrayOverviewAccessor,
45 | DataArraySettersAccessor,
46 | DataArrayUnitAccessor,
47 | DataArrayFillAccessor,
48 | ):
49 | """Collection of methods useful for climate policy analysis."""
50 |
51 |
52 | __all__ = ["PRIMAP2DataArrayAccessor", "PRIMAP2DatasetAccessor"]
53 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .github_release_notes_file.md
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | env/
14 | docs-old/generated/
15 | docs/generated/
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | stubs/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | html/
33 | .mutmut-cache
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs-old/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # pyenv
82 | .python-version
83 |
84 | # celery beat schedule file
85 | celerybeat-schedule
86 |
87 | # SageMath parsed files
88 | *.sage.py
89 |
90 | # dotenv
91 | .env
92 |
93 | # virtualenv
94 | .venv
95 | venv/
96 | ENV/
97 |
98 | # Spyder project settings
99 | .spyderproject
100 | .spyproject
101 |
102 | # Rope project settings
103 | .ropeproject
104 |
105 | # mkdocs documentation
106 | /site
107 |
108 | # mypy
109 | .mypy_cache/
110 |
111 | # IDE settings
112 | .vscode/
113 | .idea/
114 | /.dmypy.json
115 |
116 | # Johannes' development scripts
117 | JG_test_scripts
118 |
119 | # mac stuff
120 | .DS_Store
121 |
--------------------------------------------------------------------------------
/primap2/tests/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pint
3 | import xarray as xr
4 |
5 |
6 | def allclose(a: xr.DataArray, b: xr.DataArray, *args, **kwargs) -> bool:
7 | """Like np.allclose, but converts a to b's units before comparing."""
8 | try:
9 | a = a.pint.to(b.pint.units)
10 | except pint.DimensionalityError:
11 | return False
12 | if a.dtype == float: # need to use "allclose" to compare floats
13 | return np.allclose(a.pint.magnitude, b.pint.magnitude, *args, **kwargs)
14 | else:
15 | return (a.pint.magnitude == b.pint.magnitude).all()
16 |
17 |
18 | def assert_equal(a: xr.DataArray, b: xr.DataArray, *args, **kwargs):
19 | """Asserts that contents are allclose(), and the name and attrs are also equal."""
20 | assert allclose(a, b, *args, **kwargs)
21 | assert a.attrs == b.attrs, (a.attrs, b.attrs)
22 | assert a.name == b.name, (a.name, b.name)
23 |
24 |
25 | def assert_align(a: xr.DataArray, b: xr.DataArray) -> tuple[xr.DataArray, xr.DataArray]:
26 | """Asserts that a and b have the same shape and returns a and b with axes and
27 | dimensions aligned and sorted equally so that naive comparisons can be done.
28 | """
29 | assert set(a.dims) == set(b.dims), (a.dims, b.dims)
30 | aa, ba = xr.align(a, b, join="outer")
31 | aa = aa.transpose(*ba.dims)
32 | size_unchanged = sorted(aa.shape) == sorted(a.shape) and ba.shape == b.shape
33 | assert size_unchanged, (a.shape, b.shape)
34 | return aa, ba
35 |
36 |
37 | def assert_aligned_equal(a: xr.DataArray, b: xr.DataArray, *args, **kwargs):
38 | """Assert that a and b are equal after alignment of their dimensions."""
39 | a, b = assert_align(a, b)
40 | assert_equal(a, b, *args, **kwargs)
41 |
42 |
43 | def assert_ds_aligned_equal(a: xr.Dataset, b: xr.Dataset, *args, **kwargs):
44 | """Assert that a and b are equal after alignment of their dimensions."""
45 | assert set(a.keys()) == set(b.keys())
46 | for key in a.keys():
47 | assert_aligned_equal(a[key], b[key], *args, **kwargs)
48 | assert a.attrs == b.attrs, (a.attrs, b.attrs)
49 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "setuptools>=66",
4 | "wheel>=0.42",
5 | "build>=1.0.0",
6 | "setuptools_scm[toml]==8.1"
7 | ]
8 | build-backend = "setuptools.build_meta"
9 |
10 | [tool.ruff]
11 | extend-include = ["*.ipynb"]
12 | extend-exclude = [
13 | "climate_categories/data/*.py",
14 | "climate_categories/tests/data/*.py",
15 | "docs-old/",
16 | "docs/*"
17 | ]
18 | line-length = 100
19 | lint.extend-select = [ "E", "W", "I", "UP", "B", "YTT", "A", "NPY", "RUF", "FURB", "FLY", "PYI" ]
20 | target-version = "py311"
21 |
22 | [tool.ruff.format]
23 | docstring-code-format = true
24 |
25 | [tool.ruff.lint.isort]
26 | known-first-party = ["primap2"]
27 |
28 | [tool.ruff.lint.pydocstyle]
29 | convention = "numpy"
30 |
31 | [tool.setuptools_scm]
32 |
33 | [tool.pytest.ini_options]
34 | # actually, importlib.resources.open_text will be undeprecated again in python 3.13
35 | filterwarnings = "ignore:.*open_text is deprecated.*:DeprecationWarning"
36 |
37 | [tool.towncrier]
38 | package = "primap2"
39 | package_dir = "primap2"
40 | filename = "docs/source/changelog.md"
41 | directory = "changelog/"
42 | title_format = "## primap2 {version} ({project_date})"
43 | underlines = ["", "", ""]
44 | issue_format = "[#{issue}](https://github.com/primap-community/primap2/pull/{issue})"
45 |
46 | [[tool.towncrier.type]]
47 | directory = "breaking"
48 | name = "Breaking Changes"
49 | showcontent = true
50 |
51 | [[tool.towncrier.type]]
52 | directory = "deprecation"
53 | name = "Deprecations"
54 | showcontent = true
55 |
56 | [[tool.towncrier.type]]
57 | directory = "feature"
58 | name = "Features"
59 | showcontent = true
60 |
61 | [[tool.towncrier.type]]
62 | directory = "improvement"
63 | name = "Improvements"
64 | showcontent = true
65 |
66 | [[tool.towncrier.type]]
67 | directory = "fix"
68 | name = "Bug Fixes"
69 | showcontent = true
70 |
71 | [[tool.towncrier.type]]
72 | directory = "docs"
73 | name = "Improved Documentation"
74 | showcontent = true
75 |
76 | [[tool.towncrier.type]]
77 | directory = "trivial"
78 | name = "Trivial/Internal Changes"
79 | showcontent = false
80 |
--------------------------------------------------------------------------------
/primap2/csg/_strategies/substitution.py:
--------------------------------------------------------------------------------
1 | """Simple strategy which replaces NaNs by datapoints from second timeseries."""
2 |
3 | import attrs
4 | import xarray as xr
5 |
6 | import primap2
7 |
8 |
9 | @attrs.define(frozen=True)
10 | class SubstitutionStrategy:
11 | """Fill missing data in the result dataset by copying.
12 |
13 | The NaNs in the result dataset are substituted with data from the filling
14 | dataset.
15 | """
16 |
17 | type = "substitution"
18 |
19 | def fill(
20 | self,
21 | *,
22 | ts: xr.DataArray,
23 | fill_ts: xr.DataArray,
24 | fill_ts_repr: str,
25 | ) -> tuple[xr.DataArray, list[primap2.ProcessingStepDescription]]:
26 | """Fill gaps in ts using data from the fill_ts.
27 |
28 | Parameters
29 | ----------
30 | ts
31 | Base timeseries. Missing data (NaNs) in this timeseries will be filled.
32 | This function does not modify the data in ts.
33 | fill_ts
34 | Fill timeseries. Data from this timeseries will be used (possibly after
35 | modification) to fill missing data in the base timeseries.
36 | This function does not modify the data in fill_ts.
37 | fill_ts_repr
38 | String representation of fill_ts. Human-readable short representation of
39 | the fill_ts (e.g. the source).
40 |
41 | Returns
42 | -------
43 | filled_ts, descriptions. filled_ts contains the result, where missing
44 | data in ts is (partly) filled using unmodified data from fill_ts.
45 |         descriptions contains information about which years were affected and
46 |         how they were filled.
47 | """
48 | ts_aligned, fill_ts_aligned = xr.align(ts, fill_ts, join="exact")
49 | filled_ts = ts_aligned.fillna(fill_ts_aligned)
50 | filled_mask = ts.isnull() & ~fill_ts.isnull()
51 | time_filled = "all" if filled_mask.all() else filled_mask["time"][filled_mask].to_numpy()
52 | description = primap2.ProcessingStepDescription(
53 | time=time_filled,
54 | description=f"substituted with corresponding values from {fill_ts_repr}",
55 | function=self.type,
56 | source=fill_ts_repr,
57 | )
58 | return filled_ts, [description]
59 |
--------------------------------------------------------------------------------
/docs/source/usage/logging.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | format_version: 0.13
8 | jupytext_version: 1.16.4
9 | kernelspec:
10 | display_name: Python 3 (ipykernel)
11 | language: python
12 | name: python3
13 | ---
14 | # Log messages
15 |
16 | Many primap2 functions emit log messages, which have an associated severity.
17 | The severities we use are shown in the table.
18 |
19 | | severity | used for | shown by default |
20 | |----------|----------------------------------------------------------------------------------|---------|
21 | | debug | useful for understanding what functions do internally | ✗ |
22 | | info | noteworthy information during normal processing | ✓ |
23 | | warning | problems which are not necessarily fatal, but should be acknowledged by the user | ✓ |
24 | | error | problems which need to be solved by the user | ✓ |
25 |
26 | As noted, by default `debug` messages are not shown, while all other messages are shown.
27 |
28 | ## Changing what is shown
29 |
30 | As mentioned, by default `debug` messages are not shown, as you can see here:
31 |
32 | ```{code-cell} ipython3
33 | import primap2
34 | import sys
35 |
36 | from loguru import logger
37 |
38 | logger.debug("This message will not be shown")
39 | logger.info("This message will be shown")
40 | ```
41 |
42 | To change this, remove the standard logger and add a new logger:
43 |
44 | ```{code-cell} ipython3
45 | logger.remove()
46 | logger.add(sys.stderr, level="DEBUG")
47 |
48 | logger.debug("Now you see debug messages")
49 | logger.info("You still also see info messages")
50 | ```
51 |
52 | Instead of showing more, you can also show less:
53 |
54 | ```{code-cell} ipython3
55 | logger.remove()
56 | logger.add(sys.stderr, level="WARNING")
57 |
58 | logger.debug("You don't see debug messages")
59 | logger.info("You also don't see info messages")
60 | logger.warning("But you do see all warnings")
61 | ```
62 |
63 | ## Advanced usage
64 |
65 | It is also possible to log to a file or add more information to the logs. See the
66 | [loguru documentation](https://loguru.readthedocs.io/) for details.
67 |
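68 | For example, here is a small sketch which additionally writes all messages,
69 | including `debug` messages, to a rotating log file (the file name is just an
70 | example):
71 |
72 | ```{code-cell} ipython3
73 | # add a file sink next to the stderr sink configured above;
74 | # debug messages end up only in the file, warnings also on stderr
75 | logger.add("primap2_example.log", level="DEBUG", rotation="10 MB")
76 |
77 | logger.debug("This message is only written to the file")
78 | logger.warning("This message is shown and written to the file")
79 | ```
80 |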
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PRIMAP2
2 |
3 | [![PyPI release](https://img.shields.io/pypi/v/primap2.svg)](https://pypi.python.org/pypi/primap2)
4 | [![Documentation Status](https://readthedocs.org/projects/primap2/badge/?version=main)](https://primap2.readthedocs.io/en/stable/?badge=main)
5 | [![Zenodo release](https://zenodo.org/badge/DOI/10.5281/zenodo.4535902.svg)](https://doi.org/10.5281/zenodo.4535902)
6 |
7 | PRIMAP2 is the next generation of the PRIMAP climate policy analysis suite.
8 | PRIMAP2 is free software; you are welcome to use it in your own research.
9 | The documentation can be found at <https://primap2.readthedocs.io>.
10 |
11 | ## Structure
12 |
13 | PRIMAP2 is:
14 | - A flexible and powerful data format built on [xarray](https://xarray.pydata.org).
15 | - A collection of functions for common tasks when wrangling climate policy
16 | data, like aggregation and interpolation.
17 | - A format for data packages built on [datalad](https://www.datalad.org), providing
18 | metadata extraction and search on a collection of data packages.
19 |
20 | ## Status
21 |
22 | PRIMAP2 is in active development, and not everything promised above is built
23 | yet.
24 |
25 | ## License
26 |
27 | Copyright 2020-2022, Potsdam-Institut für Klimafolgenforschung e.V.
28 |
29 | Copyright 2023-2024, Climate Resource Pty Ltd
30 |
31 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
32 | file except in compliance with the License. You may obtain a copy of the License at
33 |
34 | <http://www.apache.org/licenses/LICENSE-2.0>
35 |
36 | Unless required by applicable law or agreed to in writing, software distributed under
37 | the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
38 | KIND, either express or implied. See the License for the specific language governing
39 | permissions and limitations under the License.
40 |
41 | PRIMAP2 incorporates parts of xarray and pint_xarray, which are available under the
42 | Apache License, Version 2.0 as well. The full text of the xarray copyright statement is
43 | included in the licenses directory.
44 |
45 | ## Citation
46 |
47 | If you use this library and want to cite it, please cite it as:
48 |
49 | Mika Pflüger and Johannes Gütschow. (2025-09-05).
50 | primap-community/primap2: PRIMAP2 Version 0.13.0.
51 | Zenodo. https://doi.org/10.5281/zenodo.17061768
52 |
--------------------------------------------------------------------------------
/changelog/README.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 |
3 | This directory contains "news fragments", i.e. short files that contain a small markdown-formatted bit of text that will be
4 | added to the CHANGELOG when it is next compiled.
5 |
6 | The CHANGELOG will be read by users, so the description should be aimed at primap2 users instead of
7 | describing internal changes which are only relevant to developers. Merge requests in combination with our git history provide additional
8 | developer-centric information.
9 |
10 | Make sure to phrase entries in the past tense and use punctuation; examples:
11 |
12 | ```
13 | Improved verbose diff output with sequences.
14 |
15 | Terminal summary statistics now use multiple colors.
16 | ```
17 |
18 | Each file should have a name of the form `<PULL REQUEST>.<TYPE>.md`, where `<PULL REQUEST>` is the pull request number, and `<TYPE>` is one of:
19 |
20 | * `feature`: new user facing features, like new command-line options and new behaviour.
21 | * `improvement`: improvement of existing functionality, usually without requiring user intervention
22 | * `fix`: fixes a bug.
23 | * `docs`: documentation improvement, like rewording an entire section or adding missing docs.
24 | * `deprecation`: feature deprecation.
25 | * `breaking`: a change which may break existing uses, such as feature removal or behaviour change.
26 | * `trivial`: fixing a small typo or internal change that might be noteworthy.
27 |
28 | So for example: `123.feature.md`, `456.fix.md`.
29 |
30 | Since you need the pull request number for the filename, you must submit a PR first. From this PR, you can get the
31 | PR number and then create the news file. A single PR can also have multiple news items, for example a given PR may
32 | add a feature as well as deprecate some existing functionality.
33 |
34 | If you are not sure what issue type to use, don't hesitate to ask in your PR.
35 |
36 | `towncrier` preserves multiple paragraphs and formatting (code blocks, lists, and so on), but for entries other than
37 | features it is usually better to stick to a single paragraph to keep it concise. You may also use `MyST` [style
38 | cross-referencing](https://myst-parser.readthedocs.io/en/latest/syntax/cross-referencing.html) within your news items
39 | to link to other documentation.
40 |
41 | You can also run `towncrier --draft` to see the draft changelog that will be appended to
42 | `docs/source/changelog.md` on the next release.
43 |
--------------------------------------------------------------------------------
/primap2/tests/data/test_empty_ds_if.csv:
--------------------------------------------------------------------------------
1 | "source","area (ISO3)","entity","unit","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020"
2 | "RAND2020","ARG","CH4","CH4 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3 | "RAND2020","ARG","CO2","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 | "RAND2020","ARG","KYOTOGHG (AR4GWP100)","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5 | "RAND2020","ARG","SF6","SF6 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6 | "RAND2020","BOL","CH4","CH4 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7 | "RAND2020","BOL","CO2","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8 | "RAND2020","BOL","KYOTOGHG (AR4GWP100)","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9 | "RAND2020","BOL","SF6","SF6 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10 | "RAND2020","COL","CH4","CH4 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11 | "RAND2020","COL","CO2","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12 | "RAND2020","COL","KYOTOGHG (AR4GWP100)","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13 | "RAND2020","COL","SF6","SF6 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14 | "RAND2020","MEX","CH4","CH4 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15 | "RAND2020","MEX","CO2","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16 | "RAND2020","MEX","KYOTOGHG (AR4GWP100)","CO2 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17 | "RAND2020","MEX","SF6","SF6 * gigagram / year",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18 |
--------------------------------------------------------------------------------
/primap2/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import pytest
4 | import xarray as xr
5 | from loguru import logger
6 |
7 | import primap2 # noqa: F401
8 |
9 | from . import examples
10 |
11 |
12 | # monkey-patch caplog to work with loguru
13 | # see https://loguru.readthedocs.io/en/stable/resources/migration.html#making-things-work-with-pytest-and-caplog
14 | @pytest.fixture
15 | def caplog(caplog):
16 |     class PropagateHandler(logging.Handler):
17 | def emit(self, record):
18 | logging.getLogger(record.name).handle(record)
19 |
20 |     handler_id = logger.add(PropagateHandler(), format="{message} {extra}")
21 | yield caplog
22 | logger.remove(handler_id)
23 |
24 |
25 | @pytest.fixture
26 | def minimal_ds() -> xr.Dataset:
27 | """A valid, minimal dataset."""
28 | return examples._cached_minimal_ds.copy(deep=True)
29 |
30 |
31 | @pytest.fixture
32 | def opulent_ds() -> xr.Dataset:
33 | """A valid dataset using lots of features."""
34 | return examples._cached_opulent_ds.copy(deep=True)
35 |
36 |
37 | @pytest.fixture
38 | def opulent_str_ds() -> xr.Dataset:
39 | """Like the opulent dataset, but additionally with a stringly typed data variable
40 | "method".
41 | """
42 | return examples._cached_opulent_str_ds.copy(deep=True)
43 |
44 |
45 | @pytest.fixture
46 | def empty_ds() -> xr.Dataset:
47 | """An empty hull of a dataset with missing data."""
48 | return examples._cached_empty_ds.copy(deep=True)
49 |
50 |
51 | @pytest.fixture
52 | def opulent_processing_ds() -> xr.Dataset:
53 | """Like the opulent dataset, but additionally with processing information."""
54 | return examples._cached_opulent_processing_ds.copy(deep=True)
55 |
56 |
57 | @pytest.fixture(params=["opulent", "opulent_str", "opulent_processing", "minimal", "empty"])
58 | def any_ds(request) -> xr.Dataset:
59 | """Test with all available valid example Datasets."""
60 | if request.param == "opulent":
61 | return examples._cached_opulent_ds.copy(deep=True)
62 | elif request.param == "opulent_str":
63 | return examples._cached_opulent_str_ds.copy(deep=True)
64 | elif request.param == "opulent_processing":
65 | return examples._cached_opulent_processing_ds.copy(deep=True)
66 | elif request.param == "minimal":
67 | return examples._cached_minimal_ds.copy(deep=True)
68 | elif request.param == "empty":
69 | return examples._cached_empty_ds.copy(deep=True)
70 |
--------------------------------------------------------------------------------
/docs/source/usage/downscaling.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | format_version: 0.13
8 | jupytext_version: 1.16.4
9 | kernelspec:
10 | display_name: Python 3 (ipykernel)
11 | language: python
12 | name: python3
13 | ---
14 |
15 | # Downscaling
16 |
17 |
18 | To downscale a super-category (for example, regional data) to sub-categories
19 | (for example, country-level data in the same region), the
20 | {py:meth}`xarray.DataArray.pr.downscale_timeseries`
21 | function is available. It determines shares from the available data points and
22 | then uses them to downscale the data for years where full information is not available.
23 |
24 | Let's first create an example dataset with regional data and some country data
25 | missing.
26 |
27 | ```{code-cell} ipython3
28 | ---
29 | mystnb:
30 | code_prompt_show: Logging setup for the docs
31 | tags: [hide-cell]
32 | ---
33 | # setup logging for the docs - we don't need debug logs
34 | import sys
35 | from loguru import logger
36 |
37 | logger.remove()
38 | logger.add(sys.stderr, level="INFO")
39 | ```
40 |
41 | ```{code-cell} ipython3
42 | import primap2
43 | import numpy as np
44 | import xarray as xr
45 |
46 | # select an example dataset
47 | da = primap2.open_dataset("../minimal_ds.nc")["CO2"].loc[{"time": slice("2000", "2003"), "source": "RAND2020"}]
48 | da.pr.to_df()
49 | ```
50 |
51 | ```{code-cell} ipython3
52 | # compute regional data as sum of country-level data
53 | temp = da.sum(dim="area (ISO3)")
54 | temp = temp.expand_dims({"area (ISO3)": ["LATAM"]})
55 | # delete data from the country level for the years 2002-2003 (inclusive)
56 | da.loc[{"time": slice("2002", "2003")}].pint.magnitude[:] = np.nan
57 | # add regional data to the array
58 | da = xr.concat([da, temp], dim="area (ISO3)")
59 | da.pr.to_df()
60 | ```
61 |
62 | As you can see, for 2000 and 2001, country-level data is available, but for later
63 | years, only regional ("LATAM") data is available. We now want to extrapolate the
64 | missing data using the shares from early years and the regional data.
65 |
66 | ```{code-cell} ipython3
67 | # Do the downscaling to fill in country-level data from regional data
68 | da.pr.downscale_timeseries(
69 | basket="LATAM",
70 | basket_contents=["BOL", "MEX", "COL", "ARG"],
71 | dim="area (ISO3)",
72 | )
73 | ```
74 |
75 | For the downscaling, country shares are determined at the points in time where
76 | data for all countries is available; these shares are inter- and extrapolated where
77 | data is missing, and then the regional data is downscaled using them.
78 |
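79 | To make the share logic concrete, here is a small hand-rolled cross-check
80 | using plain numpy; this is only an illustration, not the implementation used
81 | by {py:meth}`xarray.DataArray.pr.downscale_timeseries`.
82 |
83 | ```{code-cell} ipython3
84 | # country shares in 2001, the last year with full country-level information
85 | countries = ["ARG", "BOL", "COL", "MEX"]
86 | values_2001 = da.pr.loc[{"area": countries}].sel(time="2001").pint.magnitude
87 | latam_2001 = da.pr.loc[{"area": "LATAM"}].sel(time="2001").pint.magnitude
88 | latam_2002 = da.pr.loc[{"area": "LATAM"}].sel(time="2002").pint.magnitude
89 |
90 | # extrapolating the 2001 shares forward and scaling the regional total
91 | # should closely reproduce the downscaled 2002 values shown above
92 | shares = values_2001 / latam_2001
93 | shares * latam_2002
94 | ```
95 |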
--------------------------------------------------------------------------------
/primap2/_metadata.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from . import _accessor_base
4 |
5 |
6 | class DatasetMetadataAccessor(_accessor_base.BaseDatasetAccessor):
7 | @property
8 | def entity_terminology(self) -> str:
9 | """The terminology of the entity attributes in this dataset's data variables"""
10 | return self._ds.attrs["entity_terminology"]
11 |
12 | @entity_terminology.setter
13 | def entity_terminology(self, value: str):
14 | self._ds.attrs["entity_terminology"] = value
15 |
16 | @property
17 | def references(self) -> str:
18 | """Citable reference(s) describing the data
19 |
20 | If the references start with ``doi:``, it is a doi, otherwise it is a
21 | free-form literature reference.
22 | """
23 | return self._ds.attrs["references"]
24 |
25 | @references.setter
26 | def references(self, value: str):
27 | self._ds.attrs["references"] = value
28 |
29 | @property
30 | def rights(self) -> str:
31 | """License or other usage restrictions of the data"""
32 | return self._ds.attrs["rights"]
33 |
34 | @rights.setter
35 | def rights(self, value: str):
36 | self._ds.attrs["rights"] = value
37 |
38 | @property
39 | def contact(self) -> str:
40 | """Who can answer questions about the data"""
41 | return self._ds.attrs["contact"]
42 |
43 | @contact.setter
44 | def contact(self, value: str):
45 | self._ds.attrs["contact"] = value
46 |
47 | @property
48 | def title(self) -> str:
49 | """A succinct description"""
50 | return self._ds.attrs["title"]
51 |
52 | @title.setter
53 | def title(self, value: str):
54 | self._ds.attrs["title"] = value
55 |
56 | @property
57 | def comment(self) -> str:
58 | """Longer form description"""
59 | return self._ds.attrs["comment"]
60 |
61 | @comment.setter
62 | def comment(self, value: str):
63 | self._ds.attrs["comment"] = value
64 |
65 | @property
66 | def institution(self) -> str:
67 | """Where the data originates"""
68 | return self._ds.attrs["institution"]
69 |
70 | @institution.setter
71 | def institution(self, value: str):
72 | self._ds.attrs["institution"] = value
73 |
74 | @property
75 | def publication_date(self) -> datetime.date:
76 | """The original date of publication of the dataset, if published."""
77 | return self._ds.attrs["publication_date"]
78 |
79 | @publication_date.setter
80 | def publication_date(self, value: datetime.date):
81 | self._ds.attrs["publication_date"] = value
82 |
--------------------------------------------------------------------------------
/docs/source/usage/merge.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | format_version: 0.13
8 | jupytext_version: 1.16.4
9 | kernelspec:
10 | display_name: Python 3 (ipykernel)
11 | language: python
12 | name: python3
13 | ---
14 |
15 | # Merging datasets
16 |
17 | xarray provides different functions to combine Datasets and DataArrays.
18 | However, these are not built to combine data containing duplicates
19 | which differ only by rounding or processing errors.
20 | Unfortunately, this is often needed when reading data e.g. from country
21 | reports, because some sectors are included in several tables
22 | which might use different numbers of decimals.
23 | Thus, PRIMAP2 provides the {py:meth}`xarray.Dataset.pr.merge`
24 | function, which accepts data discrepancies up to a given tolerance
25 | level.
26 | The merging of attributes is handled by xarray, and the `combine_attrs`
27 | parameter is just passed on to the xarray functions.
28 | The default is `drop_conflicts`.
29 |
30 | Below is an example using the built-in `opulent_ds`.
31 |
32 | ```{code-cell} ipython3
33 | :tags: [hide-cell]
34 | :mystnb:
35 | : code_prompt_show: "Logging setup for the docs"
36 |
37 | # setup logging for the docs - we don't need debug logs
38 | import sys
39 | from loguru import logger
40 |
41 | logger.remove()
42 | logger.add(sys.stderr, level="INFO")
43 | ```
44 |
45 | ```{code-cell} ipython3
46 | import xarray as xr
47 |
48 | from primap2.tests.examples import opulent_ds
49 |
50 | op_ds = opulent_ds()
51 |
52 | # only take part of the countries to have something to actually merge
53 | da_start = op_ds["CO2"].pr.loc[{"area": ["ARG", "COL", "MEX"]}]
54 |
55 | # modify some data
56 | data_to_modify = op_ds["CO2"].pr.loc[{"area": ["ARG"]}].pr.sum("area")
57 | data_to_modify.data = data_to_modify.data * 1.009
58 | da_merge = op_ds["CO2"].pr.set("area", "ARG", data_to_modify, existing="overwrite")
59 |
60 | # merge with tolerance such that it will pass
61 | da_result = da_start.pr.merge(da_merge, tolerance=0.01)
62 | ```
63 |
64 | ```{code-cell} ipython3
65 | # merge with lower tolerance such that it will fail
66 | try:
67 | # the logged message is very large, only show a small part
68 | logger.disable("primap2")
69 | da_result = da_start.pr.merge(da_merge, tolerance=0.005)
70 | except xr.MergeError as err:
71 | err_short = "\n".join(str(err).split("\n")[0:6])
72 |     print(f"An error occurred during merging: {err_short}")
73 | logger.enable("primap2")
74 |
75 | # you could also only log a warning and not raise an error
76 | # using the error_on_discrepancy=False argument to `merge`
77 | ```
78 |
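79 | As mentioned in the comment above, you can also pass
80 | `error_on_discrepancy=False` so that discrepancies beyond the tolerance are
81 | only logged as a warning instead of raising an error:
82 |
83 | ```{code-cell} ipython3
84 | # the logged warning is very large again, so we suppress it here
85 | logger.disable("primap2")
86 | da_result = da_start.pr.merge(da_merge, tolerance=0.005, error_on_discrepancy=False)
87 | logger.enable("primap2")
88 | ```
89 |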
--------------------------------------------------------------------------------
/docs/source/usage/store_and_load.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | format_version: 0.13
8 | jupytext_version: 1.16.4
9 | kernelspec:
10 | display_name: Python 3 (ipykernel)
11 | language: python
12 | name: python3
13 | ---
14 |
15 | # Store and load datasets
16 |
17 | The native storage format for primap2 datasets is [netcdf](https://www.unidata.ucar.edu/software/netcdf/),
18 | which supports storing all
19 | data and metadata in one file, as well as compression.
20 | We again use a toy example dataset to show how to store and reload datasets.
21 |
22 | ```{code-cell} ipython3
23 | :tags: [hide-cell]
24 | :mystnb:
25 | : code_prompt_show: "Logging setup for the docs"
26 |
27 | # setup logging for the docs - we don't need debug logs
28 | import sys
29 | from loguru import logger
30 |
31 | logger.remove()
32 | logger.add(sys.stderr, level="INFO")
33 | ```
34 |
35 | ```{code-cell} ipython3
36 | import primap2
37 | import primap2.tests
38 |
39 | ds = primap2.tests.examples.toy_ds()
40 |
41 | ds
42 | ```
43 |
44 | ## Store to disk
45 |
46 | Storing a dataset to disk works using the {py:meth}`xarray.Dataset.pr.to_netcdf` function.
47 |
48 | ```{code-cell} ipython3
49 | import tempfile
50 | import pathlib
51 |
52 | # setup temporary directory to save things to in this example
53 | with tempfile.TemporaryDirectory() as tdname:
54 | td = pathlib.Path(tdname)
55 |
56 | # simple saving without compression
57 | ds.pr.to_netcdf(td / "toy_ds.nc")
58 |
59 |     # using zlib compression for all data variables
60 | compression = {"zlib": True, "complevel": 9}
61 | encoding = {var: compression for var in ds.data_vars}
62 | ds.pr.to_netcdf(td / "toy_ds_compressed.nc", encoding=encoding)
63 | ```
64 |
65 | ```{caution}
66 | `netcdf` files are not reproducible.
67 |
68 | `netcdf` is a very flexible format, which e.g. supports compression using a range
69 | of libraries, therefore the exact same `Dataset` can be represented by different
70 | `netcdf` files on disk. Unfortunately, even if you specify the compression options,
71 | `netcdf` files additionally contain metadata about all software versions used to
72 | produce the file. Therefore, if you reproduce a `Dataset` containing the same data
73 | and metadata and store it to a `netcdf` file, it will generally not create a file
74 | which is identical.
75 | ```
76 |
77 | ## Load from disk
78 |
79 | We also provide the function {py:func}`primap2.open_dataset` to load datasets back into memory.
80 | In this example, we load a minimal dataset.
81 |
82 | ```{code-cell} ipython3
83 | ds = primap2.open_dataset("../minimal_ds.nc")
84 |
85 | ds
86 | ```
87 |
88 | Note how units were read and attributes restored.
89 |
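90 | For example, the data variables are quantified with pint units again, and the
91 | dataset attributes survived the roundtrip:
92 |
93 | ```{code-cell} ipython3
94 | # the units came back as pint units, and the attributes were restored
95 | ds["CO2"].pint.units, ds.attrs
96 | ```
97 |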
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = primap2
3 | version = 0.13.0
4 | author = Mika Pflüger
5 | author_email = mika.pflueger@climate-resource.com
6 | description = The next generation of the PRIMAP climate policy analysis suite.
7 | long_description = file: README.md, changelog.md
8 | long_description_content_type = text/markdown
9 | url = https://github.com/primap-community/primap2
10 | project_urls =
11 | Documentation = https://primap2.readthedocs.io/
12 | classifiers =
13 | Development Status :: 3 - Alpha
14 | Intended Audience :: Science/Research
15 | Topic :: Scientific/Engineering :: Atmospheric Science
16 | License :: OSI Approved :: Apache Software License
17 | Natural Language :: English
18 | Programming Language :: Python :: 3
19 | Programming Language :: Python :: 3.11
20 | Programming Language :: Python :: 3.12
21 | license = Apache Software License 2.0
22 | license_files = LICENSE
23 |
24 | [options]
25 | packages =
26 | primap2
27 | primap2.pm2io
28 | primap2.csg
29 | primap2.csg._strategies
30 | primap2.tests
31 | primap2.tests.csg
32 | primap2.tests.data
33 | python_requires = >=3.11, <3.13
34 | setup_requires =
35 | setuptools_scm==8.1
36 | install_requires =
37 | attrs>=23
38 | xarray==2025.8.0
39 | numbagg>=0.8.1
40 | pint>=0.24.4
41 | pint_xarray>=0.4, <0.6
42 | numpy>=1.26,<2
43 | pandas>=2.2.2
44 | openscm_units>=0.6
45 | loguru>=0.7
46 | scipy>=1.13.0
47 | h5netcdf>=1
48 | h5py>=3.10.0
49 | matplotlib>=3.7
50 | ruamel.yaml>=0.18
51 | strictyaml>=1.7.3
52 | openpyxl>=3.1
53 | tqdm>=4.66
54 | msgpack>=1
55 | climate_categories>=0.10.2
56 |
57 | [options.extras_require]
58 | test =
59 | pytest>=8
60 | pytest-cov>=4
61 | xdoctest>=1.2
62 |
63 | dev =
64 | tbump>=6.11
65 | wheel>=0.42
66 | coverage>=7.4
67 | Sphinx>=6
68 | myst-nb>=1
69 | sphinx-book-theme>=1.1
70 | sphinx-favicon>=1.0
71 | jupytext>=1.16
72 | sphinx-copybutton>=0.5.2
73 | sphinx-autosummary-accessors>=2023.4
74 | sphinx-tippy>=0.4.3
75 | numpydoc>=1.6
76 | twine>=5
77 | build>=1.2.2
78 | pre-commit>=3.6
79 | pytest>=8
80 | pytest-cov>=4
81 | xdoctest>=1.2
82 | setuptools>=66
83 | towncrier>=23.6.0
84 | ipykernel>=6.27.1
85 | jupyter>=1.1
86 | mypy>=1.11
87 | tox>=4.11
88 | tox-uv>=1.11.3
89 | ruff>=0.6.3
90 | ruff-lsp>=0.0.50
91 | datalad =
92 | datalad>=1.1
93 |
94 | [options.package_data]
95 | * =
96 | *.csv
97 | *.nc
98 |
99 | [doc8]
100 | max-line-length = 88
101 | ignore-path-errors = docs/data_format_details.rst;D001,docs/interchange_format_details.rst;D001
102 |
--------------------------------------------------------------------------------
/primap2/tests/data/Guetschow-et-al-2021-PRIMAP-crf96_2021-v1.csv:
--------------------------------------------------------------------------------
1 | source,scenario (PRIMAP),provenance,area (ISO3),entity,unit,category (IPCC1996),1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
2 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,0,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93
3 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,2,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93
4 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,2.C,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93
5 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,2.C.3,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93
6 | PRIMAP-crf,2021V1,measured,AUS,C2F6,t C2F6 / yr,M.0.EL,,,,,66.67,66.72,66.61,47.98,31.32,22.15,20.41,17.77,24.03,16.48,18.63,26.08,25,24.37,24.8,25.93,9.943,8.432,6.433,4.889,3.548,3.768,3.705,2.385,2.403,2.3,2.942,5.582,3.31,9.93
7 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,0,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63
8 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,2,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63
9 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,2.C,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63
10 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,2.C.3,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63
11 | PRIMAP-crf,2021V1,measured,AUS,CF4,t CF4 / yr,M.0.EL,,,,,513.3,513.8,512.9,369.4,241.2,170.6,157.2,136.9,185,126.9,143.4,200.8,192.5,187.6,191,199.6,76.56,64.93,49.53,40.45,32.48,34.55,33.79,22.04,22.09,19.39,25.58,18.2,26.47,24.63
12 | PRIMAP-crf,2021V1,measured,AUS,CH4,kt CH4 / yr,0,,,,,5697,5624,5519,5369,5237,5177,5176,5283,5239,5149,5367,5297,5228,4992,5004,5091,5092,5122,5032,4971,4860,4932,4879,4855,4768,4760,4716,4791,4753,4483
13 | PRIMAP-crf,2021V1,measured,AUS,CH4,kt CH4 / yr,1,,,,,1279,1266,1326,1291,1220,1270,1261,1367,1376,1267,1357,1341,1283,1241,1239,1296,1320,1400,1399,1372,1355,1310,1328,1324,1236,1366,1379,1358,1390,1316
14 | PRIMAP-crf,2021V1,measured,AUS,CH4,kt CH4 / yr,1.A,,,,,
15 |
--------------------------------------------------------------------------------
/primap2/tests/test_units.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Tests for _units.py"""
3 |
4 | import numpy as np
5 | import pytest
6 | import xarray as xr
7 | import xarray.testing
8 |
9 | from .utils import allclose, assert_equal
10 |
11 |
12 | def test_roundtrip_quantify(opulent_ds: xr.Dataset):
13 | roundtrip = opulent_ds.pr.dequantify().pr.quantify()
14 | xarray.testing.assert_identical(roundtrip, opulent_ds)
15 |
16 |
17 | def test_roundtrip_quantify_da(opulent_ds: xr.Dataset):
18 | da: xr.DataArray = opulent_ds["SF6 (SARGWP100)"]
19 | roundtrip = da.pr.dequantify().pr.quantify()
20 | assert_equal(roundtrip, da)
21 |
22 |
23 | def test_convert_to_gwp(opulent_ds: xr.Dataset):
24 | da: xr.DataArray = opulent_ds["SF6"]
25 | da_converted = da.pr.convert_to_gwp("SARGWP100", "CO2 Gg / year")
26 | da_expected = opulent_ds["SF6 (SARGWP100)"]
27 | assert_equal(da_converted, da_expected)
28 |
29 | da_converted_like = da.pr.convert_to_gwp_like(da_expected)
30 | assert_equal(da_converted_like, da_expected)
31 |
32 |
33 | def test_convert_to_gwp_like_missing(opulent_ds: xr.Dataset):
34 | da: xr.DataArray = opulent_ds["SF6"]
35 | da_gwp = da.pr.convert_to_gwp("SARGWP100", "CO2 Gg / year")
36 |
37 | del da_gwp.attrs["gwp_context"]
38 | with pytest.raises(ValueError, match="reference array has no gwp_context"):
39 | da.pr.convert_to_gwp_like(da_gwp)
40 |
41 | da_gwp = xr.full_like(da_gwp, np.nan)
42 | da_gwp.attrs["gwp_context"] = "SARGWP100"
43 | with pytest.raises(ValueError, match="reference array has no units attached"):
44 | da.pr.convert_to_gwp_like(da_gwp)
45 |
46 |
47 | def test_convert_to_gwp_incompatible(opulent_ds: xr.Dataset):
48 | da: xr.DataArray = opulent_ds["SF6 (SARGWP100)"]
49 | with pytest.raises(ValueError, match="Incompatible gwp conversions"):
50 | da.pr.convert_to_gwp("AR5GWP", "CO2 Gg / year")
51 |
52 |
53 | def test_convert_to_mass(opulent_ds: xr.Dataset):
54 | da: xr.DataArray = opulent_ds["SF6 (SARGWP100)"]
55 | da_converted = da.pr.convert_to_mass()
56 | da_expected = opulent_ds["SF6"]
57 | assert_equal(da_converted, da_expected)
58 |
59 |
60 | def test_convert_round_trip(opulent_ds: xr.Dataset):
61 | da: xr.DataArray = opulent_ds["SF6"]
62 | assert da.attrs["entity"] == "SF6"
63 | da_gwp = da.pr.convert_to_gwp(gwp_context="AR4GWP100", units="Gg CO2 / year")
64 | da_rt = da_gwp.pr.convert_to_mass()
65 | assert_equal(da, da_rt)
66 | assert da_rt.attrs["entity"] == "SF6"
67 | assert isinstance(da_rt.attrs["entity"], str)
68 |
69 |
70 | def test_convert_to_mass_missing_info(opulent_ds: xr.Dataset):
71 | da: xr.DataArray = opulent_ds["SF6"]
72 | with pytest.raises(
73 | ValueError,
74 | match="No gwp_context given and no gwp_context available in the attrs",
75 | ):
76 | da.pr.convert_to_mass()
77 |
78 | da = opulent_ds["SF6 (SARGWP100)"]
79 | del da.attrs["entity"]
80 | with pytest.raises(ValueError, match="No entity given and no entity available in the attrs"):
81 | da.pr.convert_to_mass()
82 |
83 |
84 | def test_context(opulent_ds: xr.Dataset):
85 | da: xr.DataArray = opulent_ds["SF6 (SARGWP100)"]
86 | with da.pr.gwp_context:
87 | da_converted = opulent_ds["SF6"].pint.to(da.pint.units)
88 | assert allclose(da, da_converted)
89 |
--------------------------------------------------------------------------------
/primap2/tests/test_interchange_format.py:
--------------------------------------------------------------------------------
1 | """Tests for the interchange format."""
2 |
3 | import csv
4 | import importlib
5 | import importlib.resources
6 |
7 | import pandas as pd
8 | import pytest
9 | import xarray as xr
10 |
11 | import primap2
12 | from primap2 import pm2io
13 |
14 | from . import utils
15 |
16 |
17 | def test_round_trip(any_ds: xr.Dataset, tmp_path):
18 | path = tmp_path / "if"
19 | pm2io.write_interchange_format(path, any_ds.pr.to_interchange_format())
20 | with path.with_suffix(".yaml").open() as fd:
21 | print(fd.read())
22 | actual = pm2io.from_interchange_format(pm2io.read_interchange_format(path))
23 | # we expect that Processing information is lost here
24 | expected = any_ds
25 | to_remove = []
26 | for var in expected:
27 | if (
28 | isinstance(var, str)
29 | and var.startswith("Processing of ")
30 | and "described_variable" in expected[var].attrs
31 | ):
32 | to_remove.append(var)
33 | for var in to_remove:
34 | del expected[var]
35 |     utils.assert_ds_aligned_equal(expected, actual)
36 |
37 |
38 | def test_missing_file(minimal_ds, tmp_path):
39 | path = tmp_path / "if"
40 | pm2io.write_interchange_format(path, minimal_ds.pr.to_interchange_format())
41 | with path.with_suffix(".yaml").open() as fd:
42 | content = fd.readlines()
43 | with path.with_suffix(".yaml").open("w") as fd:
44 | for line in content:
45 | if "data_file" in line:
46 | continue
47 | fd.write(line)
48 |
49 | # first test automatic discovery
50 | actual = pm2io.from_interchange_format(pm2io.read_interchange_format(path))
51 | utils.assert_ds_aligned_equal(minimal_ds, actual)
52 |
53 | # now test without csv file
54 | path.with_suffix(".csv").unlink()
55 | with pytest.raises(FileNotFoundError, match="Data file not found at"):
56 | pm2io.read_interchange_format(path)
57 |
58 |
59 | def test_inharmonic_units(minimal_ds, tmp_path):
60 | path = tmp_path / "if"
61 | pm2io.write_interchange_format(path, minimal_ds.pr.to_interchange_format())
62 | df = pd.read_csv(path.with_suffix(".csv"))
63 | df.loc[3, "unit"] = "m"
64 | df.to_csv(path.with_suffix(".csv"), index=False, quoting=csv.QUOTE_NONNUMERIC)
65 |
66 | with pytest.raises(ValueError, match="More than one unit"):
67 | pm2io.from_interchange_format(pm2io.read_interchange_format(path))
68 |
69 |
70 | def test_stable_sorting(empty_ds, tmp_path):
71 | path = tmp_path / "test_empty_ds_if"
72 | ds = empty_ds.copy()
73 | # add some unsorted metadata
74 | ds.pr.contact = "Someone"
75 | ds.pr.comment = "This needs to be sorted alphabetically."
76 | ds.pr.title = "Test Dataset"
77 | # mess up the sorting of the data
78 | ds_if = ds.pr.to_interchange_format()
79 | ds_if = ds_if.sort_values("entity")
80 | pm2io.write_interchange_format(path, ds_if)
81 | result_csv = path.with_suffix(".csv").read_bytes()
82 | result_yaml = path.with_suffix(".yaml").read_bytes()
83 | test_data_dir = importlib.resources.files(primap2).joinpath("tests").joinpath("data")
84 | expected_csv = test_data_dir.joinpath("test_empty_ds_if.csv").read_bytes()
85 | expected_yaml = test_data_dir.joinpath("test_empty_ds_if.yaml").read_bytes()
86 |
87 | assert result_csv == expected_csv
88 | assert result_yaml == expected_yaml
89 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: clean clean-test clean-pyc clean-build docs help virtual-environment install-pre-commit stubs update-venv README.md check-python-version
2 | .DEFAULT_GOAL := help
3 |
4 | define PRINT_HELP_PYSCRIPT
5 | import re, sys
6 |
7 | for line in sys.stdin:
8 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
9 | if match:
10 | target, help = match.groups()
11 | print("%-20s %s" % (target, help))
12 | endef
13 | export PRINT_HELP_PYSCRIPT
14 |
15 | help:
16 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
17 |
18 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
19 |
20 | clean-build: ## remove build artifacts
21 | rm -fr build/
22 | rm -fr dist/
23 | rm -fr .eggs/
24 | find . -name '*.egg-info' -exec rm -fr {} +
25 | find . -name '*.egg' -exec rm -fr {} +
26 |
27 | clean-pyc: ## remove Python file artifacts
28 | find . -name '*.pyc' -exec rm -f {} +
29 | find . -name '*.pyo' -exec rm -f {} +
30 | find . -name '*~' -exec rm -f {} +
31 | find . -name '__pycache__' -exec rm -fr {} +
32 |
33 | clean-test: ## remove test and coverage artifacts
34 | rm -fr .tox/
35 | rm -f .coverage
36 | rm -fr htmlcov/
37 | rm -fr .pytest_cache
38 |
39 | lint: venv ## check style with pre-commit hooks
40 | venv/bin/pre-commit run --all-files
41 |
42 | test: venv ## run tests quickly with the default Python
43 | venv/bin/pytest --xdoc -rx
44 |
45 | test-all: ## run tests on every Python version with tox
46 | venv/bin/tox -p
47 |
48 | coverage: venv ## check code coverage quickly with the default Python
49 | venv/bin/coverage run --source primap2 -m pytest
50 | venv/bin/coverage report -m
51 | venv/bin/coverage html
52 | ls htmlcov/index.html
53 |
54 | clean-docs: venv ## Remove generated parts of documentation, then build docs
55 | . venv/bin/activate ; $(MAKE) -C docs clean
56 | . venv/bin/activate ; $(MAKE) -C docs html
57 |
58 | docs: venv ## generate Sphinx HTML documentation, including API docs
59 | . venv/bin/activate ; $(MAKE) -C docs html
60 |
61 | release: venv dist ## package and upload a release
62 | venv/bin/twine upload --repository primap dist/*
63 |
64 | dist: clean venv ## builds source and wheel package
65 | # because we update the citation info after releasing on github and zenodo but
66 | # before building for pypi, we need to force the correct version.
67 | SETUPTOOLS_SCM_PRETEND_VERSION=0.13.0 venv/bin/python -m build
68 |
69 | install: clean ## install the package to the active Python's site-packages
70 | python setup.py install
71 |
72 | virtual-environment: venv ## setup a virtual environment for development
73 |
74 | venv: requirements_dev.txt setup.cfg
75 | [ -d venv ] || python3 .check_python_version.py
76 | [ -d venv ] || python3 -m venv venv
77 | venv/bin/python -m pip install --upgrade wheel uv
78 | . venv/bin/activate ; venv/bin/uv pip install --upgrade -e .[dev]
79 | touch venv
80 |
81 | update-venv: ## update all packages in the development environment
82 | [ -d venv ] || python3 -m venv venv
83 | venv/bin/python .check_python_version.py
84 | venv/bin/python -m pip install --upgrade wheel uv
85 | . venv/bin/activate ; venv/bin/uv pip install --upgrade --resolution highest -e .[dev]
86 | touch venv
87 |
88 | install-pre-commit: update-venv ## install the pre-commit hooks
89 | venv/bin/pre-commit install
90 |
91 | stubs: venv ## generate directory with xarray stubs with inserted primap2 stubs
92 | rm -rf stubs
93 | mkdir -p stubs
94 | venv/bin/stubgen -p xarray -o stubs
95 | (cd stubs; patch -s -p0 < ../primap-stubs.patch)
96 |
97 | README.md: ## Update the citation information from zenodo
98 | venv/bin/python update_citation_info.py
99 |
--------------------------------------------------------------------------------
/docs/source/api/generate_api_docs.py:
--------------------------------------------------------------------------------
1 | """Generate API docs as we like them.
2 |
3 | autodoc and autosummary-accessors make it possible to use `autosummary` sections that
4 | automatically include functions etc. However, what exactly gets documented needs to
5 | be added manually. This script automates this.
6 | """
7 |
8 | import primap2
9 |
10 | # add new submodules that should be documented here
11 | SUBMODULES_TO_DOCUMENT = ["pm2io", "csg"]
12 |
13 | primap2_top_level_api = [x for x in primap2.__all__ if x not in SUBMODULES_TO_DOCUMENT]
14 | primap2_top_level_api_formatted = "\n".join(f" {x}" for x in sorted(primap2_top_level_api))
15 |
16 | sm_documentation_formatted = []
17 | for sm in SUBMODULES_TO_DOCUMENT:
18 | exec(f"import primap2.{sm}")
19 | sm_top_level_api = getattr(primap2, sm).__all__
20 | sm_top_level_api_formatted = "\n".join(f" {sm}.{x}" for x in sorted(sm_top_level_api))
21 | sm_documentation_formatted.append(f"""
22 | .. _primap2.{sm}:
23 |
24 | primap2.{sm}
25 | {'~'*(len('primap2.') + len(sm))}
26 |
27 | {getattr(primap2, sm).__doc__}
28 |
29 | .. autosummary::
30 | :toctree: generated_{sm}/
31 |
32 | {sm_top_level_api_formatted}
33 | """)
34 |
35 | submodules_documentation_formatted = "\n".join(sm_documentation_formatted)
36 |
37 |
38 | def accessor_attrs_meths(accessor) -> tuple[list[str], list[str]]:
39 | members = dir(accessor)
40 | attrs = []
41 | meths = []
42 | for m in members:
43 | if m.startswith("_") and m != "__getitem__":
44 | continue
45 | if callable(getattr(accessor, m)):
46 | meths.append(m)
47 | else:
48 | attrs.append(m)
49 | return attrs, meths
50 |
51 |
52 | da_pr_attrs, da_pr_meths = accessor_attrs_meths(primap2.accessors.PRIMAP2DataArrayAccessor)
53 | da_pr_attrs_formatted = "\n".join(f" DataArray.pr.{x}" for x in sorted(da_pr_attrs))
54 | da_pr_meths_formatted = "\n".join(f" DataArray.pr.{x}" for x in sorted(da_pr_meths))
55 |
56 | ds_pr_attrs, ds_pr_meths = accessor_attrs_meths(primap2.accessors.PRIMAP2DatasetAccessor)
57 | ds_pr_attrs_formatted = "\n".join(f" Dataset.pr.{x}" for x in sorted(ds_pr_attrs))
58 | ds_pr_meths_formatted = "\n".join(f" Dataset.pr.{x}" for x in sorted(ds_pr_meths))
59 |
60 |
61 | with open("index.rst", "w") as fd:
62 | fd.write(f"""
63 | API
64 | ===
65 | .. currentmodule:: primap2
66 |
67 | Top-level API
68 | -------------
69 |
70 | .. autosummary::
71 | :toctree: generated/
72 |
73 | {primap2_top_level_api_formatted}
74 |
75 |
76 | Submodules
77 | ----------
78 |
79 | {submodules_documentation_formatted}
80 |
81 | .. currentmodule:: xarray
82 |
83 | DataArray
84 | ---------
85 |
86 | .. _da.pr.attributes:
87 |
88 | Attributes
89 | ~~~~~~~~~~
90 |
91 | .. autosummary::
92 | :toctree: generated/
93 | :template: autosummary/accessor_attribute.rst
94 |
95 | {da_pr_attrs_formatted}
96 |
97 | .. _da.pr.methods:
98 |
99 | Methods
100 | ~~~~~~~
101 |
102 | .. autosummary::
103 | :toctree: generated/
104 | :template: autosummary/accessor_method.rst
105 |
106 | {da_pr_meths_formatted}
107 |
108 |
109 | Dataset
110 | -------
111 |
112 | .. _ds.pr.attributes:
113 |
114 | Attributes
115 | ~~~~~~~~~~
116 |
117 | .. autosummary::
118 | :toctree: generated/
119 | :template: autosummary/accessor_attribute.rst
120 |
121 | {ds_pr_attrs_formatted}
122 |
123 | .. _ds.pr.methods:
124 |
125 | Methods
126 | ~~~~~~~
127 |
128 | .. autosummary::
129 | :toctree: generated/
130 | :template: autosummary/accessor_method.rst
131 |
132 | {ds_pr_meths_formatted}
133 | """)
134 |
--------------------------------------------------------------------------------
/docs/source/usage/gas_baskets.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | format_version: 0.13
8 | jupytext_version: 1.16.4
9 | kernelspec:
10 | display_name: Python 3 (ipykernel)
11 | language: python
12 | name: python3
13 | ---
14 |
15 | # Gas baskets
16 |
17 | Gas baskets like `KyotoGHG` are essentially sums of individual emissions. Usually,
18 | gas baskets are specified in units of CO2 warming equivalent, so they always have
19 | to specify a global warming potential metric as well.
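
For illustration, under the SAR GWP100 metric CH$_4$ has a global warming potential
of 21, so 1 Gg CH$_4$ corresponds to 21 Gg CO$_2$ equivalent. A minimal sketch using
the unit registry (assuming the openscm-units `SARGWP100` context shipped with primap2):

```python
from primap2 import ureg

ch4 = 1 * ureg("Gg CH4 / year")
# convert CH4 mass into CO2 equivalent under the SAR GWP100 metric
with ureg.context("SARGWP100"):
    print(ch4.to("Gg CO2 / year"))  # -> 21 Gg CO2 / year
```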
20 |
21 | We offer a few specialized functions for handling gas baskets.
22 |
23 | ## Summation
24 |
25 | To sum the contents of gas baskets, the function
26 | {py:meth}`xarray.Dataset.pr.gas_basket_contents_sum` is available.
27 |
28 | Let's first create an example dataset.
29 |
30 | ```{code-cell} ipython3
31 | import primap2
32 | import xarray as xr
33 | import numpy as np
34 |
35 | # select example dataset
36 | ds = primap2.open_dataset("../minimal_ds.nc").loc[{"time": slice("2000", "2003")}][
37 | ["CH4", "CO2", "SF6"]
38 | ]
39 | ds
40 | ```
41 |
42 | ```{code-cell}
43 | # add (empty) gas basket with corresponding metadata
44 | ds["KyotoGHG (AR4GWP100)"] = xr.full_like(ds["CO2"], np.nan).pr.quantify(units="Gg CO2 / year")
45 | ds["KyotoGHG (AR4GWP100)"].attrs = {"entity": "KyotoGHG", "gwp_context": "AR4GWP100"}
46 |
47 | ds
48 | ```
49 |
50 | Now, we can compute `KyotoGHG` from its contents (assuming for the moment that this
51 | only includes CO$_2$, SF$_6$ and CH$_4$).
52 |
53 | ```{code-cell}
54 | # compute gas basket from its contents, which have to be given explicitly
55 | ds.pr.gas_basket_contents_sum(
56 | basket="KyotoGHG (AR4GWP100)",
57 | basket_contents=["CO2", "SF6", "CH4"],
58 | )
59 | ```
60 |
61 | Note that like all PRIMAP2 functions,
62 | {py:meth}`xarray.Dataset.pr.gas_basket_contents_sum`
63 | returns the result without overwriting anything in the original dataset,
64 | so you have to assign the result explicitly if you want to overwrite existing data:
65 |
66 | ```{code-cell}
67 | ds["KyotoGHG (AR4GWP100)"] = ds.pr.gas_basket_contents_sum(
68 | basket="KyotoGHG (AR4GWP100)",
69 | basket_contents=["CO2", "SF6", "CH4"],
70 | )
71 | ```
72 |
73 | ## Filling in missing information
74 |
75 | To fill in missing data in a gas basket, use
76 | {py:meth}`xarray.Dataset.pr.fill_na_gas_basket_from_contents`
77 |
78 | ```{code-cell}
79 | # delete all data about the years 2002-2003 (inclusive) from the
80 | # KyotoGHG data
81 | ds["KyotoGHG (AR4GWP100)"].loc[{"time": slice("2002", "2003")}].pint.magnitude[:] = np.nan
82 | ds["KyotoGHG (AR4GWP100)"]
83 | ```
84 |
85 | ```{code-cell}
86 | ds.pr.fill_na_gas_basket_from_contents(
87 | basket="KyotoGHG (AR4GWP100)", basket_contents=["CO2", "SF6", "CH4"]
88 | )
89 | ```
90 |
91 | The reverse case is that you are missing some data in the timeseries of
92 | individual gases and want to fill those in using downscaled data from
93 | a gas basket.
94 | Here, use
95 | {py:meth}`xarray.Dataset.pr.downscale_gas_timeseries`
96 |
97 | ```{code-cell}
98 | # delete all data for the years 2002-2003 from the individual gas data
99 | sel = {"time": slice("2002", "2003")}
100 | ds["CO2"].loc[sel].pint.magnitude[:] = np.nan
101 | ds["SF6"].loc[sel].pint.magnitude[:] = np.nan
102 | ds["CH4"].loc[sel].pint.magnitude[:] = np.nan
103 | ds
104 | ```
105 |
106 | ```{code-cell}
107 | # This determines gas shares at the points in time where individual gas
108 | # data is available, interpolates the shares where data is missing, and
109 | # then downscales the gas basket data using the interpolated shares
110 | ds.pr.downscale_gas_timeseries(basket="KyotoGHG (AR4GWP100)", basket_contents=["CO2", "SF6", "CH4"])
111 | ```
112 |
--------------------------------------------------------------------------------
/docs/source/data_reading/index.md:
--------------------------------------------------------------------------------
1 | # Data Reading
2 |
3 | To work with emissions data in PRIMAP2, it first needs to be converted into the
4 | PRIMAP2 netcdf data format. For the most important datasets, we plan to offer
5 | datalad packages that can automatically download and process the data.
6 | For now, and for custom data, you need to do
7 | the conversion yourself.
8 |
9 | ## General information
10 |
11 | The data reading functionality is bundled in the PRIMAP2 submodule {ref}`primap2.pm2io`.
12 |
13 | To enable a wider use of the PRIMAP2 data reading functionality, we read all
14 | data into the PRIMAP2 interchange format, which is a wide format pandas
15 | DataFrame with coordinates in columns, following PRIMAP2 specifications.
16 | Additional meta data is stored in `DataFrame.attrs`. As the `attrs`
17 | functionality in pandas is experimental, it is only attached to the DataFrame
18 | returned by the reading functions and should be saved separately before
19 | doing any processing with the DataFrame.
20 |
21 | The PRIMAP2 interchange format can then be converted into native
22 | PRIMAP2 xarray Datasets.
23 |
24 | For details on data reading see the following sections and example code linked
25 | therein.
26 |
27 | ## Wide csv file
28 |
29 | The function {meth}`primap2.pm2io.read_wide_csv_file_if` reads wide format csv files
30 | which are widely used for emissions data.
31 | All coordinate columns can be defined using dicts
32 | as input, including default values for coordinates not available in the csv
33 | files.
34 | Data can be filtered for wanted or unwanted coordinate values.
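
A minimal sketch of a call (the file name and column values here are purely
illustrative; the examples linked below show complete, working input):

```python
import primap2 as pm2

# hypothetical wide CSV with one column per year
data_if = pm2.pm2io.read_wide_csv_file_if(
    "emissions.csv",
    coords_cols={"unit": "unit", "entity": "gas", "area": "country", "category": "category"},
    coords_defaults={"source": "MYSOURCE", "scenario": "HISTORY"},
    coords_terminologies={"area": "ISO3", "category": "IPCC2006", "scenario": "general"},
    coords_value_mapping={"category": "PRIMAP1", "entity": "PRIMAP1", "unit": "PRIMAP1"},
)
```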
35 |
36 | To illustrate the use of the function we have two examples.
37 | The first example
38 | illustrates the different input parameters using a simple test dataset while
39 | the second example is a real world use of the function reading the PRIMAP-hist
40 | v2.2 dataset into PRIMAP2.
41 |
42 | ```{toctree}
43 | :caption: 'Examples wide csv:'
44 | :maxdepth: 2
45 |
46 | test_data_wide
47 | old-PRIMAP-hist
48 | ```
49 |
50 | ## Long (tidy) csv file
51 |
52 | The function {meth}`primap2.pm2io.read_long_csv_file_if` reads long format CSV files
53 | (also often called tidy CSV files), which are used, for example, by FAOSTAT for
54 | agricultural emissions data.
55 | The input for the function is very similar to the input for
56 | {meth}`primap2.pm2io.read_wide_csv_file_if` described previously; the main difference
57 | is that you additionally have to specify the columns containing the data and the time information, as sketched below.
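
Compared to the wide reader, the `coords_cols` mapping gains `time` and `data`
entries (the values below match the example linked after this section):

```python
coords_cols = {
    "unit": "unit",
    "entity": "gas",
    "area": "country",
    "category": "category",
    "time": "year",       # column containing the time information
    "data": "emissions",  # column containing the data values
}
```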
58 |
59 | To illustrate the use of the function, we have again an example.
60 | The example just reads in some example data to understand how the function works.
61 |
62 | ```{toctree}
63 | :caption: 'Examples long CSV:'
64 | :maxdepth: 2
65 |
66 | test_data_long
67 | ```
68 |
69 | ## Treatment of string codes
70 |
71 | String codes like "IE", "NA" etc. need to be mapped to numerical values.
72 | Each code has to be interpreted to decide whether it maps to 0 or to
73 | NaN. For example, "IE" stands for "included elsewhere", so it has to be
74 | mapped to 0 to show that emissions in this timeseries are 0, not missing.
75 |
76 | By default, we use simple rules combined with defined mappings for special cases.
77 | The rules are as follows; each data point is tested against them in the order given below.
78 |
79 | - If the code contains `FX` it is mapped to `np.nan`
80 | - If the code contains `IE` and/or `NO` it is mapped to 0
81 | - If the code contains `NE` and/or `NA` but neither `IE` nor `NO`, it is mapped to `np.nan`.
82 |
83 | The special cases are
84 |
85 | ```python
86 | _special_codes = {
87 | "C": np.nan,
88 | "CC": np.nan,
89 | "CH4": np.nan, # TODO: move to user passed codes in CRT reading
90 | "nan": np.nan,
91 | "NaN": np.nan,
92 | "-": 0,
93 | "NE0": np.nan,
94 | "NE(1)": np.nan,
95 | "": np.nan,
96 | "FX": np.nan,
97 | }
98 | ```
99 |
100 | `NaN` and `nan` will be detected as `np.nan`.
101 |
102 | Users can define custom rules by assigning a dict in the format of `_special_codes`
103 | to the `convert_str` parameter.
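
As an illustration, the following minimal sketch implements the default rules and a
subset of the special cases. It is not the actual primap2 implementation, and the
precedence of the special cases over the rules is an assumption of the sketch:

```python
import numpy as np

# subset of the special cases listed above
_special_codes = {"-": 0, "nan": np.nan, "": np.nan}


def map_code(code: str) -> float:
    """Map a string code to 0 or NaN following the default rules (sketch only)."""
    if code in _special_codes:  # assumption of this sketch: special cases win
        return _special_codes[code]
    if "FX" in code:
        return np.nan
    if "IE" in code or "NO" in code:
        return 0.0
    # reached only if neither IE nor NO matched
    if "NE" in code or "NA" in code:
        return np.nan
    return np.nan  # unknown codes are treated as missing in this sketch


print(map_code("IE,NE"), map_code("FX"), map_code("-"))  # 0.0 nan 0
```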
104 |
105 | ## Further formats
106 |
107 | In the future we will offer data reading functions for further formats.
108 | Information will be added here.
109 |
--------------------------------------------------------------------------------
/docs/source/api/index.rst:
--------------------------------------------------------------------------------
1 |
2 | API
3 | ===
4 | .. currentmodule:: primap2
5 |
6 | Top-level API
7 | -------------
8 |
9 | .. autosummary::
10 | :toctree: generated/
11 |
12 | Not
13 | ProcessingStepDescription
14 | TimeseriesProcessingDescription
15 | accessors
16 | open_dataset
17 | ureg
18 |
19 |
20 | Submodules
21 | ----------
22 |
23 |
24 | .. _primap2.pm2io:
25 |
26 | primap2.pm2io
27 | ~~~~~~~~~~~~~
28 |
29 | Data reading module of the PRIMAP2 climate policy analysis package.
30 |
31 | .. autosummary::
32 | :toctree: generated_pm2io/
33 |
34 | pm2io.convert_long_dataframe_if
35 | pm2io.convert_wide_dataframe_if
36 | pm2io.from_interchange_format
37 | pm2io.nir_add_unit_information
38 | pm2io.nir_convert_df_to_long
39 | pm2io.read_interchange_format
40 | pm2io.read_long_csv_file_if
41 | pm2io.read_wide_csv_file_if
42 | pm2io.write_interchange_format
43 |
44 |
45 | .. _primap2.csg:
46 |
47 | primap2.csg
48 | ~~~~~~~~~~~
49 |
50 |
51 | Composite Source Generator
52 |
53 | Generate a composite harmonized dataset from multiple sources according to defined
54 | source priorities and matching algorithms.
55 |
56 |
57 | .. autosummary::
58 | :toctree: generated_csg/
59 |
60 | csg.FitParameters
61 | csg.GlobalLSStrategy
62 | csg.LocalTrendsStrategy
63 | csg.PriorityDefinition
64 | csg.StrategyDefinition
65 | csg.StrategyUnableToProcess
66 | csg.SubstitutionStrategy
67 | csg.compose
68 | csg.create_composite_source
69 |
70 |
71 | .. currentmodule:: xarray
72 |
73 | DataArray
74 | ---------
75 |
76 | .. _da.pr.attributes:
77 |
78 | Attributes
79 | ~~~~~~~~~~
80 |
81 | .. autosummary::
82 | :toctree: generated/
83 | :template: autosummary/accessor_attribute.rst
84 |
85 | DataArray.pr.dim_alias_translations
86 | DataArray.pr.gwp_context
87 | DataArray.pr.loc
88 |
89 | .. _da.pr.methods:
90 |
91 | Methods
92 | ~~~~~~~
93 |
94 | .. autosummary::
95 | :toctree: generated/
96 | :template: autosummary/accessor_method.rst
97 |
98 | DataArray.pr.__getitem__
99 | DataArray.pr.add_aggregates_coordinates
100 | DataArray.pr.any
101 | DataArray.pr.combine_first
102 | DataArray.pr.convert
103 | DataArray.pr.convert_to_gwp
104 | DataArray.pr.convert_to_gwp_like
105 | DataArray.pr.convert_to_mass
106 | DataArray.pr.count
107 | DataArray.pr.coverage
108 | DataArray.pr.dequantify
109 | DataArray.pr.downscale_timeseries
110 | DataArray.pr.fill_all_na
111 | DataArray.pr.fillna
112 | DataArray.pr.merge
113 | DataArray.pr.quantify
114 | DataArray.pr.set
115 | DataArray.pr.sum
116 | DataArray.pr.to_df
117 |
118 |
119 | Dataset
120 | -------
121 |
122 | .. _ds.pr.attributes:
123 |
124 | Attributes
125 | ~~~~~~~~~~
126 |
127 | .. autosummary::
128 | :toctree: generated/
129 | :template: autosummary/accessor_attribute.rst
130 |
131 | Dataset.pr.comment
132 | Dataset.pr.contact
133 | Dataset.pr.dim_alias_translations
134 | Dataset.pr.entity_terminology
135 | Dataset.pr.institution
136 | Dataset.pr.loc
137 | Dataset.pr.publication_date
138 | Dataset.pr.references
139 | Dataset.pr.rights
140 | Dataset.pr.title
141 |
142 | .. _ds.pr.methods:
143 |
144 | Methods
145 | ~~~~~~~
146 |
147 | .. autosummary::
148 | :toctree: generated/
149 | :template: autosummary/accessor_method.rst
150 |
151 | Dataset.pr.__getitem__
152 | Dataset.pr.add_aggregates_coordinates
153 | Dataset.pr.add_aggregates_variables
154 | Dataset.pr.any
155 | Dataset.pr.combine_first
156 | Dataset.pr.count
157 | Dataset.pr.coverage
158 | Dataset.pr.dequantify
159 | Dataset.pr.downscale_gas_timeseries
160 | Dataset.pr.downscale_timeseries
161 | Dataset.pr.ensure_valid
162 | Dataset.pr.expand_dims
163 | Dataset.pr.fill_all_na
164 | Dataset.pr.fill_na_gas_basket_from_contents
165 | Dataset.pr.fillna
166 | Dataset.pr.gas_basket_contents_sum
167 | Dataset.pr.has_processing_info
168 | Dataset.pr.merge
169 | Dataset.pr.quantify
170 | Dataset.pr.remove_processing_info
171 | Dataset.pr.set
172 | Dataset.pr.sum
173 | Dataset.pr.to_df
174 | Dataset.pr.to_interchange_format
175 | Dataset.pr.to_netcdf
176 |
--------------------------------------------------------------------------------
/docs/source/data_reading/test_data_long.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | text_representation:
4 | extension: .md
5 | format_name: myst
6 | format_version: 0.13
7 | jupytext_version: 1.16.4
8 | kernelspec:
9 | display_name: Python 3 (ipykernel)
10 | language: python
11 | name: python3
12 | ---
13 |
14 | # Data reading example 3 - minimal test dataset (long)
15 | To run this example, the file `test_csv_data_long.csv` must be placed in the same folder as this notebook.
16 | You can find the notebook and the csv file in the folder `docs/source/data_reading` in the PRIMAP2 repository.
17 |
18 | ```{code-cell} ipython3
19 | # imports
20 | import primap2 as pm2
21 | ```
22 |
23 | ## Dataset Specifications
24 | Here we define which columns of the csv file contain the metadata.
25 | The dict `coords_cols` contains the mapping of csv columns to PRIMAP2 dimensions.
26 | Default values not found in the CSV are set using `coords_defaults`.
27 | The terminologies (e.g. IPCC2006 for categories or the ISO3 country codes for area) are set in the `coords_terminologies` dict.
28 | `coords_value_mapping` defines conversion of metadata values, e.g. category codes.
29 | You can either specify a dict for a metadata column which directly defines the mapping, a function which is used to map metadata values, or a string to select one of the pre-defined functions included in PRIMAP2.
30 | `filter_keep` and `filter_remove` filter the input data.
31 | Each entry in `filter_keep` specifies a subset of the input data which is kept while the subsets defined by `filter_remove` are removed from the input data.
32 |
33 | In the example, the CSV contains the coordinates `country`, `category`, `gas`, and `year`.
34 | They are translated into their proper PRIMAP2 names by specifying them in the
35 | `coords_cols` dictionary. Additionally, columns are specified for the `unit` and
36 | for the actual `data` (which is found in the column `emissions` in the CSV file).
37 | The format used in the `year` column is given using the `time_format` argument.
38 | Values for the `scenario` and `source` coordinates are not available in the csv file;
39 | therefore, we add them using default values defined in `coords_defaults`.
40 | Terminologies are given for `area`, `category`, `scenario`, and the secondary categories.
41 | Providing these terminologies is mandatory to create a valid PRIMAP2 dataset.
42 |
43 | Coordinate mapping is necessary for `category`, `entity`, and `unit`.
44 | They all use the PRIMAP1 specifications in the csv file.
45 | For `category` this means that e.g. `IPC1A2` is converted to `1.A.2`; for `entity`, the conversion affects the way GWP information is stored in the entity name: e.g. `KYOTOGHGAR4` is mapped to `KYOTOGHG (AR4GWP100)`.
46 |
47 | In this example, we also pass `meta_data` to add a reference for the data and usage rights.
48 |
49 | ```{code-cell} ipython3
50 | file = "test_csv_data_long.csv"
51 | coords_cols = {
52 | "unit": "unit",
53 | "entity": "gas",
54 | "area": "country",
55 | "category": "category",
56 | "time": "year",
57 | "data": "emissions",
58 | }
59 | coords_defaults = {
60 | "source": "TESTcsv2021",
61 | "scenario": "HISTORY",
62 | }
63 | coords_terminologies = {
64 | "area": "ISO3",
65 | "category": "IPCC2006",
66 | "scenario": "general",
67 | }
68 | coords_value_mapping = {
69 | "category": "PRIMAP1",
70 | "entity": "PRIMAP1",
71 | "unit": "PRIMAP1",
72 | }
73 | meta_data = {
74 | "references": "Just ask around.",
75 | "rights": "public domain",
76 | }
77 | data_if = pm2.pm2io.read_long_csv_file_if(
78 | file,
79 | coords_cols=coords_cols,
80 | coords_defaults=coords_defaults,
81 | coords_terminologies=coords_terminologies,
82 | coords_value_mapping=coords_value_mapping,
83 | meta_data=meta_data,
84 | time_format="%Y",
85 | )
86 | data_if.head()
87 | ```
88 |
89 | ```{code-cell} ipython3
90 | data_if.attrs
91 | ```
92 |
93 | ## Transformation to PRIMAP2 xarray format
94 | The transformation to PRIMAP2 xarray format is done using the function {meth}`primap2.pm2io.from_interchange_format` which takes an interchange format DataFrame.
95 | The resulting xr Dataset is already quantified, thus the variables are pint arrays which include a unit.
96 |
97 | ```{code-cell} ipython3
98 | data_pm2 = pm2.pm2io.from_interchange_format(data_if)
99 | data_pm2
100 | ```
101 |
--------------------------------------------------------------------------------
/docs/source/data_reading/test_data_wide.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | text_representation:
4 | extension: .md
5 | format_name: myst
6 | format_version: 0.13
7 | jupytext_version: 1.16.4
8 | kernelspec:
9 | display_name: Python 3 (ipykernel)
10 | language: python
11 | name: python3
12 | ---
13 |
14 | # Data reading example 1 - minimal test dataset
15 | To run this example, the file `test_csv_data_sec_cat.csv` must be placed in the same folder as this notebook. You can find the notebook and the csv file in the folder `docs/source/data_reading` in the PRIMAP2 repository.
16 |
17 | ```{code-cell} ipython3
18 | import primap2 as pm2
19 | ```
20 |
21 | ## Dataset Specifications
22 | Here we define which columns of the csv file contain the metadata. The dict `coords_cols` contains the mapping of csv columns to PRIMAP2 dimensions.
23 | Default values are set using `coords_defaults`.
24 | The terminologies (e.g. IPCC2006 for categories or the ISO3 country codes for area) are set in the `coords_terminologies` dict.
25 | `coords_value_mapping` defines conversion of metadata values, e.g. category codes.
26 | You can either specify a dict for a metadata column which directly defines the mapping, a function which is used to map metadata values, or a string to select one of the pre-defined functions included in PRIMAP2.
27 | `filter_keep` and `filter_remove` filter the input data.
28 | Each entry in `filter_keep` specifies a subset of the input data which is kept while the subsets defined by `filter_remove` are removed from the input data.
29 |
30 | For details, we refer to the documentation of {py:func}`primap2.pm2io.read_wide_csv_file_if`.
31 |
32 | In the example, the CSV contains the coordinates `entity`, `area`, `category`, and the secondary category `class`.
33 | As secondary categories have free names, they are prefixed with `sec_cats__` to make clear that it is a secondary category.
34 | Values for the secondary category `Type` and the `scenario` coordinate are not available in the csv file;
35 | therefore, we add them using default values defined in `coords_defaults`.
36 | Terminologies are given for `area`, `category`, `scenario`, and the secondary categories.
37 | Providing these terminologies is mandatory to create a valid PRIMAP2 dataset.
38 |
39 | Coordinate mapping is necessary for `category`, `entity`, and `unit`.
40 | They all use the PRIMAP1 specifications in the csv file.
41 | For `category` this means that e.g. `IPC1A2` is converted to `1.A.2`; for `entity`, the conversion affects the way GWP information is stored in the entity name: e.g. `KYOTOGHGAR4` is mapped to `KYOTOGHG (AR4GWP100)`.
42 |
43 | In this example, we also pass `meta_data` to add a reference for the data and usage rights.
44 |
45 | For examples on using filters we refer to the [second example which reads the PRIMAP-hist data](./old-PRIMAP-hist).
46 |
47 | ```{code-cell} ipython3
48 | file = "test_csv_data_sec_cat.csv"
49 | coords_cols = {
50 | "unit": "unit",
51 | "entity": "gas",
52 | "area": "country",
53 | "category": "category",
54 | "sec_cats__Class": "classification",
55 | }
56 | coords_defaults = {
57 | "source": "TESTcsv2021",
58 | "sec_cats__Type": "fugitive",
59 | "scenario": "HISTORY",
60 | }
61 | coords_terminologies = {
62 | "area": "ISO3",
63 | "category": "IPCC2006",
64 | "sec_cats__Type": "type",
65 | "sec_cats__Class": "class",
66 | "scenario": "general",
67 | }
68 | coords_value_mapping = {
69 | "category": "PRIMAP1",
70 | "entity": "PRIMAP1",
71 | "unit": "PRIMAP1",
72 | }
73 | meta_data = {
74 | "references": "Just ask around.",
75 | "rights": "public domain",
76 | }
77 | data_if = pm2.pm2io.read_wide_csv_file_if(
78 | file,
79 | coords_cols=coords_cols,
80 | coords_defaults=coords_defaults,
81 | coords_terminologies=coords_terminologies,
82 | coords_value_mapping=coords_value_mapping,
83 | meta_data=meta_data,
84 | )
85 | data_if.head()
86 | ```
87 |
88 | ```{code-cell} ipython3
89 | data_if.attrs
90 | ```
91 |
92 | ## Transformation to PRIMAP2 xarray format
93 | The transformation to PRIMAP2 xarray format is done using the function {meth}`primap2.pm2io.from_interchange_format` which takes an interchange format DataFrame.
94 | The resulting xr Dataset is already quantified, thus the variables are pint arrays which include a unit.
95 |
96 | ```{code-cell} ipython3
97 | data_pm2 = pm2.pm2io.from_interchange_format(data_if)
98 | data_pm2
99 | ```
100 |
--------------------------------------------------------------------------------
/docs/source/data_format/interchange_format_details.md:
--------------------------------------------------------------------------------
1 | # Interchange format details
2 |
3 | The interchange format consists of a wide tabular data object and an additional
4 | dictionary carrying the meta data.
5 |
6 | In memory, the tabular data object is a pandas DataFrame and the meta data object
7 | is a python dictionary.
8 | For storage, the tabular data is written to a CSV file and the meta data is written
9 | to a YAML file.
10 |
11 | ## Tabular data
12 |
13 | The data is stored in a wide format.
14 | Each row is a time series.
15 | The columns list first all coordinate values for the time series, then the time points.
16 | An example table representation is:
17 |
18 | | area (ISO3) | category (IPCC2006) | entity (primap2) | unit | 2000 | 2001 | 2002 | 2003 |
19 | |-------------|---------------------|------------------|-----------------|------|------|------|------|
20 | | "COL" | "1" | "CO2" | "Gg CO2 / year" | 2.3 | 2.2 | 2.0 | 1.9 |
21 | | "COL" | "2" | "CO2" | "Gg CO2 / year" | 1.5 | 1.6 | 1.3 | 1.2 |
22 |
23 | Specifically, the columns consist of:
24 |
25 | - All dimensions except the `time` defined on the Dataset as defined in
26 | {ref}`data_format_details`,
27 | including the category set (terminology) in brackets as in the standard data format.
28 | - The entity (with its terminology in brackets, if an entity terminology is defined
29 | for the dataset), which is used to store the data variable name. The full variable
30 | name, including the global warming potential metric if applicable, is used here.
31 | - The unit in a format which can be parsed by openscm-units.
32 | - One column per value in the `time` dimension of the Dataset, formatted according
33 | to the `time_format` strftime format string given in the meta data (see below).
34 |
35 | The strictly tabular data format makes it possible to read the data e.g. into Excel,
36 | but introduces several inefficiencies:
37 |
38 | - In PRIMAP2 data sets, the unit is identical for all time series of the same entity.
39 | Still, the unit is stored for each time series.
40 | - In PRIMAP2 data sets, not all entities use all dimensions. For example, population
41 | data might be given together with emissions data, but only the emissions data use
42 | categories. However, the tabular format forces all entities to be stored with the
43 | same dimensions. Therefore, the dimensions that each entity uses are listed in the
44 | meta data (see below) and dimensions which are not used for the entity are denoted
45 | with an empty string in the tabular data.
46 |
47 | ## Meta Data
48 |
49 | To correctly interpret the tabular data, meta data is necessary.
50 | The meta data is a dictionary with the following keys:
51 |
52 | | key | data type | meaning |
53 | |------------------------|-----------|----------------------------------------------------------------------------------|
54 | | attrs | dict | The `attrs` dictionary of the dataset as defined in {ref}`data_format_details` |
55 | | data_file | str | The relative path to the CSV data file (only when stored, not in-memory) |
56 | | dimensions | dict | Mapping of the entities to a list of the dimensions used by them |
57 | | time_format | str | strftime style time format string used for the time columns |
58 | | additional_coordinates | dict | Mapping of additional coordinate entities to the associated dimension (optional) |
59 | | dtypes | dict | Mapping of non-float entities to their data type (optional) |
60 |
61 | In the `dimensions` dictionary, the keys are entities as given in the tabular data in
62 | the entity column. The values are lists of column names as used in the tabular data,
63 | i.e. including the terminology.
64 | To avoid repeating dimension information for many entities with the same dimensions,
65 | it is possible to use `*` as the entity name in the dimensions dict, which will be used
66 | as a default for all entities not explicitly listed.
67 | Dimension information has to be given for all entities, i.e. if no default dimensions
68 | are specified using `*`, there has to exist an entry in the dimensions dict for each
69 | unique value in the entity column in the tabular data.
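
For illustration, an in-memory meta data dictionary matching the example table above
might look roughly like this (a sketch; the `attrs` entries shown are assumptions,
see {ref}`data_format_details` for their real contents):

```python
meta_data = {
    "attrs": {
        # assumed minimal attrs; see the data format details for real contents
        "area": "area (ISO3)",
        "cat": "category (IPCC2006)",
    },
    "dimensions": {
        # '*' provides default dimensions for all entities not listed explicitly
        "*": ["area (ISO3)", "category (IPCC2006)", "entity (primap2)", "unit"],
    },
    "time_format": "%Y",
}
```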
70 |
71 | ## On-disk format details
72 |
73 | ### CSV file
74 |
75 | Numeric values are given unquoted and string values are quoted with `"`.
76 | Missing information is denoted by an empty string `""`.
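
This quoting convention matches what pandas produces with `csv.QUOTE_NONNUMERIC`;
a sketch writing the example table from above (the file name is hypothetical):

```python
import csv

import pandas as pd

# strings are quoted with '"', numeric values are written unquoted
df = pd.DataFrame(
    {
        "area (ISO3)": ["COL", "COL"],
        "category (IPCC2006)": ["1", "2"],
        "entity (primap2)": ["CO2", "CO2"],
        "unit": ["Gg CO2 / year", "Gg CO2 / year"],
        "2000": [2.3, 1.5],
        "2001": [2.2, 1.6],
    }
)
df.to_csv("example.csv", index=False, quoting=csv.QUOTE_NONNUMERIC)
```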
77 |
78 | ### YAML file
79 |
80 | All keys have to be sorted alphabetically.
81 |
--------------------------------------------------------------------------------
/docs/source/data_reading/old-PRIMAP-hist.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | text_representation:
4 | extension: .md
5 | format_name: myst
6 | format_version: 0.13
7 | jupytext_version: 1.16.4
8 | kernelspec:
9 | display_name: Python 3 (ipykernel)
10 | language: python
11 | name: python3
12 | ---
13 |
14 | # Data reading example 2 - PRIMAP-hist v2.2
15 |
16 | In this example, we read an old version of PRIMAP-hist which is not available in the
17 | native format because it was produced before the native format was invented.
18 |
19 | ```{code-cell} ipython3
20 | # imports
21 | import primap2 as pm2
22 | ```
23 |
24 | ## Obtain the input data
25 |
26 | The PRIMAP-hist data (doi:10.5281/zenodo.4479172) is available [from Zenodo](https://zenodo.org/record/4479172);
27 | we download it directly.
28 |
29 | ```{code-cell} ipython3
30 | import requests
31 | response = requests.get("https://zenodo.org/records/4479172/files/PRIMAP-hist_v2.2_19-Jan-2021.csv?download=1")
32 | file = "PRIMAPHIST22__19-Jan-2021.csv"
33 | with open(file, "w") as fd:
34 | fd.write(response.text)
35 | ```
36 |
37 | ## Dataset Specifications
38 | Here we define which columns of the csv file contain the coordinates.
39 | The dict `coords_cols` contains the mapping of csv columns to PRIMAP2 dimensions.
40 | Default values are set using `coords_defaults`.
41 | The terminologies (e.g. IPCC2006 for categories or the ISO3 country codes for area) are set in the `coords_terminologies` dict.
42 | `coords_value_mapping` defines conversion of metadata values, e.g. category codes.
43 | `filter_keep` and `filter_remove` filter the input data.
44 | Each entry in `filter_keep` specifies a subset of the input data which is kept while the subsets defined by `filter_remove` are removed from the input data.
45 |
46 | For details, we refer to the documentation of {py:func}`primap2.pm2io.read_wide_csv_file_if`.
47 |
48 | ```{code-cell} ipython3
49 | coords_cols = {
50 | "unit": "unit",
51 | "entity": "entity",
52 | "area": "country",
53 | "scenario": "scenario",
54 | "category": "category",
55 | }
56 | coords_defaults = {
57 | "source": "PRIMAP-hist_v2.2",
58 | }
59 | coords_terminologies = {
60 | "area": "ISO3",
61 | "category": "IPCC2006",
62 | "scenario": "PRIMAP-hist",
63 | }
64 |
65 | coords_value_mapping = {
66 | "category": "PRIMAP1",
67 | "unit": "PRIMAP1",
68 | "entity": "PRIMAP1",
69 | }
70 |
71 | filter_keep = {
72 | "f1": {
73 | "entity": "CO2",
74 | "category": ["IPC2", "IPC1"],
75 | "country": ["AUS", "BRA", "CHN", "GBR", "AFG"],
76 | },
77 | "f2": {
78 | "entity": "KYOTOGHG",
79 | "category": ["IPCMAG", "IPC4"],
80 | "country": ["AUS", "BRA", "CHN", "GBR", "AFG"],
81 | },
82 | }
83 |
84 | filter_remove = {"f1": {"scenario": "HISTTP"}}
85 | # filter_keep = {"f1": {"entity": "KYOTOGHG", "category": ["IPC2", "IPC1"]},}
86 | # filter_keep = {}
87 | # filter_remove = {}
88 |
89 | meta_data = {"references": "doi:10.5281/zenodo.4479172"}
90 | ```
91 |
92 | ## Reading the data to interchange format
93 | To enable a wider use of the PRIMAP2 data reading functionality, we read into the PRIMAP2 interchange format, which is a wide format pandas DataFrame with coordinates in columns, following PRIMAP2 specifications.
94 | Additional metadata not captured in this format is stored in `DataFrame.attrs` as a dictionary.
95 | As the `attrs` functionality in pandas is experimental, it is only attached to the DataFrame returned by the reading functions and should be saved separately before doing any processing with the DataFrame.
96 |
97 | Here we read the data using the {meth}`primap2.pm2io.read_wide_csv_file_if` function.
98 | We have specified restrictive filters above to limit the data included in this notebook.
99 |
100 | ```{code-cell} ipython3
101 | PMH_if = pm2.pm2io.read_wide_csv_file_if(
102 | file,
103 | coords_cols=coords_cols,
104 | coords_defaults=coords_defaults,
105 | coords_terminologies=coords_terminologies,
106 | coords_value_mapping=coords_value_mapping,
107 | filter_keep=filter_keep,
108 | filter_remove=filter_remove,
109 | meta_data=meta_data,
110 | )
111 | PMH_if.head()
112 | ```
113 |
114 | ```{code-cell} ipython3
115 | PMH_if.attrs
116 | ```
117 |
118 | ## Transformation to PRIMAP2 xarray format
119 | The transformation to PRIMAP2 xarray format is done using the function {meth}`primap2.pm2io.from_interchange_format` which takes an interchange format DataFrame.
120 | The resulting xr Dataset is already quantified, thus the variables are pint arrays which include a unit.
121 |
122 | ```{code-cell} ipython3
123 | PMH_pm2 = pm2.pm2io.from_interchange_format(PMH_if)
124 | PMH_pm2
125 | ```
126 |
--------------------------------------------------------------------------------
/docs/source/usage/add_and_overwrite.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | format_version: 0.13
8 | jupytext_version: 1.16.4
9 | kernelspec:
10 | display_name: Python 3 (ipykernel)
11 | language: python
12 | name: python3
13 | ---
14 |
15 | # Add and overwrite data
16 |
17 | Generally, datasets in primap2 follow the xarray convention that the data within
18 | datasets is immutable.
19 | To change any data, you need to create a view or a copy of the dataset with the changes
20 | applied.
21 | To this end, we provide a `set` function to set specified data.
22 | It can be used to only fill gaps, add wholly new data, or overwrite existing data in
23 | the dataset.
24 |
25 | ## The `set` functions
26 |
27 | We provide {py:meth}`xarray.DataArray.pr.set` and {py:meth}`xarray.Dataset.pr.set` functions,
28 | for `DataArray`s (individual gases) and `Dataset`s (multiple gases), respectively.
29 |
30 | The basic signature of the `set` functions is `set(dimension, keys, values)`, and it
31 | returns the changed object without changing the original one.
32 | Use it like this:
33 |
34 | ```{code-cell}
35 | # setup: import library and open dataset
36 | import primap2
37 |
38 | ds_min = primap2.open_dataset("../minimal_ds.nc")
39 |
40 | # Now, select a slice of the CO2 data as an example to use
41 | da = ds_min["CO2"].loc[{"time": slice("2000", "2005")}]
42 | da
43 | ```
44 |
45 | ```{code-cell}
46 | import numpy as np
47 |
48 | from primap2 import ureg
49 |
50 | # generate new data for Cuba
51 | new_data_cuba = np.linspace(0, 20, 6) * ureg("Gg CO2 / year")
52 |
53 | # Create a modified copy with the new data for Cuba added
54 | modified = da.pr.set("area", "CUB", new_data_cuba)
55 | modified
56 | ```
57 |
58 | By default, existing non-NaN values are not overwritten:
59 |
60 | ```{code-cell}
61 | try:
62 | da.pr.set("area", "COL", np.linspace(0, 20, 6) * ureg("Gg CO2 / year"))
63 | except ValueError as err:
64 | print(err)
65 | ```
66 |
67 | You can overwrite existing values by specifying `existing="overwrite"`
68 | to overwrite all values or `existing="fillna"` to overwrite only NaNs.
69 |
70 | ```{code-cell}
71 | da.pr.set(
72 | "area",
73 | "COL",
74 | np.linspace(0, 20, 6) * ureg("Gg CO2 / year"),
75 | existing="overwrite",
76 | )
77 | ```
78 |
79 | By default, the `set()` function automatically extends the specified dimension to
80 | accommodate new values if not all key values are present in it yet.
81 | You can change this by specifying `new="error"`, which will raise a KeyError if any of
82 | the keys is not found:
83 |
84 | ```{code-cell}
85 | try:
86 | da.pr.set(
87 | "area",
88 | ["COL", "CUB"],
89 | np.linspace(0, 20, 6) * ureg("Gg CO2 / year"),
90 | existing="overwrite",
91 | new="error",
92 | )
93 | except KeyError as err:
94 | print(err)
95 | ```
96 |
97 | ## Example: computing super-categories
98 |
99 | In particular, the `set()` functions can also be used with xarray's arithmetic
100 | functions to derive values from existing data and store the result in the Dataset.
101 | As an example, we will derive better values for category 0 by adding all
102 | its subcategories and store the result.
103 |
104 | First, let's load a dataset and see the current data for a small subset of the data:
105 |
106 | ```{code-cell}
107 | ds = primap2.open_dataset("../opulent_ds.nc")
108 |
109 | sel = {
110 | "area": "COL",
111 | "category": ["0", "1", "2", "3", "4", "5"],
112 | "animal": "cow",
113 | "product": "milk",
114 | "scenario": "highpop",
115 | "source": "RAND2020",
116 | }
117 | subset = ds.pr.loc[sel].squeeze()
118 |
119 | # TODO: currently, plotting with units still emits a warning
120 | import warnings
121 |
122 | with warnings.catch_warnings():
123 | warnings.simplefilter("ignore")
124 | subset["CO2"].plot.line(x="time", hue="category (IPCC 2006)")
125 | ```
126 |
127 | While it is hard to see any details in this plot, it is clearly visible
128 | that category 0 is not the sum of the other categories (which should not
129 | come as a surprise because the data were generated at random).
130 |
131 | We will now recompute category 0 for the entire dataset using set():
132 |
133 | ```{code-cell}
134 | cat0_new = ds.pr.loc[{"category": ["1", "2", "3", "4", "5"]}].pr.sum("category")
135 |
136 | ds = ds.pr.set(
137 | "category",
138 | "0",
139 | cat0_new,
140 | existing="overwrite",
141 | )
142 |
143 | # plot a small subset of the result
144 | subset = ds.pr.loc[sel].squeeze()
145 | # TODO: currently, plotting with units still emits a warning
146 | import warnings
147 |
148 | with warnings.catch_warnings():
149 | warnings.simplefilter("ignore")
150 | subset["CO2"].plot.line(x="time", hue="category (IPCC 2006)")
151 | ```
152 |
153 | As you can see in the plot, category 0 is now computed from its subcategories.
154 | The set() method of Datasets works on all data variables in the dataset which
155 | have the corresponding dimension. In this example, the "population" variable
156 | does not have categories, so it was unchanged.
157 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration file for the Sphinx documentation builder.
3 |
4 | For the full list of built-in configuration values, see the documentation:
5 | https://www.sphinx-doc.org/en/master/usage/configuration.html
6 | """
7 |
8 | import sphinx_autosummary_accessors
9 |
10 | # -- Project information -----------------------------------------------------
11 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
12 |
13 | project = "primap2"
14 | # put the authors in their own variable, so they can be reused later
15 | author = "Mika Pflüger and Johannes Gütschow"
16 | copyright = "2021-2023: Potsdam Institute for Climate Impact Research; 2023-2024: Climate Resource"
17 | language = "en"
18 |
19 | # -- General configuration ---------------------------------------------------
20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
21 |
22 | extensions = [
23 | # Generate an API documentation automatically from docstrings
24 | "sphinx.ext.autodoc",
25 | # Numpy-style docstrings
26 | "numpydoc",
27 | # Better summaries for API docs
28 | "sphinx.ext.autosummary",
29 | # also for our xarray accessor
30 | "sphinx_autosummary_accessors",
31 | # jupytext rendered notebook support (also loads myst_parser)
32 | "myst_nb",
33 | # links to other docs
34 | "sphinx.ext.intersphinx",
35 | # add source code to docs
36 | "sphinx.ext.viewcode",
37 | # add copy code button to code examples
38 | "sphinx_copybutton",
39 | # math support
40 | "sphinx.ext.mathjax",
41 | # nicer tooltips
42 | "sphinx_tippy",
43 | # better favicons
44 | "sphinx_favicon",
45 | ]
46 |
47 | # general sphinx settings
48 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
49 | add_module_names = True
50 | # Add templates for sphinx autosummary accessors
51 | templates_path = ["_templates", sphinx_autosummary_accessors.templates_path]
52 | # Stop sphinx doing funny things with byte order markers
53 | source_encoding = "utf-8"
54 |
55 | # autodoc type hints settings
56 | # https://github.com/tox-dev/sphinx-autodoc-typehints
57 | # include full name of classes when expanding type hints?
58 | typehints_fully_qualified = True
59 | # Add rtype directive if needed
60 | typehints_document_rtype = True
61 | # Put the return type as part of the return documentation
62 | typehints_use_rtype = False
63 |
64 | # Generate autosummary stubs automatically
65 | autosummary_generate = True
66 |
67 |
68 | # Generate the necessary config for the API documentation
69 | def generate_api_docs(app):
70 | import subprocess
71 | import pathlib
72 |
73 | subprocess.run(
74 | ["python3", "generate_api_docs.py"],
75 | cwd=pathlib.Path(__file__).parent / "api",
76 | check=True,
77 | )
78 |
79 |
80 | def setup(app):
81 | app.connect("builder-inited", generate_api_docs)
82 |
83 |
84 | # Nicer formatting for numpydoc
85 | numpydoc_class_members_toctree = False
86 |
87 | # Center maths equations
88 | mathjax3_config = {"chtml": {"displayAlign": "center"}}
89 |
90 | # myst configuration
91 | myst_enable_extensions = ["amsmath", "dollarmath"]
92 | nb_execution_mode = "cache"
93 | nb_execution_raise_on_error = True
94 | nb_execution_show_tb = True
95 | nb_execution_timeout = 120
96 |
97 | # -- Options for HTML output -------------------------------------------------
98 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
99 |
100 | # Pick your theme for html output
101 | html_theme = "sphinx_book_theme"
102 | html_static_path = ["_static"]
103 | html_theme_options = {
104 | "repository_url": "https://github.com/primap-community/primap2/",
105 | "repository_branch": "main",
106 | "path_to_docs": "docs/source",
107 | "use_repository_button": True,
108 | "use_issues_button": True,
109 | "use_edit_page_button": True,
110 | "logo": {
111 | "text": "primap2 documentation",
112 | "image_light": "_static/primap_logo_transparent.png",
113 | "image_dark": "_static/primap_logo_transparent_dark.png",
114 | },
115 | }
116 | html_context = {
117 | # dark mode of sphinx-book-theme doesn't play nicely with xarray
118 | "default_mode": "light",
119 | }
120 |
121 | # favicon
122 | favicons = [
123 | {"href": "favicon-48x48.png", "sizes": "48x48"},
124 | {"href": "favicon.svg"},
125 | {"href": "favicon.ico", "rel": "shortcut icon"},
126 | {"href": "apple-touch-icon.png", "rel": "apple-touch-icon", "sizes": "180x180"},
127 | {"href": "site.webmanifest", "rel": "manifest"},
128 | ]
129 |
130 | # Intersphinx mapping
131 | intersphinx_mapping = {
132 | "numpy": ("https://numpy.org/doc/stable", None),
133 | "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
134 | "python": ("https://docs.python.org/3", None),
135 | "pyam": ("https://pyam-iamc.readthedocs.io/en/latest", None),
136 | "scmdata": ("https://scmdata.readthedocs.io/en/latest", None),
137 | "xarray": ("https://docs.xarray.dev/en/stable", None),
138 | "pint": (
139 | "https://pint.readthedocs.io/en/latest",
140 | None,
141 | ),
142 | "scipy": ("https://docs.scipy.org/doc/scipy/", None),
143 | "climate_categories": (
144 | "https://climate-categories.readthedocs.io/en/latest",
145 | None,
146 | ),
147 | }
148 |
--------------------------------------------------------------------------------
/primap2/tests/test_conversion.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import primap2.pm2io as pm2io
4 | import primap2.pm2io._conversion # noqa: F401
5 |
6 |
7 | class TestIPCCCodePrimapToPrimap2:
8 | @pytest.mark.parametrize(
9 | "code_in, expected_code_out",
10 | [
11 | ("IPC1A", "1.A"),
12 | ("CATM0EL", "M.0.EL"),
13 | ("IPC1A1B23", "1.A.1.b.ii.3"),
14 | ("1A1Bii3", "1.A.1.b.ii.3"),
15 | ("IPC_1.A.1.B.ii.3", "1.A.1.b.ii.3"),
16 | ("IPCM1B1C", "M.1.B.1.c"),
17 | ("M.1.B.1.C", "M.1.B.1.c"),
18 | ("M.1.B.1.C.", "M.1.B.1.c"),
19 | ("M1B1C", "M.1.B.1.c"),
20 | ],
21 | )
22 | def test_working(self, code_in, expected_code_out):
23 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2(code_in) == expected_code_out
24 |
25 | def test_too_short(self, caplog):
26 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC") == "error_IPC"
27 | assert "WARNING" in caplog.text
28 | assert "Too short to be a PRIMAP IPCC code after removal of prefix." in caplog.text
29 |
30 | def test_wrong_format(self, caplog):
31 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPD1A") == "error_IPD1A"
32 | assert "WARNING" in caplog.text
33 | # assert (
34 | # "Prefix is missing or unknown, known codes are 'IPC' and 'CAT'. "
35 | # "Assuming no code is present." in caplog.text
36 | # )
37 | assert "No digit found on first level." in caplog.text
38 |
39 | def test_end_after_m(self, caplog):
40 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPCM") == "error_IPCM"
41 | assert "WARNING" in caplog.text
42 | assert "Nothing follows the 'M' for an 'M'-code." in caplog.text
43 |
44 | def test_first_lvl(self, caplog):
45 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPCA1") == "error_IPCA1"
46 | assert "WARNING" in caplog.text
47 | assert "No digit found on first level." in caplog.text
48 |
49 | def test_second_lvl(self, caplog):
50 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC123") == "error_IPC123"
51 | assert "WARNING" in caplog.text
52 | assert "No letter found on second level." in caplog.text
53 |
54 | def test_third_lvl(self, caplog):
55 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1AC") == "error_IPC1AC"
56 | assert "WARNING" in caplog.text
57 | assert "No number found on third level." in caplog.text
58 |
59 | def test_fourth_lvl(self, caplog):
60 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1A2_") == "error_IPC1A2_"
61 | assert "WARNING" in caplog.text
62 | assert "No letter found on fourth level." in caplog.text
63 |
64 | def test_fifth_lvl(self, caplog):
65 | assert pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1A2BB") == "error_IPC1A2BB"
66 | assert "WARNING" in caplog.text
67 | assert "No digit or roman numeral found on fifth level." in caplog.text
68 |
69 | def test_sixth_lvl(self, caplog):
70 | assert (
71 | pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1A2B3X") == "error_IPC1A2B3X"
72 | )
73 | assert "WARNING" in caplog.text
74 | assert "No number found on sixth level." in caplog.text
75 |
76 | def test_after_sixth_lvl(self, caplog):
77 | assert (
78 | pm2io._conversion.convert_ipcc_code_primap_to_primap2("IPC1A2B33A")
79 | == "error_IPC1A2B33A"
80 | )
81 | assert "WARNING" in caplog.text
82 | assert "Chars left after sixth level." in caplog.text
83 |
84 |
85 | class TestUnitPrimapToPrimap2:
86 | @pytest.mark.parametrize(
87 | "unit_in, entity_in, expected_unit_out",
88 | [
89 | ("GgCO2eq", "KYOTOGHG", "Gg CO2 / yr"),
90 | ("MtC", "CO", "Mt C / yr"),
91 | ("GgN2ON", "N2O", "Gg N / yr"),
92 | ("t", "CH4", "t CH4 / yr"),
93 | ],
94 | )
95 | def test_working(self, unit_in, entity_in, expected_unit_out):
96 | assert pm2io._conversion.convert_unit_to_primap2(unit_in, entity_in) == expected_unit_out
97 |
98 | def test_no_prefix(self, caplog):
99 | assert pm2io._conversion.convert_unit_to_primap2("CO2eq", "FGASES") == "error_CO2eq_FGASES"
100 | assert "WARNING" in caplog.text
101 | assert "No unit prefix matched for unit." in caplog.text
102 |
103 | def test_unit_empty(self, caplog):
104 | assert pm2io._conversion.convert_unit_to_primap2("", "FGASES") == "error__FGASES"
105 | assert "WARNING" in caplog.text
106 | assert "Input unit is empty. Nothing converted." in caplog.text
107 |
108 | def test_entity_empty(self, caplog):
109 | assert pm2io._conversion.convert_unit_to_primap2("GgCO2eq", "") == "error_GgCO2eq_"
110 | assert "WARNING" in caplog.text
111 | assert "Input entity is empty. Nothing converted." in caplog.text
112 |
113 |
114 | @pytest.mark.parametrize(
115 | "entity_pm1, entity_pm2",
116 | [
117 | ("CO2", "CO2"),
118 | ("KYOTOGHG", "KYOTOGHG (SARGWP100)"),
119 | ("KYOTOGHGAR4", "KYOTOGHG (AR4GWP100)"),
120 | ],
121 | )
122 | def test_convert_entity_gwp_primap_to_primap2(entity_pm1, entity_pm2):
123 | assert pm2io._conversion.convert_entity_gwp_primap_to_primap2(entity_pm1) == entity_pm2
124 |
--------------------------------------------------------------------------------
/primap2/_overview.py:
--------------------------------------------------------------------------------
1 | """Summarization and descriptive statistics functions to get an overview of a data
2 | set.
3 | """
4 |
5 | import typing
6 |
7 | import pandas as pd
8 |
9 | from . import _accessor_base
10 | from ._selection import alias_dims
11 |
12 |
13 | class DataArrayOverviewAccessor(_accessor_base.BaseDataArrayAccessor):
14 | def to_df(self, name: str | None = None) -> pd.DataFrame | pd.Series:
15 | """Convert this array into an unstacked (i.e. non-tidy) pandas.DataFrame.
16 |
17 | Converting to an unstacked :py:class:`pandas.DataFrame` is most useful for two-dimensional
18 | data because then there is no MultiIndex, making the result very easy to read.
19 |
20 | If you want a tidy dataframe, use :py:meth:`xarray.DataArray.to_dataframe` instead.
21 |
22 | Parameters
23 | ----------
24 | name: str
25 | Name to give to this array (required if unnamed).
26 |
27 | Returns
28 | -------
29 | df : pandas.DataFrame
30 | """
31 | if name is None:
32 | name = self._da.name
33 | pandas_obj = self._da.reset_coords(drop=True).to_dataframe(name)[name]
34 | pandas_obj.name = name
35 | if isinstance(pandas_obj, pd.DataFrame) or isinstance(pandas_obj.index, pd.MultiIndex):
36 | return pandas_obj.unstack()
37 | else: # Series without MultiIndex can't be unstacked, return them as-is
38 | return pandas_obj
39 |
40 | @alias_dims(["dims"])
41 | def coverage(self, *dims: typing.Hashable) -> pd.DataFrame | pd.Series:
42 | """Summarize how many data points exist for a dimension combination.
43 |
44 | For each combination of values in the given dimensions, count the number of
45 | non-NaN data points in the array. The result is returned as an
46 | N-dimensional pandas DataFrame.
47 |
48 | If the array's dtype is ``bool``, count the number of True values instead. This
49 | makes it possible to easily apply preprocessing. For example, to count the
50 | number of valid time series use ``da.notnull().any("time").pr.coverage(...)``.
51 |
52 | Parameters
53 | ----------
54 | *dims: str
55 | Names or aliases of the dimensions to be used for summarizing.
56 | You can specify any number of dimensions, but the readability
57 | of the result is best for one or two dimensions.
58 |
59 | Returns
60 | -------
61 | coverage: pandas.DataFrame or pandas.Series
62 | N-dimensional dataframe (series for N=1) summarizing the number of non-NaN
63 | data points for each combination of values in the given dimensions.
64 | """
65 | if not dims:
66 | raise ValueError("Specify at least one dimension.")
67 | da = self._da
68 |
69 | if da.name is None:
70 | name = "coverage"
71 | else:
72 | name = da.name
73 |
74 | if da.dtype != bool:
75 | da = da.notnull()
76 |
77 | return da.pr.sum(reduce_to_dim=dims).transpose(*dims).pr.to_df(name)
78 |
79 |
80 | class DatasetOverviewAccessor(_accessor_base.BaseDatasetAccessor):
81 | def to_df(
82 | self,
83 | name: str | None = None,
84 | ) -> pd.DataFrame:
85 | """Convert this dataset into a pandas.DataFrame.
86 |
87 |         The result is mostly the same as from :py:meth:`xarray.Dataset.to_dataframe`, but
88 |         additional coordinates are discarded instead of included in the output.
89 |
90 | Parameters
91 | ----------
92 | name: str, optional
93 | Name to give to the output columns.
94 |
95 | Returns
96 | -------
97 | df: pandas.DataFrame
98 | """
99 | df = self._ds.pr.remove_processing_info().reset_coords(drop=True).to_dataframe()
100 | if name is not None:
101 | df.columns.name = name
102 | return df
103 |
104 | @alias_dims(["dims"], additional_allowed_values=["entity"])
105 | def coverage(self, *dims: typing.Hashable) -> pd.DataFrame | pd.Series:
106 | """Summarize how many data points exist for a dimension combination.
107 |
108 |         For each combination of values in the given dimensions, count the number of
109 |         non-NaN data points in the dataset. The result is returned as an
110 |         N-dimensional pandas DataFrame (a Series for a single dimension).
111 |
112 |         Only those data variables in the dataset which are defined on all given dims
113 |         are considered, i.e. each dim must be in ``ds[key].dims``.
114 |
115 | If the dataset only contains boolean arrays, count the number of True values
116 | instead. This makes it possible to easily apply preprocessing. For example,
117 | to count the number of valid time series use
118 |         ``ds.notnull().any("time").pr.coverage(...)``.
119 |
120 | Parameters
121 | ----------
122 | *dims: str
123 | Names or aliases of the dimensions to be used for summarizing.
124 | To use the name of the data variables (usually, the gases) as a coordinate,
125 | use "entity". You can specify any number of dimensions, but the readability
126 | of the result is best for one or two dimensions.
127 |
128 | Returns
129 | -------
130 | coverage: pandas.DataFrame or pandas.Series
131 | N-dimensional dataframe (series for N=1) summarizing the number of non-NaN
132 | data points for each combination of values in the given dimensions.
133 | """
134 | if not dims:
135 | raise ValueError("Specify at least one dimension.")
136 |
137 | ds = self._ds
138 |
139 | for dim in dims:
140 | if dim == "entity":
141 | continue
142 | ds = ds.drop_vars([x for x in ds if dim not in ds[x].dims])
143 |
144 | all_boolean = all(ds[var].dtype == bool for var in ds)
145 | if not all_boolean: # Convert into boolean coverage array
146 | ds = ds.notnull()
147 |
148 | da = ds.pr.sum(reduce_to_dim=dims)
149 | if "entity" in dims:
150 | da = da.to_array("entity")
151 |
152 | return da.transpose(*dims).pr.to_df("coverage")
153 |
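# --- Editorial sketch (not part of the original file) --------------------
# Minimal, hypothetical usage of the accessors above. Assumes that importing
# primap2 registers the ``.pr`` accessor on xarray objects; the data and
# dimension names are made up for illustration.

import numpy as np
import xarray as xr

import primap2  # noqa: F401  # registers the .pr accessor

da = xr.DataArray(
    [[1.0, 2.0], [np.nan, np.nan]],
    coords=[("area (ISO3)", ["COL", "ARG"]), ("time", [2000, 2001])],
    name="CO2",
)
da.pr.to_df()                  # unstacked 2x2 DataFrame (area x time)
da.pr.coverage("area (ISO3)")  # pandas Series: COL -> 2, ARG -> 0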
--------------------------------------------------------------------------------
/primap2/tests/test_fill_combine.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Tests for _fill_combine.py
3 |
4 | We only test the treatment of additional (non-indexed) coordinates here, which is
5 | buggy in xarray. All core functionality is assumed to be sufficiently tested in xarray.
6 | """
7 |
8 | import numpy as np
9 |
10 |
11 | def test_fillna_ds_coord_present(minimal_ds):
12 | # add additional coordinate
13 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"]
14 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names))
15 |
16 | sel = {"area (ISO3)": ["COL", "MEX"]}
17 | sel_ds = full_ds.pr.loc[sel]
18 | nan_ds = full_ds.copy()
19 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = (
20 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan
21 | )
22 |
23 | result_ds = nan_ds.pr.fillna(sel_ds)
24 |
25 | # assert_ds_aligned_equal(result_ds, full_ds)
26 | # above fails because data type of country_names differs
27 |
28 | # check that the additional coord is present in result
29 | assert "country_name" in list(result_ds.coords)
30 | # check that the mapping of country names to country codes is intact
31 | # (meaning that the additional coordinate is aligned correctly)
32 | for country in full_ds.coords["area (ISO3)"].values:
33 | assert (
34 | result_ds.coords["country_name"].loc[{"area (ISO3)": country}]
35 | == full_ds.coords["country_name"].loc[{"area (ISO3)": country}]
36 | )
37 |
38 |
39 | def test_fillna_da_coord_present(minimal_ds):
40 | # add additional coordinate
41 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"]
42 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names))
43 |
44 | sel = {"area (ISO3)": ["COL", "MEX"]}
45 | sel_ds = full_ds.pr.loc[sel]
46 | nan_ds = full_ds.copy()
47 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = (
48 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan
49 | )
50 |
51 | result_da = nan_ds["CO2"].pr.fillna(sel_ds["CO2"])
52 |
53 | # check that the additional coord is present in result
54 | assert "country_name" in list(result_da.coords)
55 | # check that the mapping of country names to country codes is intact
56 | # (meaning that the additional coordinate is aligned correctly)
57 | for country in full_ds.coords["area (ISO3)"].values:
58 | assert (
59 | result_da.coords["country_name"].loc[{"area (ISO3)": country}]
60 | == full_ds.coords["country_name"].loc[{"area (ISO3)": country}]
61 | )
62 |
63 |
64 | def test_combine_first_ds_coord_present(minimal_ds):
65 | # add additional coordinate
66 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"]
67 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names))
68 |
69 | sel = {"area (ISO3)": ["COL", "MEX"]}
70 | sel_ds = full_ds.pr.loc[sel]
71 | nan_ds = full_ds.pr.loc[{"area (ISO3)": ["ARG", "COL"]}]
72 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = (
73 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan
74 | )
75 |
76 | result_ds = nan_ds.pr.combine_first(sel_ds)
77 | compare_ds = full_ds.pr.loc[{"area (ISO3)": ["ARG", "COL", "MEX"]}]
78 |
79 | # check that the additional coord is present in result
80 | assert "country_name" in list(result_ds.coords)
81 | # check that the mapping of country names to country codes is intact
82 | # (meaning that the additional coordinate is aligned correctly)
83 | for country in compare_ds.coords["area (ISO3)"].values:
84 | assert (
85 | result_ds.coords["country_name"].loc[{"area (ISO3)": country}]
86 | == compare_ds.coords["country_name"].loc[{"area (ISO3)": country}]
87 | )
88 |
89 |
90 | def test_combine_first_da_coord_present(minimal_ds):
91 | # add additional coordinate
92 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"]
93 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names))
94 |
95 | sel = {"area (ISO3)": ["COL", "MEX"]}
96 | sel_ds = full_ds.pr.loc[sel]
97 | nan_ds = full_ds.pr.loc[{"area (ISO3)": ["ARG", "COL"]}]
98 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = (
99 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan
100 | )
101 |
102 | result_da = nan_ds["CO2"].pr.combine_first(sel_ds["CO2"])
103 | compare_da = full_ds["CO2"].pr.loc[{"area (ISO3)": ["ARG", "COL", "MEX"]}]
104 |
105 | # check that the additional coord is present in result
106 | assert "country_name" in list(result_da.coords)
107 | # check that the mapping of country names to country codes is intact
108 | # (meaning that the additional coordinate is aligned correctly)
109 | for country in compare_da.coords["area (ISO3)"].values:
110 | assert (
111 | result_da.coords["country_name"].loc[{"area (ISO3)": country}]
112 | == compare_da.coords["country_name"].loc[{"area (ISO3)": country}]
113 | )
114 |
115 |
116 | # tests to check if xarray bug persists
117 | def test_fillna_ds_xr_fail(minimal_ds):
118 | # add additional coordinate
119 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"]
120 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names))
121 |
122 | sel = {"area (ISO3)": ["COL", "MEX"]}
123 | sel_ds = full_ds.pr.loc[sel]
124 | nan_ds = full_ds.copy()
125 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = (
126 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan
127 | )
128 |
129 | result_ds = nan_ds.fillna(sel_ds)
130 |
131 | assert "country_name" not in list(result_ds.coords)
132 |
133 |
134 | def test_combine_first_ds_xr_fail(minimal_ds):
135 | # add additional coordinate
136 | country_names = ["Colombia", "Argentina", "Mexico", "Bolivia"]
137 | full_ds = minimal_ds.assign_coords(country_name=("area (ISO3)", country_names))
138 |
139 | sel = {"area (ISO3)": ["COL", "MEX"]}
140 | sel_ds = full_ds.pr.loc[sel]
141 | nan_ds = full_ds.pr.loc[{"area (ISO3)": ["ARG", "COL"]}]
142 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] = (
143 | nan_ds["CO2"].pr.loc[{"area (ISO3)": "COL"}] * np.nan
144 | )
145 |
146 | result_ds = nan_ds.combine_first(sel_ds)
147 |
148 | assert "country_name" not in list(result_ds.coords)
149 |
--------------------------------------------------------------------------------
/primap2/tests/test_selection.py:
--------------------------------------------------------------------------------
1 | """Tests for _selection.py"""
2 |
3 | import pytest
4 | import xarray as xr
5 | import xarray.testing
6 |
7 | import primap2
8 |
9 |
10 | @pytest.mark.parametrize(
11 | ["alias", "full_name"],
12 | [
13 | ("time", "time"),
14 | ("area", "area (ISO3)"),
15 | ("category", "category (IPCC 2006)"),
16 | ("cat", "category (IPCC 2006)"),
17 | ("animal", "animal (FAOSTAT)"),
18 | ("product", "product (FAOSTAT)"),
19 | ("scenario", "scenario (FAOSTAT)"),
20 | ("scen", "scenario (FAOSTAT)"),
21 | ("provenance", "provenance"),
22 | ("model", "model"),
23 | ("source", "source"),
24 | ("CO2", "CO2"),
25 | ("population", "population"),
26 | ],
27 | )
28 | def test_pr_getitem(opulent_ds, alias, full_name):
29 | da = opulent_ds.pr[alias]
30 | assert da.name == full_name
31 |
32 |
33 | @pytest.mark.parametrize(
34 | ["alias", "full_name"],
35 | [
36 | ("time", "time"),
37 | ("area", "area (ISO3)"),
38 | ("category", "category (IPCC 2006)"),
39 | ("cat", "category (IPCC 2006)"),
40 | ("animal", "animal (FAOSTAT)"),
41 | ("product", "product (FAOSTAT)"),
42 | ("scenario", "scenario (FAOSTAT)"),
43 | ("scen", "scenario (FAOSTAT)"),
44 | ("provenance", "provenance"),
45 | ("model", "model"),
46 | ("source", "source"),
47 | ("CO2", "CO2"),
48 | ("population", "population"),
49 | ],
50 | )
51 | def test_pr_getitem_no_attrs(opulent_ds, alias, full_name):
52 | da = opulent_ds.notnull().pr[alias]
53 | assert da.name == full_name
54 |
55 |
56 | @pytest.mark.parametrize(
57 | ["alias", "full_name"],
58 | [
59 | ("time", "time"),
60 | ("area", "area (ISO3)"),
61 | ("category", "category (IPCC 2006)"),
62 | ("cat", "category (IPCC 2006)"),
63 | ("animal", "animal (FAOSTAT)"),
64 | ("product", "product (FAOSTAT)"),
65 | ("scenario", "scenario (FAOSTAT)"),
66 | ("scen", "scenario (FAOSTAT)"),
67 | ("provenance", "provenance"),
68 | ("model", "model"),
69 | ("source", "source"),
70 | ],
71 | )
72 | def test_pr_alias_array(opulent_ds, alias, full_name):
73 | da = opulent_ds.pr["CO2"]
74 | actual = da.pr.sum(dim=alias)
75 | expected = da.sum(dim=full_name, keep_attrs=True)
76 | xr.testing.assert_identical(actual, expected)
77 |
78 |
79 | def test_pr_loc_select(opulent_ds):
80 | sel_pr = opulent_ds.pr.loc[
81 | {
82 | "time": slice("2002", "2005"),
83 | "area": ["COL", "ARG"],
84 | "animal": "cow",
85 | }
86 | ]
87 | sel = opulent_ds.loc[
88 | {
89 | "time": slice("2002", "2005"),
90 | "area (ISO3)": ["COL", "ARG"],
91 | "animal (FAOSTAT)": "cow",
92 | }
93 | ]
94 | xr.testing.assert_identical(sel_pr, sel)
95 |
96 |
97 | def test_pr_loc_select_da(opulent_ds):
98 | da = opulent_ds["CO2"]
99 | sel_pr = da.pr.loc[
100 | {
101 | "time": slice("2002", "2005"),
102 | "area": ["COL", "ARG"],
103 | "animal": "cow",
104 | }
105 | ]
106 | sel = da.loc[
107 | {
108 | "time": slice("2002", "2005"),
109 | "area (ISO3)": ["COL", "ARG"],
110 | "animal (FAOSTAT)": "cow",
111 | }
112 | ]
113 | xr.testing.assert_identical(sel_pr, sel)
114 |
115 |
116 | def test_pr_loc_select_not(opulent_ds):
117 | sel_pr = opulent_ds.pr.loc[
118 | {
119 | "time": slice("2002", "2005"),
120 | "area": ["COL", "ARG"],
121 | "animal": primap2.Not("cow"),
122 | "category": primap2.Not(["0", "1"]),
123 | }
124 | ]
125 | sel = opulent_ds.loc[
126 | {
127 | "time": slice("2002", "2005"),
128 | "area (ISO3)": ["COL", "ARG"],
129 | "animal (FAOSTAT)": ["swine", "goat"],
130 | "category (IPCC 2006)": ["2", "3", "4", "5", "1.A", "1.B"],
131 | }
132 | ]
133 | xr.testing.assert_identical(sel_pr, sel)
134 |
135 |
136 | def test_pr_loc_select_da_not(opulent_ds):
137 | da = opulent_ds["CO2"]
138 | sel_pr = da.pr.loc[
139 | {
140 | "time": slice("2002", "2005"),
141 | "area": ["COL", "ARG"],
142 | "animal": primap2.Not("cow"),
143 | "category": primap2.Not(["0", "1"]),
144 | }
145 | ]
146 | sel = da.loc[
147 | {
148 | "time": slice("2002", "2005"),
149 | "area (ISO3)": ["COL", "ARG"],
150 | "animal (FAOSTAT)": ["swine", "goat"],
151 | "category (IPCC 2006)": ["2", "3", "4", "5", "1.A", "1.B"],
152 | }
153 | ]
154 | xr.testing.assert_identical(sel_pr, sel)
155 |
156 |
157 | def test_resolve_not(opulent_ds):
158 | result = primap2._selection.resolve_not(
159 | input_selector={
160 | "a": "1",
161 | "b": ["1", "2"],
162 | "animal (FAOSTAT)": primap2.Not("cow"),
163 | "area (ISO3)": primap2.Not(["MEX", "COL"]),
164 | },
165 | xarray_obj=opulent_ds,
166 | )
167 | assert len(result) == 4
168 | assert result["a"] == "1"
169 | assert result["b"] == ["1", "2"]
170 | assert len(result["animal (FAOSTAT)"]) == 2
171 | assert "swine" in result["animal (FAOSTAT)"]
172 | assert "goat" in result["animal (FAOSTAT)"]
173 | assert len(result["area (ISO3)"]) == 2
174 | assert "ARG" in result["area (ISO3)"]
175 | assert "BOL" in result["area (ISO3)"]
176 |
177 |
178 | def test_resolve_not_da(opulent_ds):
179 | result = primap2._selection.resolve_not(
180 | input_selector={
181 | "a": "1",
182 | "b": ["1", "2"],
183 | "animal (FAOSTAT)": primap2.Not("cow"),
184 | "area (ISO3)": primap2.Not(["MEX", "COL"]),
185 | },
186 | xarray_obj=opulent_ds["CO2"],
187 | )
188 | assert len(result) == 4
189 | assert result["a"] == "1"
190 | assert result["b"] == ["1", "2"]
191 | assert len(result["animal (FAOSTAT)"]) == 2
192 | assert "swine" in result["animal (FAOSTAT)"]
193 | assert "goat" in result["animal (FAOSTAT)"]
194 | assert len(result["area (ISO3)"]) == 2
195 | assert "ARG" in result["area (ISO3)"]
196 | assert "BOL" in result["area (ISO3)"]
197 |
198 |
199 | def test_alias_special_cases():
200 | assert primap2._selection.alias(1, {"a": "b"}, [1, 2, 3]) == 1
201 | with pytest.raises(primap2._selection.DimensionNotExistingError):
202 | primap2._selection.alias(1, {"a": "b"}, ["b", "c"])
203 |
--------------------------------------------------------------------------------
/docs/source/usage/skipna.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupytext:
3 | formats: md:myst
4 | text_representation:
5 | extension: .md
6 | format_name: myst
7 | format_version: 0.13
8 | jupytext_version: 1.16.4
9 | kernelspec:
10 | display_name: Python 3 (ipykernel)
11 | language: python
12 | name: python3
13 | ---
14 |
15 | # Dealing with missing information
16 |
17 | ## Aggregation
18 |
19 | xarray provides robust functions for aggregation ({py:meth}`xarray.DataArray.sum`).
20 | PRIMAP2 adds functions which skip missing data points only if the
21 | information is missing at all points along certain axes, for example for
22 | a whole time series.
23 | Let's first create an example with missing information:
24 |
25 | ```{code-cell} ipython3
26 | import pandas as pd
27 | import numpy as np
28 | import xarray as xr
29 | import primap2
30 |
31 | time = pd.date_range("2000-01-01", "2003-01-01", freq="YS")
32 | area_iso3 = np.array(["COL", "ARG", "MEX"])
33 | coords = [("area (ISO3)", area_iso3), ("time", time)]
34 | da = xr.DataArray(
35 | data=[
36 | [1, 2, 3, 4],
37 | [np.nan, np.nan, np.nan, np.nan],
38 | [1, 2, 3, np.nan],
39 | ],
40 | coords=coords,
41 | name="test data"
42 | )
43 |
44 | da.pr.to_df()
45 | ```
46 |
47 | Now, we can use the primap2 {py:meth}`xarray.DataArray.pr.sum` function to compute the sum over
48 | countries while ignoring only those countries where the whole timeseries is missing, using the
49 | `skipna_evaluation_dims` parameter:
50 |
51 | ```{code-cell} ipython3
52 | da.pr.sum(dim="area", skipna_evaluation_dims="time").pr.to_df()
53 | ```
54 |
55 | If you instead want to skip all NA values, use the `skipna` parameter:
56 |
57 | ```{code-cell} ipython3
58 | da.pr.sum(dim="area", skipna=True).pr.to_df()
59 | ```
60 |
61 | ```{code-cell} ipython3
62 | # compare this to the result of the standard xarray sum - it also skips NA values by default:
63 |
64 | da.sum(dim="area (ISO3)").pr.to_df()
65 | ```
66 |
67 | ## Infilling
68 |
69 | The same functionality is available for filling in missing information using the
70 | {py:meth}`xarray.DataArray.pr.fill_all_na` function.
71 | In this example, we fill missing information only where the whole time series is missing.
72 |
73 | ```{code-cell} ipython3
74 | da.pr.fill_all_na("time", value=10).pr.to_df()
75 | ```
76 |
77 | ## Bulk aggregation
78 |
79 | For larger aggregation tasks, e.g. aggregating several gas baskets from individual gases or aggregating a full category tree from its leaves, we have the functions {py:meth}`xarray.Dataset.pr.add_aggregates_variables`, {py:meth}`xarray.Dataset.pr.add_aggregates_coordinates`, and {py:meth}`xarray.DataArray.pr.add_aggregates_coordinates`, which are highly configurable but can also be used in a simplified mode for quick aggregation tasks. In the following, we give a few examples; for the full feature set, we refer to the function descriptions linked above. The functions internally work with {py:meth}`xarray.Dataset.pr.merge` / {py:meth}`xarray.DataArray.pr.merge` to allow for consistency checks when target timeseries already exist.
80 |
81 | ### Add aggregates for variables
82 |
83 | The {py:meth}`xarray.Dataset.pr.add_aggregates_variables` function aggregates data from individual variables to new variables (usually gas baskets). Several variables can be created in one call, where the order of definition is the order of creation. Filters can be specified to limit the aggregation to certain coordinate values.
84 |
85 | #### Examples
86 |
87 | Sum gases in the minimal example dataset
88 |
89 | ```{code-cell} ipython3
90 | ds_min = primap2.open_dataset("../minimal_ds.nc")
91 | summed_ds = ds_min.pr.add_aggregates_variables(
92 | gas_baskets={
93 | "test (SARGWP100)": {
94 | "sources": ["CO2", "SF6", "CH4"],
95 | },
96 | },
97 | )
98 | summed_ds["test (SARGWP100)"]
99 | ```
100 |
101 | We can also use a filter / selector to limit the aggregation to a selection, e.g. a single country:
102 |
103 | ```{code-cell} ipython3
104 | filtered_ds = ds_min.pr.add_aggregates_variables(
105 | gas_baskets={
106 | "test (SARGWP100)": {
107 | "sources": ["CO2", "SF6", "CH4"],
108 | "sel": {"area (ISO3)": ["COL"]},
109 | },
110 | },
111 | )
112 | filtered_ds["test (SARGWP100)"]
113 | ```
114 | When filtering, it is important to note that entities and variables are not the same thing. The difference between the `entity` and `variable` filters / selectors is that `'entity': ['SF6']` will match both the variables `'SF6'` and `'SF6 (SARGWP100)'` (as both belong to the entity `'SF6'`), while `'variable': ['SF6']` will match only the variable `'SF6'`, as sketched below.
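
A minimal sketch of the two selector types (plain dicts, purely illustrative; the variable `'SF6 (SARGWP100)'` is hypothetical here):

```{code-cell} ipython3
# 'entity' selectors match every variable belonging to the entity,
# 'variable' selectors match variable names exactly
sel_entity = {"entity": ["SF6"]}      # would match "SF6" and "SF6 (SARGWP100)"
sel_variable = {"variable": ["SF6"]}  # would match only the variable "SF6"
```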
115 |
116 | If we recompute an existing timeseries, it has to be consistent with the existing data. Here we use the simple mode to specify the aggregation rules. The example below fails because the result is inconsistent with the existing data.
117 |
118 | ```{code-cell} ipython3
119 | from xarray import MergeError
120 |
121 | try:
122 | recomputed_ds = filtered_ds.pr.add_aggregates_variables(
123 | gas_baskets={
124 | "test (SARGWP100)": ["CO2", "CH4"],
125 | },
126 | )
127 | recomputed_ds["test (SARGWP100)"]
128 | except MergeError as err:
129 | print(err)
130 | ```
131 |
132 | We can set the tolerance high enough that the consistency check passes and no error is raised. Setting a tolerance is only possible in the complex mode for the aggregation rules.
133 |
134 | ```{code-cell} ipython3
135 | recomputed_ds = filtered_ds.pr.add_aggregates_variables(
136 | gas_baskets={
137 | "test (SARGWP100)": {
138 | "sources": ["CO2", "CH4"],
139 | "tolerance": 1, # 100%
140 | },
141 | },
142 | )
143 | recomputed_ds["test (SARGWP100)"]
144 | ```
145 |
146 | ### Add aggregates for coordinates
147 |
148 | The {py:meth}`xarray.Dataset.pr.add_aggregates_coordinates` function aggregates data from individual coordinate values to new values (e.g. from subcategories to categories). Several values for several coordinates can be created in one call, where the order of definition is the order of creation. Filters can be specified to limit the aggregation to certain coordinate values, entities, or variables. Most of the operation is similar to the variable aggregation, so we keep the examples here shorter. The {py:meth}`xarray.DataArray.pr.add_aggregates_coordinates` function uses the same syntax.
149 |
150 | #### Examples
151 |
152 | Sum countries in the minimal example dataset
153 |
154 | ```{code-cell} ipython3
155 | test_ds = ds_min.pr.add_aggregates_coordinates(
156 | agg_info={
157 | "area (ISO3)": {
158 | "all": {
159 | "sources": ["COL", "ARG", "MEX", "BOL"],
160 | }
161 | }
162 | }
163 | )
164 | test_ds
165 | ```
166 |
--------------------------------------------------------------------------------
/primap2/tests/test_overview.py:
--------------------------------------------------------------------------------
1 | """Tests for _overview.py"""
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import pytest
6 | import xarray as xr
7 |
8 | from primap2 import ureg
9 |
10 |
11 | def test_to_df():
12 | data = np.array([[1, 2], [3, 4]], dtype=np.int64)
13 | a = ["a1", "a2"]
14 | b = ["b1", "b2"]
15 | da = xr.DataArray(data, coords=[("a", a), ("b", b)], name="name")
16 | actual = da.pr.to_df()
17 |
18 | expected = pd.DataFrame(data, index=a, columns=b)
19 | expected.index.name = "a"
20 | expected.columns.name = "b"
21 |
22 | pd.testing.assert_frame_equal(actual, expected)
23 |
24 |
25 | def test_to_df_1d():
26 | data = np.array([1, 2], dtype=np.int64)
27 | a = ["a1", "a2"]
28 | da = xr.DataArray(data, coords=[("a", a)], name="name")
29 | actual = da.pr.to_df()
30 |
31 | expected = pd.Series(data, index=a, name="name")
32 | expected.index.name = "a"
33 |
34 | pd.testing.assert_series_equal(actual, expected)
35 |
36 |
37 | def test_to_df_set():
38 | data = np.array([1, 2], dtype=np.int64)
39 | a = ["a1", "a2"]
40 | da = xr.DataArray(data, coords=[("a", a)], name="name")
41 | ds = xr.Dataset({"b": da})
42 | actual = ds.pr.to_df("name")
43 |
44 | expected = pd.DataFrame(data, index=a, columns=["b"])
45 | expected.index.name = "a"
46 | expected.columns.name = "name"
47 |
48 | pd.testing.assert_frame_equal(actual, expected)
49 |
50 |
51 | def test_array_empty(empty_ds):
52 | with pytest.raises(ValueError, match="Specify at least one dimension"):
53 | empty_ds.pr.coverage()
54 | with pytest.raises(ValueError, match="Specify at least one dimension"):
55 | empty_ds["CO2"].pr.coverage()
56 |
57 |
58 | def test_array_coverage(empty_ds):
59 | da = empty_ds["CO2"]
60 | da.pint.magnitude[:] = np.nan
61 | da.name = None
62 |
63 | da.pr.loc[{"time": "2001", "area": "COL"}] = 12.0 * ureg("Gg CO2 / year")
64 | da.pr.loc[{"time": "2002", "area": "COL"}] = 13.0 * ureg("Gg CO2 / year")
65 |
66 | expected = pd.DataFrame(
67 | index=da["area (ISO3)"].values,
68 | columns=da["time"].to_index(),
69 | data=np.zeros((len(da["area (ISO3)"]), len(da["time"])), dtype=np.int32),
70 | )
71 | expected.loc["COL", "2001"] = 1
72 | expected.loc["COL", "2002"] = 1
73 | expected.index.name = "area (ISO3)"
74 | expected.columns.name = "time"
75 |
76 | pd.testing.assert_frame_equal(
77 | expected.astype(np.int32), da.pr.coverage("area", "time").astype(np.int32)
78 | )
79 | pd.testing.assert_frame_equal(
80 | expected.T.astype(np.int32),
81 | da.pr.coverage("time", "area (ISO3)").astype(np.int32),
82 | )
83 |
84 |
85 | def test_array_coverage_multidim(opulent_ds):
86 | da = opulent_ds["CO2"]
87 |
88 | da.pr.loc[{"product": "milk"}].pint.magnitude[:] = np.nan
89 |
90 | expected = pd.DataFrame(
91 | index=da.pr["animal"].values,
92 | columns=da.pr["product"].values,
93 | data=np.zeros((len(da.pr["animal"]), len(da.pr["product"])), dtype=np.int32),
94 | )
95 | expected[:] = np.prod(da.shape) // np.prod(expected.shape)
96 | expected.loc[:, "milk"] = 0
97 | expected.index.name = "animal (FAOSTAT)"
98 | expected.columns.name = "product (FAOSTAT)"
99 |
100 | pd.testing.assert_frame_equal(
101 | expected.astype(np.int32), da.pr.coverage("animal", "product").astype(np.int32)
102 | )
103 | pd.testing.assert_frame_equal(
104 | expected.T.astype(np.int32),
105 | da.pr.coverage("product", "animal").astype(np.int32),
106 | )
107 |
108 |
109 | def test_array_coverage_error(opulent_ds):
110 | da = opulent_ds["CO2"]
111 |
112 | with pytest.raises(ValueError, match="Dimension 'non-existing' does not exist"):
113 | da.pr.coverage("animal", "non-existing")
114 |
115 |
116 | def test_set_coverage(opulent_ds):
117 | ds = opulent_ds
118 | ds["CO2"].pr.loc[{"product": "milk"}].pint.magnitude[:] = np.nan
119 |
120 | expected = pd.DataFrame(
121 | index=ds.pr["product"].values,
122 | columns=ds.pr["animal"].values,
123 | data=np.zeros((len(ds.pr["product"]), len(ds.pr["animal"])), dtype=int),
124 | )
125 | expected[:] = np.prod(ds["CO2"].shape) // np.prod(expected.shape) * 4
126 | expected.loc["milk", :] = np.prod(ds["CO2"].shape) // np.prod(expected.shape) * 3
127 | expected.index.name = "product (FAOSTAT)"
128 | expected.columns.name = "animal (FAOSTAT)"
129 | expected.name = "coverage"
130 |
131 | pd.testing.assert_frame_equal(expected, ds.pr.coverage("product", "animal"))
132 | pd.testing.assert_frame_equal(expected.T, ds.pr.coverage("animal", "product"))
133 |
134 |
135 | def test_set_coverage_entity(opulent_ds):
136 | ds = opulent_ds
137 | ds["CO2"].pr.loc[{"product": "milk"}].pint.magnitude[:] = np.nan
138 |
139 | expected = pd.DataFrame(
140 | index=list(ds.keys()),
141 | columns=ds.pr["area"].values,
142 | data=np.zeros((len(ds), len(ds.pr["area"].values)), dtype=int),
143 | )
144 | expected[:] = np.prod(ds["CO2"].shape)
145 | expected.loc["population", :] = np.prod(ds["population"].shape)
146 | expected.loc["CO2", :] = np.prod(ds["CO2"].shape) - np.prod(
147 | ds["CO2"].pr.loc[{"product": "milk"}].shape
148 | )
149 | expected = expected // len(ds.pr["area"].values)
150 | expected.name = "coverage"
151 | expected.index.name = "entity"
152 | expected.columns.name = "area (ISO3)"
153 |
154 | pd.testing.assert_frame_equal(expected, ds.pr.coverage("entity", "area"))
155 |
156 |
157 | def test_set_coverage_boolean(opulent_ds):
158 | actual = opulent_ds.notnull().any("time").pr.coverage("entity", "area")
159 | expected = opulent_ds.pr.coverage("entity", "area") // len(opulent_ds["time"])
160 |
161 | pd.testing.assert_frame_equal(actual, expected)
162 |
163 |
164 | def test_set_coverage_entity_other_dim_not_existing(opulent_ds):
165 | ds = opulent_ds
166 |
167 | ds["CO2"].pr.loc[{"product": "milk"}].pint.magnitude[:] = np.nan
168 |
169 |     entities_expected = [x for x in ds.keys() if x != "population"]
170 |
171 |     expected = pd.DataFrame(
172 |         index=ds.pr["product"].values,
173 |         columns=entities_expected,
174 |         data=np.zeros((len(ds.pr["product"]), len(entities_expected)), dtype=int),
175 | )
176 | expected[:] = np.prod(ds["CO2"].shape) // len(ds.pr["product"])
177 | expected.loc["milk", "CO2"] = 0
178 | expected.index.name = "product (FAOSTAT)"
179 | expected.columns.name = "entity"
180 |
181 | pd.testing.assert_frame_equal(expected, ds.pr.coverage("product", "entity"))
182 | pd.testing.assert_frame_equal(expected.T, ds.pr.coverage("entity", "product"))
183 |
184 |
185 | def test_set_coverage_error(opulent_ds):
186 |     ds = opulent_ds
187 |
188 |     with pytest.raises(ValueError, match="Dimension 'non-existing' does not exist"):
189 |         ds.pr.coverage("animal", "non-existing")
190 |
--------------------------------------------------------------------------------
/primap2/pm2io/_GHG_inventory_reading.py:
--------------------------------------------------------------------------------
1 | """This file contains functions for reading country GHG inventories
2 | from National Inventory Reports (NIR), Biennial Update Reports (BUR),
3 | and other official country emissions inventories.
4 | Most of the functions in this file are exposed to the outside, yet they
5 | currently do not undergo the strict testing applied to the rest of PRIMAP2 as
6 | they are added during the process of reading and preparing data for the PRIMAP-hist
7 | update. Testing will be added in the future.
8 | """
9 |
10 | import re
11 |
12 | import pandas as pd
13 |
14 |
15 | def nir_add_unit_information(
16 | df_nir: pd.DataFrame,
17 | *,
18 | unit_row: str | int,
19 | entity_row: str | int | None = None,
20 | regexp_entity: str,
21 | regexp_unit: str | None = None,
22 | manual_repl_unit: dict[str, str] | None = None,
23 | manual_repl_entity: dict[str, str] | None = None,
24 | default_unit: str,
25 | ) -> pd.DataFrame:
26 | """Add unit information to a National Inventory Report (NIR) style DataFrame.
27 |
28 |     Add unit information to the header of an "entity-wide" file as
29 |     present in the standard table format of National Inventory Reports (NIRs). The
30 |     unit and entity information is extracted from combined unit and entity information
31 |     in the row defined by `unit_row`. The parameters `regexp_unit` and `regexp_entity`
32 |     determine how this is done via regular expressions for unit and entity.
33 |     Additionally, manual mappings can be defined in the `manual_repl_unit` and
34 |     `manual_repl_entity` dicts. For each column, the routine first looks up the cell
35 |     value in `manual_repl_unit` (`manual_repl_entity` for the entity); if it is not
36 |     found there, it tries to extract the unit (entity) using the regular expression.
37 |     If no unit can be determined, the default unit given in `default_unit` is used.
38 |     If no entity can be determined, the analyzed value is used unchanged as the entity.
39 |
40 | Parameters
41 | ----------
42 | df_nir : pd.DataFrame
43 | Pandas DataFrame with the table to process
44 | unit_row : str or int
45 | String "header" to indicate that the column header should be used to derive the
46 | unit information or an integer specifying the row to use for unit information.
47 | If entity and unit information are given in the same row use only unit_row.
48 |     entity_row : str or int (optional)
49 |         String "header" to indicate that the column header should be used to derive the
50 |         entity information or an integer specifying the row to use for entity information.
51 |         If entity and unit information are given in the same row, use only unit_row.
52 | regexp_entity : str
53 | regular expression that extracts the entity from the cell value
54 | regexp_unit : str (optional)
55 | regular expression that extracts the unit from the cell value
56 | manual_repl_unit : dict (optional)
57 | dict defining unit for given cell values
58 | manual_repl_entity : dict (optional)
59 | dict defining entity for given cell values
60 | default_unit : str
61 |         unit to be used if no unit can be extracted and no manual replacement is given
62 |
63 | Returns
64 | -------
65 | pd.DataFrame
66 | DataFrame with explicit unit information (as column header)
67 | """
68 | if manual_repl_unit is None:
69 | manual_repl_unit = {}
70 |
71 | if manual_repl_entity is None:
72 | manual_repl_entity = {}
73 |
74 | cols_to_drop = []
75 |
76 | # get the data to extract the units and entities from
77 | # can be either the header row or a regular row
78 | if unit_row == "header":
79 | values_for_units = list(df_nir.columns)
80 | else:
81 | # unit_row must be an integer
82 | values_for_units = list(df_nir.iloc[unit_row])
83 | cols_to_drop.append(unit_row)
84 |
85 | if entity_row is not None:
86 | if entity_row == "header":
87 | values_for_entities = list(df_nir.columns)
88 | else:
89 | values_for_entities = list(df_nir.iloc[entity_row])
90 | if entity_row != unit_row:
91 | cols_to_drop.append(entity_row)
92 | else:
93 | values_for_entities = values_for_units
94 |
95 | if regexp_unit is not None:
96 | re_unit = re.compile(regexp_unit)
97 | re_entity = re.compile(regexp_entity)
98 |
99 | units = values_for_units.copy()
100 | entities = values_for_entities.copy()
101 |
102 | for idx, value in enumerate(values_for_units):
103 | if str(value) in manual_repl_unit:
104 | units[idx] = manual_repl_unit[str(value)]
105 | else:
106 | if regexp_unit is not None:
107 | unit = re_unit.findall(str(value))
108 | else:
109 | unit = False
110 |
111 | if unit:
112 | units[idx] = unit[0]
113 | else:
114 | units[idx] = default_unit
115 |
116 | for idx, value in enumerate(values_for_entities):
117 | if str(value) in manual_repl_entity:
118 | entities[idx] = manual_repl_entity[str(value)]
119 | else:
120 | entity = re_entity.findall(str(value))
121 | if entity:
122 | entities[idx] = entity[0]
123 | else:
124 | entities[idx] = value
125 |
126 | new_header = [entities, units]
127 |
128 | df_out = df_nir.copy()
129 | df_out.columns = new_header
130 | if cols_to_drop:
131 | df_out = df_out.drop(df_out.index[cols_to_drop])
132 |
133 | return df_out
134 |
135 |
136 | def nir_convert_df_to_long(
137 | df_nir: pd.DataFrame, year: int, header_long: list[str] | None = None
138 | ) -> pd.DataFrame:
139 | """Convert an entity-wide NIR table for a single year to a long format
140 | DataFrame.
141 |
142 | The input DataFrame is required to have the following structure:
143 | * Columns for category, original category name, and data in this order, where
144 | category and original category name form a multiindex.
145 |     * Column header as a multiindex for entity and unit.
146 | A column for the year is added during the conversion.
147 |
148 | Parameters
149 | ----------
150 | df_nir: pd.DataFrame
151 | Pandas DataFrame with the NIR table to be converted
152 | year: int
153 | Year of the given data
154 | header_long: list, optional
155 | specify a non-standard column header, e.g. with only category code
156 | or orig_cat_name
157 |
158 | Returns
159 | -------
160 | pd.DataFrame
161 | converted DataFrame
162 | """
163 | if header_long is None:
164 | header_long = ["category", "orig_cat_name", "entity", "unit", "time", "data"]
165 |
166 | df_stacked = df_nir.stack([0, 1], future_stack=True).to_frame()
167 | df_stacked.insert(0, "year", str(year))
168 | df_stacked = df_stacked.reset_index()
169 | df_stacked.columns = header_long
170 | return df_stacked
171 |
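# --- Editorial sketch (not part of the original file) --------------------
# Hypothetical NIR-style table run through both functions above. The column
# headers mix entity and unit ("CO2 (Gg)"), which nir_add_unit_information
# splits using the regular expressions; regexps and data are made up.

import pandas as pd

df_nir = pd.DataFrame(
    {
        "CO2 (Gg)": [100.0, 20.0],
        "CH4 (Gg)": [5.0, 1.0],
    },
    index=pd.MultiIndex.from_tuples(
        [("1.A", "Fuel combustion"), ("1.B", "Fugitive emissions")],
        names=["category", "orig_cat_name"],
    ),
)

df_units = nir_add_unit_information(
    df_nir,
    unit_row="header",
    regexp_entity=r"^(\w+)",  # text before the parenthesis
    regexp_unit=r"\((.*)\)",  # text inside the parenthesis
    default_unit="Gg",
)
df_long = nir_convert_df_to_long(df_units, year=2020)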
--------------------------------------------------------------------------------
/primap2/csg/_strategies/global_least_squares.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xarray as xr
3 | from attrs import frozen
4 | from scipy.linalg import lstsq
5 | from scipy.optimize import least_squares
6 |
7 | import primap2
8 |
9 | from .exceptions import StrategyUnableToProcess
10 |
11 |
12 | @frozen
13 | class GlobalLSStrategy:
14 | """Fill missing data by global least square matching.
15 |
16 | The NaNs in the first timeseries :math:`\\textrm{ts}(t)` are filled using harmonized data
17 | from the lower priority timeseries :math:`\\textrm{fill_ts}(t)`. For harmonization we use
18 |
19 | .. math::
20 |
21 | \\textrm{fill_ts}_h(t) = \\textrm{fill_ts}(t) \\times a + b,
22 |
23 | where :math:`\\textrm{fill_ts}_h(t)` is the harmonized dataset and :math:`a` and :math:`b` are
24 | determined by minimizing
25 | the least squares distance between :math:`\\textrm{ts}(t)` and :math:`\\textrm{fill_ts}_h(t)`.
26 |
27 | If the class is initialized with ``allow_shift = True`` the faster
28 | :py:func:`scipy.linalg.lstsq` function is used and :math:`b` can be arbitrary.
29 | For the case ``allow_shift = False`` (:math:`b = 0`) :py:func:`scipy.optimize.least_squares`
30 | is used.
31 |
32 | If there is no overlap in non-NaN data between :math:`\\textrm{ts}(t)` and
33 | :math:`\\textrm{fill_ts}(t)` a :py:class:`StrategyUnableToProcess` error will be raised.
34 |
35 | If ``allow_negative = False`` and the harmonized time-series :math:`\\textrm{fill_ts}_h(t)`
36 | contains negative data a :py:class:`StrategyUnableToProcess` error will be raised.
37 |
38 | Attributes
39 | ----------
40 | allow_shift: bool, default True
41 | Allow the filling time series to shift up and down using the additive constant
42 | :math:`b \\neq 0`.
43 | allow_negative: bool, default False
44 | Allow the filling time series to contain negative data initially.
45 | """
46 |
47 | allow_shift: bool = True
48 | allow_negative: bool = False
49 | type = "globalLS"
50 |
51 | def _factor_mult(self, a, e, e_ref):
52 | return a * e - e_ref
53 |
54 | def _jac(self, a, e, e_ref):
55 | J = np.empty((e.size, 1))
56 | J[:, 0] = e
57 | return J
58 |
59 | def fill(
60 | self,
61 | *,
62 | ts: xr.DataArray,
63 | fill_ts: xr.DataArray,
64 | fill_ts_repr: str,
65 | ) -> tuple[xr.DataArray, list[primap2.ProcessingStepDescription]]:
66 | """Fill missing data by global least square matching.
67 |
68 | For a description of the algorithm, see the documentation of this class.
69 |
70 | Parameters
71 | ----------
72 | ts
73 | Base timeseries. Missing data (NaNs) in this timeseries will be filled.
74 | This function does not modify the data in ts.
75 | fill_ts
76 | Fill timeseries. Data from this timeseries will be used (possibly after
77 | modification) to fill missing data in the base timeseries.
78 | This function does not modify the data in fill_ts.
79 | fill_ts_repr
80 | String representation of fill_ts. Human-readable short representation of
81 | the fill_ts (e.g. the source).
82 |
83 | Returns
84 | -------
85 | filled_ts, descriptions.
86 | filled_ts contains the result, where missing
87 | data in ts is (partly) filled using scaled data from fill_ts.
88 | descriptions contains information about which years were affected and
89 | filled how.
90 | """
91 | filled_mask = ts.isnull() & ~fill_ts.isnull()
92 | time_filled = filled_mask["time"][filled_mask].to_numpy()
93 |
94 | if time_filled.any():
95 | # check if we have overlap. If not raise error so users can define a fallback
96 | # strategy
97 | overlap = ts.notnull() & fill_ts.notnull()
98 | if overlap.any():
99 | if self.allow_shift:
100 | e = fill_ts[overlap.data].data
101 | A = np.vstack((e, np.ones_like(e))).transpose()
102 | e_ref = ts[overlap.data].data
103 | x, res, _rank, _s = lstsq(A, e_ref)
104 | fill_ts_harmo = fill_ts * x[0] + x[1]
105 |                     if not self.allow_negative and any(fill_ts_harmo < 0):
106 |                         # users can configure a fallback strategy, e.g. without shift
107 |                         raise StrategyUnableToProcess(
108 |                             reason="Negative data after harmonization excluded by configuration"
109 |                         )
110 | else:
111 | ts_aligned, fill_ts_aligned = xr.align(ts, fill_ts_harmo, join="exact")
112 | filled_ts = ts_aligned.fillna(fill_ts_aligned)
113 |
114 | descriptions = [
115 | primap2.ProcessingStepDescription(
116 | time=time_filled,
117 | description=f"filled with least squares matched data from "
118 | f"{fill_ts_repr}. a*x+b with a={x[0]:0.3f}, "
119 | f"b={x[1]:0.3f}",
120 | function=self.type,
121 | source=fill_ts_repr,
122 | )
123 | ]
124 | else:
125 | e = fill_ts[overlap.data].data
126 | e_ref = ts[overlap.data].data
127 | a0 = [1] # start with 1 as scaling factor
128 | res = least_squares(self._factor_mult, a0, jac=self._jac, args=(e, e_ref))
129 |
130 | fill_ts_h = fill_ts * res["x"][0]
131 |
132 | ts_aligned, fill_ts_aligned = xr.align(ts, fill_ts_h, join="exact")
133 | filled_ts = ts_aligned.fillna(fill_ts_aligned)
134 |
135 | descriptions = [
136 | primap2.ProcessingStepDescription(
137 | time=time_filled,
138 | description="filled with least squares matched data from "
139 | f"{fill_ts_repr}. Factor={res['x'][0]:0.3f}",
140 | function=self.type,
141 | source=fill_ts_repr,
142 | )
143 | ]
144 | else:
145 | raise StrategyUnableToProcess(reason="No overlap between timeseries, can't match")
146 |
147 | else:
148 | # if we don't have anything to fill we don't need to calculate anything
149 | filled_ts = ts
150 | descriptions = [
151 | primap2.ProcessingStepDescription(
152 | time=time_filled,
153 | description=f"no additional data in {fill_ts_repr}",
154 | function=self.type,
155 | source=fill_ts_repr,
156 | )
157 | ]
158 |
159 | return filled_ts, descriptions
160 |
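# --- Editorial sketch (not part of the original file) --------------------
# Standalone numpy/scipy version of the harmonization fit used above for
# allow_shift=True, with made-up numbers: fit fill_ts_h = a * fill_ts + b on
# the overlap, then fill only the gaps of ts.

import numpy as np
from scipy.linalg import lstsq

ts = np.array([np.nan, 10.0, 12.0, np.nan])     # base timeseries with gaps
fill_ts = np.array([4.0, 5.0, 6.0, 7.0])        # lower-priority timeseries
overlap = ~np.isnan(ts) & ~np.isnan(fill_ts)    # points present in both

A = np.vstack((fill_ts[overlap], np.ones(overlap.sum()))).T
(a, b), *_ = lstsq(A, ts[overlap])              # least-squares fit of a and b
fill_ts_h = a * fill_ts + b                     # harmonized filler: [8, 10, 12, 14]
filled = np.where(np.isnan(ts), fill_ts_h, ts)  # -> [8., 10., 12., 14.]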
--------------------------------------------------------------------------------
/primap2/tests/csg/test_wrapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Tests for csg/_wrapper.py"""
3 |
4 | from datetime import datetime
5 | from pathlib import Path
6 |
7 | import pandas as pd
8 |
9 | import primap2.csg
10 | from primap2.csg import create_composite_source
11 | from primap2.csg._wrapper import create_time_index, set_priority_coords
12 | from primap2.tests.utils import assert_ds_aligned_equal
13 |
14 | DATA_PATH = Path(__file__).parent.parent / "data"
15 |
16 |
17 | def test_set_priority_coords(minimal_ds):
18 | prio_coords = {"scenario": {"value": "HISTORY", "terminology": "PRIMAP"}}
19 |
20 | prio_coord_ds = set_priority_coords(minimal_ds, prio_coords)
21 |
22 | assert "scenario (PRIMAP)" in prio_coord_ds.coords
23 | assert prio_coord_ds.coords["scenario (PRIMAP)"].values == ["HISTORY"]
24 |
25 |
26 | def test_create_time_index():
27 | start = "1990"
28 | end = "2000"
29 | start_dt = datetime.strptime(start, "%Y")
30 | end_dt = datetime.strptime(end, "%Y")
31 | start_ts = pd.Timestamp(start)
32 | end_ts = pd.Timestamp(end)
33 | expected = pd.date_range(start=start, end=end, freq="YS")
34 |
35 | # string tuple
36 | pd.testing.assert_index_equal(create_time_index((start, end)), expected)
37 |
38 |     # datetime tuple
39 | pd.testing.assert_index_equal(create_time_index((start_dt, end_dt)), expected)
40 |
41 | # timestamp tuple
42 | pd.testing.assert_index_equal(create_time_index((start_ts, end_ts)), expected)
43 |
44 | # mixed tuple
45 | pd.testing.assert_index_equal(create_time_index((start, end_dt)), expected)
46 |
47 | # DatetimeIndex returned unchanged
48 | pd.testing.assert_index_equal(create_time_index(expected), expected)
49 |
50 |
51 | def test_create_composite_source():
52 | cat_terminology = "IPCC2006_PRIMAP"
53 |
54 | main_categories = ["1.A", "1.B.2", "2.A", "M.AG.ELV", "M.LULUCF", "4"]
55 | FGAS_categories = ["2"]
56 |
57 | native_entities = ["CO2", "CH4", "N2O", "SF6"]
58 | GWP_entities = ["HFCS"]
59 | GWPs = ["AR6GWP100"]
60 | GWP_variables = [f"{entity} ({GWP})" for entity in GWP_entities for GWP in GWPs]
61 | FGAS_entities = ["SF6", *GWP_entities]
62 | FGAS_variables = ["SF6", *GWP_variables]
63 | variables = native_entities + GWP_variables
64 | # priority
65 | priorities = [
66 | {"source": "CRF 2023, 240108"},
67 | {"source": "UNFCCC NAI, 240223"},
68 | {
69 | "source": "CDIAC 2023, HISTORY",
70 | f"category ({cat_terminology})": ["1.A", "2.A"],
71 | "entity": "CO2",
72 | },
73 | {
74 | "source": "Andrew cement, HISTORY",
75 | f"category ({cat_terminology})": ["2.A"],
76 | "entity": "CO2",
77 | },
78 | {
79 | "source": "EI 2023, HISTORY",
80 | f"category ({cat_terminology})": ["1.A", "1.B.2"],
81 | "entity": "CO2",
82 | },
83 | {"source": "Houghton, HISTORY", f"category ({cat_terminology})": "M.LULUCF"},
84 | {"source": "FAOSTAT 2023, HISTORY", f"category ({cat_terminology})": ["M.AG.ELV"]},
85 | {"source": "EDGAR 8.0, HISTORY", "entity": ["CO2", "CH4", "N2O"]},
86 | {
87 | "source": "EDGAR 7.0, HISTORY",
88 | f"category ({cat_terminology})": FGAS_categories,
89 | "variable": FGAS_variables,
90 | },
91 | ]
92 |
93 | used_sources = [prio["source"] for prio in priorities]
94 | FGAS_sources = [
95 | "CRF 2023, 240108",
96 | "CRF 2022, 230510",
97 | "UNFCCC NAI, 240223",
98 | "EDGAR 7.0, HISTORY",
99 | ]
100 |
101 | result_prio_coords = {
102 | "source": {"value": "PRIMAP-test"},
103 | "scenario": {"value": "HISTORY", "terminology": "PRIMAP"},
104 | }
105 |
106 | metadata = {"references": "test-data", "contact": "test@example.xx"}
107 |
108 | input_data = primap2.open_dataset(DATA_PATH / "primap2_test_data_v2.5.1_final.nc")
109 |
110 |     # we use source as the priority dimension; all other dimensions are fixed coordinates.
111 | # we have one country-specific exception for each country in the prioritization
112 | # that's likely a bit more than realistic, but let's aim high
113 | priority_definition = primap2.csg.PriorityDefinition(
114 | priority_dimensions=["source"],
115 | priorities=priorities,
116 | exclude_result=[
117 | {
118 | "entity": ["CO2", "CH4", "N2O"],
119 | f"category ({cat_terminology})": FGAS_categories,
120 | },
121 | {
122 | "entity": FGAS_entities,
123 | f"category ({cat_terminology})": main_categories,
124 | },
125 | ],
126 | )
127 |
128 | strategies_FGAS = [
129 | (
130 | {
131 | "source": FGAS_sources,
132 | "entity": FGAS_entities,
133 | f"category ({cat_terminology})": FGAS_categories,
134 | },
135 | primap2.csg.GlobalLSStrategy(),
136 | ),
137 | (
138 | {
139 | "source": FGAS_sources,
140 | "entity": FGAS_entities,
141 | f"category ({cat_terminology})": FGAS_categories,
142 | },
143 | primap2.csg.SubstitutionStrategy(),
144 | ),
145 | ]
146 |
147 | strategies_CO2CH4N2O = [
148 | (
149 | {
150 | "source": used_sources,
151 | "entity": ["CO2", "CH4", "N2O"],
152 | f"category ({cat_terminology})": main_categories,
153 | },
154 | primap2.csg.GlobalLSStrategy(),
155 | ),
156 | (
157 | {
158 | "source": used_sources,
159 | "entity": ["CO2", "CH4", "N2O"],
160 | f"category ({cat_terminology})": main_categories,
161 | },
162 | primap2.csg.SubstitutionStrategy(),
163 | ),
164 | ]
165 |
166 | strategy_definition = primap2.csg.StrategyDefinition(
167 | strategies=strategies_CO2CH4N2O + strategies_FGAS
168 | )
169 |
170 | test_time_range = ("1962", "2022") # cut a few years to make sure that works
171 | # test_limit_coords = {'entity': ['CO2', 'CH4', 'N2O']}
172 | test_limit_coords = {
173 | "variable": variables,
174 | "category": main_categories + FGAS_categories,
175 | "source": used_sources,
176 | }
177 |
178 | result = create_composite_source(
179 | input_data,
180 | priority_definition=priority_definition,
181 | strategy_definition=strategy_definition,
182 | result_prio_coords=result_prio_coords,
183 | limit_coords=test_limit_coords,
184 | time_range=test_time_range,
185 | progress_bar=None,
186 | metadata=metadata,
187 | )
188 |
189 |     # remove processing info as the following functions can't deal with it yet;
190 |     # in this case, to_netcdf can't deal with the None values in processing info
191 | result = result.pr.remove_processing_info()
192 |
193 | # assert results
194 | # load comparison data
195 | comp_filename = "PRIMAP-csg-test.nc"
196 | file_to_load = DATA_PATH / comp_filename
197 | data_comp = primap2.open_dataset(file_to_load)
198 |
199 | assert_ds_aligned_equal(data_comp, result, equal_nan=True)
200 |
--------------------------------------------------------------------------------
/primap2/csg/_wrapper.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import tqdm
6 | import xarray as xr
7 |
8 | from ._compose import compose
9 | from ._models import PriorityDefinition, StrategyDefinition
10 |
11 |
12 | def set_priority_coords(
13 | ds: xr.Dataset,
14 | dims: dict[str, dict[str, str]],
15 | ) -> xr.Dataset:
16 | """Set values for priority coordinates in output dataset.
17 |
18 | Parameters
19 | ----------
20 | ds
21 | Input dataset.
22 | dims
23 | Values to be set for priority coordinates. The format is
24 | {"name": {"value": value, "terminology": terminology}}, where the
25 | terminology is optional.
26 | Examples:
27 | {"source": {"value": "PRIMAP-hist"}} sets the "source" to "PRIMAP-hist".
28 |         {"area": {"value": "WORLD", "terminology": "ISO3_primap"}} sets the dimension
29 |         "area (ISO3_primap)" to "WORLD".
30 | """
31 | for dim in dims:
32 | if "terminology" in dims[dim]:
33 | terminology = dims[dim]["terminology"]
34 | else:
35 | terminology = None
36 | ds = ds.pr.expand_dims(dim=dim, coord_value=dims[dim]["value"], terminology=terminology)
37 |
38 | return ds
39 |
40 |
41 | def create_composite_source(
42 | input_ds: xr.Dataset,
43 | priority_definition: PriorityDefinition,
44 | strategy_definition: StrategyDefinition,
45 | result_prio_coords: dict[str, dict[str, str]],
46 | limit_coords: dict[str, str | list[str]] | None = None,
47 | time_range: tuple[str | np.datetime64, str | np.datetime64] | pd.DatetimeIndex | None = None,
48 | metadata: dict[str, str] | None = None,
49 | progress_bar: type[tqdm.tqdm] | None = tqdm.tqdm,
50 | ) -> xr.Dataset:
51 |     """Create a composite data source.
52 |
53 | This is a wrapper around `primap2.csg.compose` that prepares the input data and sets result
54 | values for the priority coordinates.
55 |
56 | Parameters
57 | ----------
58 | input_ds
59 | Dataset containing all input data
60 | priority_definition
61 | Defines the priorities to select timeseries from the input data. Priorities
62 | are formed by a list of selections and are used "from left to right", where the
63 | first matching selection has the highest priority. Each selection has to specify
64 | values for all priority dimensions (so that exactly one timeseries is selected
65 | from the input data), but can also specify other dimensions. That way it is,
66 | e.g., possible to define a different priority for a specific country by listing
67 | it early (i.e. with high priority) before the more general rules which should
68 | be applied for all other countries.
69 | You can also specify the "entity" or "variable" in the selection, which will
70 | limit the rule to a specific entity or variable, respectively. For each
71 | DataArray in the input_data Dataset, the variable is its name, the entity is
72 | the value of the key `entity` in its attrs.
73 | strategy_definition
74 | Defines the filling strategies to be used when filling timeseries with other
75 | timeseries. Again, the priority is defined by a list of selections and
76 | corresponding strategies which are used "from left to right". Selections can use
77 | any dimension and don't have to apply to only one timeseries. For example, to
78 | define a default strategy which should be used for all timeseries unless
79 | something else is configured, configure an empty selection as the last
80 | (rightmost) entry.
81 | You can also specify the "entity" or "variable" in the selection, which will
82 | limit the rule to a specific entity or variable, respectively. For each
83 | DataArray in the input_data Dataset, the variable is its name, the entity is
84 | the value of the key `entity` in its attrs.
85 | result_prio_coords
86 |         Defines the values for the priority coordinates in the output dataset. As the
87 |         priority coordinates differ between the input sources, there is no canonical
88 |         value for the result, so it has to be defined explicitly.
89 | limit_coords
90 | Optional parameter to remove data for coordinate values not needed for the
91 | composition from the input data. The time coordinate is treated separately.
92 | time_range
93 | Optional parameter to limit the time coverage of the input data.
94 | Can either be a pandas `DatetimeIndex` or a tuple of `str` or `np.datetime64` in
95 | the form (year_from, year_to) where both boundaries are included in the range.
96 | Only the overlap of the supplied index or index created from the tuple with
97 | the time coordinate of the input dataset will be used.
98 | metadata
99 | Set metadata values such as title and references.
100 | progress_bar
101 | By default, show progress bars using the tqdm package during the
102 | operation. If None, don't show any progress bars. You can supply a class
103 | compatible to tqdm.tqdm's protocol if you want to customize the progress bar.
104 |
105 | Returns
106 | -------
107 | xr.Dataset with composed data according to the given priority and strategy
108 | definitions
109 | """
110 | # limit input data to these values
111 | if limit_coords is not None:
112 | if "variable" in limit_coords:
113 | variable = limit_coords.pop("variable")
114 | input_ds = input_ds[variable].pr.loc[limit_coords]
115 | else:
116 | input_ds = input_ds.pr.loc[limit_coords]
117 |
118 | # set time range according to input
119 | if time_range is not None:
120 | time_index = create_time_index(time_range)
121 | time_index = time_index.intersection(input_ds.coords["time"])
122 | input_ds = input_ds.pr.loc[{"time": time_index}]
123 |
124 | # run compose
125 | result_ds = compose(
126 | input_data=input_ds,
127 | priority_definition=priority_definition,
128 | strategy_definition=strategy_definition,
129 | progress_bar=progress_bar,
130 | )
131 |
132 | # set priority coordinates
133 | result_ds = set_priority_coords(result_ds, result_prio_coords)
134 |
135 | if metadata is not None:
136 | for key in metadata.keys():
137 | result_ds.attrs[key] = metadata[key]
138 |
139 | result_ds.pr.ensure_valid()
140 | return result_ds
141 |
142 |
143 | def create_time_index(
144 | time_range: tuple[
145 | str | np.datetime64 | datetime | pd.Timestamp, str | np.datetime64 | datetime | pd.Timestamp
146 | ]
147 | | pd.DatetimeIndex
148 | | None = None,
149 | ) -> pd.DatetimeIndex:
150 | """
151 | Unify different input options for a time range to a `pd.DatetimeIndex`.
152 |
153 | Parameters
154 | ----------
155 | time_range
156 |         Can either be a pandas `DatetimeIndex`, which is returned unchanged, or a tuple
157 |         of `str` or datetime-like in the form (year_from, year_to), where both
158 |         boundaries are included in the range. A tuple is expanded to a yearly
159 |         (year-start) `DatetimeIndex`.
160 |
161 | Returns
162 | -------
163 | Pandas DatetimeIndex according to the time range input
164 | """
165 |
166 | if isinstance(time_range, pd.DatetimeIndex):
167 | time_index = time_range
168 | elif isinstance(time_range, tuple):
169 | time_index = pd.date_range(time_range[0], time_range[1], freq="YS", inclusive="both")
170 | else:
171 | raise ValueError("time_range must be a datetime index or a tuple")
172 |
173 | return time_index
174 |
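# --- Editorial sketch (not part of the original file) --------------------
# Compact, hypothetical call of create_composite_source; the file name,
# source names, and definitions are made up (see test_wrapper.py for a full,
# working configuration).

import primap2
import primap2.csg

input_ds = primap2.open_dataset("input.nc")  # hypothetical input file

result = create_composite_source(
    input_ds,
    priority_definition=PriorityDefinition(
        priority_dimensions=["source"],
        priorities=[{"source": "A"}, {"source": "B"}],  # prefer A where present
    ),
    strategy_definition=StrategyDefinition(
        # empty selection as default strategy for all timeseries
        strategies=[({}, primap2.csg.SubstitutionStrategy())]
    ),
    result_prio_coords={"source": {"value": "A+B"}},
    time_range=("1990", "2020"),  # inclusive year boundaries
)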
--------------------------------------------------------------------------------