├── .github ├── dependabot.yml └── workflows │ ├── main.yaml │ └── pypi-release.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .prettierrc.toml ├── LICENSE ├── MANIFEST.in ├── README.md ├── carbonplan_data ├── __init__.py ├── catalogs │ ├── fia.yaml │ ├── fluxnet.yaml │ ├── gcp.yaml │ ├── gridmet.yaml │ ├── grids.yaml │ ├── maca.yaml │ ├── master.yaml │ ├── mtbs.yaml │ ├── nftd.yaml │ ├── nlcd.yaml │ ├── projects.yaml │ ├── spawnetal2020.yaml │ └── terraclimate.yaml ├── metadata.py ├── tests │ ├── __init__.py │ ├── test_catalogs.py │ └── test_utils.py └── utils.py ├── ci └── environment.yaml ├── codecov.yml ├── pyproject.toml ├── scripts ├── fia │ ├── 00_download.ipynb │ ├── 01_raw_to_parquet.ipynb │ └── 01_raw_to_parquet_part2.ipynb ├── fluxnet │ ├── 01_raw_to_parquet.ipynb │ └── download.sh ├── gcp │ └── 01_raw_to_parquet.ipynb ├── glas │ └── 01_cache_glas_data.ipynb ├── global-biomass │ └── 01_biomass_to_cogs.ipynb ├── gridmet │ └── 01_gridmet_to_zarr.ipynb ├── grids │ └── make_grid.ipynb ├── iiasa │ └── 01_raw_to_parquet.ipynb ├── mtbs │ ├── 01_raw_to_cogs.ipynb │ ├── 02_downsampling_and_reprojection.ipynb │ ├── 02_mtbs_to_zarr.ipynb │ ├── 03_mtbs_to_zarr.ipynb │ ├── 04_mtbs_perims_to_raster.ipynb │ ├── 05_monthly_downsampling.ipynb │ ├── 05_monthly_mtbs_to_zarr.ipynb │ ├── 05_monthly_mtbs_to_zarr.py │ ├── 06_annual_downsampling.py │ ├── 06_annual_mtbs_to_zarr.py │ ├── README.md │ └── prepare.py ├── nftd │ ├── 00_download.ipynb │ ├── 01_nftd_to_cogs.ipynb │ └── 02_downsampling_and_reprojection.ipynb ├── nlcd │ ├── 00_download.ipynb │ ├── 01_nlcd_to_cogs.ipynb │ ├── 02_downsampling_and_reprojection.ipynb │ └── README.md ├── prism │ ├── 00_download.ipynb │ ├── 01_prism_to_cogs.ipynb │ └── 02_downsample_and_reproject.ipynb ├── statsgo │ └── 01_raw_to_tif.py ├── terraclimate │ ├── 01_terraclimate_aux_fileds_to_zarr.ipynb │ ├── 01_terraclimate_to_zarr3.ipynb │ ├── 02_terraclimate_regrid.ipynb │ └── 02_terraclimate_to_fiaplots.ipynb └── worldclim │ └── 01_raw_to_zarr.ipynb └── sources.yaml /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | groups: 8 | actions: 9 | patterns: 10 | - "*" 11 | -------------------------------------------------------------------------------- /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.ref }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | test: 15 | name: test-py${{ matrix.python }}-${{matrix.CARBONPLAN_DATA}} 16 | runs-on: ubuntu-latest 17 | defaults: 18 | run: 19 | shell: bash -l {0} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python: ["3.9", "3.10", "3.11", "3.12"] 24 | CARBONPLAN_DATA: ["gs://carbonplan-data"] 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Install Conda environment from environment.yml 28 | uses: mamba-org/setup-micromamba@v2 29 | with: 30 | # environment-file is not assumed anymore 31 | environment-file: ci/environment.yaml 32 | create-args: >- 33 | python=${{ matrix.python-version }} 34 | - name: Install package 35 | run: | 36 | python -m pip install -e . 
37 | - name: Conda list information 38 | run: | 39 | conda env list 40 | conda list 41 | - name: Run tests 42 | env: 43 | CARBONPLAN_DATA: ${{ matrix.CARBONPLAN_DATA }} 44 | run: | 45 | python -m pytest 46 | -------------------------------------------------------------------------------- /.github/workflows/pypi-release.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Upload carbonplan_data to PyPI 2 | on: 3 | release: 4 | types: 5 | - published 6 | # Runs for pull requests should be disabled other than for testing purposes 7 | # pull_request: 8 | # branches: 9 | # - main 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | build-artifacts: 16 | runs-on: ubuntu-latest 17 | if: github.repository == 'carbonplan/data' 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | - uses: actions/setup-python@v5.4.0 23 | name: Install Python 24 | with: 25 | python-version: "3.11" 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install build twine 31 | git clean -xdf 32 | git restore -SW . 33 | 34 | # This step is only necessary for testing purposes and for TestPyPI 35 | - name: Fix up version string for TestPyPI 36 | if: ${{ !startsWith(github.ref, 'refs/tags') }} 37 | run: | 38 | # Change setuptools-scm local_scheme to "no-local-version" so the 39 | # local part of the version isn't included, making the version string 40 | # compatible with PyPI. 41 | sed --in-place "s/node-and-date/no-local-version/g" pyproject.toml 42 | 43 | - name: Build tarball and wheels 44 | run: | 45 | python -m build 46 | - name: Check built artifacts 47 | run: | 48 | python -m twine check --strict dist/* 49 | pwd 50 | if [ -f dist/carbonplan_data-0.0.0.tar.gz ]; then 51 | echo "❌ INVALID VERSION NUMBER" 52 | exit 1 53 | else 54 | echo "✅ Looks good" 55 | fi 56 | - uses: actions/upload-artifact@v4 57 | with: 58 | name: releases 59 | path: dist 60 | 61 | test-built-dist: 62 | needs: build-artifacts 63 | runs-on: ubuntu-latest 64 | steps: 65 | - uses: actions/setup-python@v5.4.0 66 | name: Install Python 67 | with: 68 | python-version: "3.11" 69 | - uses: actions/download-artifact@v4 70 | with: 71 | name: releases 72 | path: dist 73 | - name: List contents of built dist 74 | run: | 75 | ls -ltrh 76 | ls -ltrh dist 77 | - name: Verify the built dist/wheel is valid 78 | run: | 79 | python -m pip install --upgrade pip 80 | python -m pip install dist/carbonplan_data*.whl 81 | python -c "from carbonplan_data.utils import get_versions; print(get_versions())" 82 | - name: Publish package to TestPyPI 83 | uses: pypa/gh-action-pypi-publish@v1.12.4 84 | with: 85 | password: ${{ secrets.TEST_PYPI_TOKEN }} 86 | repository-url: https://test.pypi.org/legacy/ 87 | # verbose: true 88 | 89 | upload-to-pypi: 90 | needs: test-built-dist 91 | if: github.event_name == 'release' 92 | runs-on: ubuntu-latest 93 | steps: 94 | - uses: actions/download-artifact@v4 95 | with: 96 | name: releases 97 | path: dist 98 | - name: Publish package to PyPI 99 | uses: pypa/gh-action-pypi-publish@v1.12.4 100 | with: 101 | password: ${{ secrets.PYPI_TOKEN }} 102 | # verbose: true 103 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | processed/ 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 
| 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | carbonplan_data/_version.py 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # web 136 | .next 137 | node_modules 138 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: monthly 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v5.0.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: end-of-file-fixer 10 | - id: check-docstring-first 11 | - id: check-json 12 | - id: check-yaml 13 | - id: debug-statements 14 | - id: mixed-line-ending 15 | 16 | - repo: https://github.com/astral-sh/ruff-pre-commit 17 | rev: "v0.11.8" 18 | hooks: 19 | - id: ruff 20 | args: ["--fix"] 21 | - id: ruff-format 22 | 23 | - repo: https://github.com/pre-commit/mirrors-prettier 24 | rev: v4.0.0-alpha.8 25 | hooks: 26 | - id: prettier 27 | 28 | - repo: https://github.com/kynan/nbstripout 29 | rev: 0.8.1 30 | hooks: 31 | - id: nbstripout 32 | 33 | - repo: https://github.com/pre-commit/mirrors-prettier 34 | rev: "v4.0.0-alpha.8" 35 | hooks: 36 | - id: prettier 37 | name: prettier-markdown 38 | entry: prettier --write --parser mdx 39 | files: "\\.(\ 40 | |md|markdown|mdown|mkdn\ 41 | 
|mdx\ 42 | )$" 43 | -------------------------------------------------------------------------------- /.prettierrc.toml: -------------------------------------------------------------------------------- 1 | tabWidth = 2 2 | semi = false 3 | singleQuote = true 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 carbonplan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include carbonplan_data/catalogs * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 4 | 5 | <!-- CarbonPlan monogram (header image) --> 6 | 7 |

9 | 10 | # carbonplan / data 11 | 12 | **data catalog and curation** 13 | 14 | [![CI](https://github.com/carbonplan/data/actions/workflows/main.yaml/badge.svg)](https://github.com/carbonplan/data/actions/workflows/main.yaml) 15 | [![PyPI](https://img.shields.io/pypi/v/carbonplan-data)](https://pypi.org/project/carbonplan-data/) 16 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) 17 | 18 | This repository includes our main data catalog as well as our pre-processing utilities. 19 | 20 | ## install 21 | 22 | ```shell 23 | python -m pip install carbonplan[data] 24 | ``` 25 | 26 | ## usage 27 | 28 | The CarbonPlan data archives are currently mirrored on Google Cloud Storage (US-Central) and 29 | AWS (us-west-2). Set the `CARBONPLAN_DATA` environment variable before using the 30 | Intake catalog below: 31 | 32 | ```shell 33 | # google (us-central) 34 | export CARBONPLAN_DATA="https://storage.googleapis.com/carbonplan-data" 35 | # or 36 | # s3 (us-west-2) 37 | export CARBONPLAN_DATA="https://carbonplan-data.s3.us-west-2.amazonaws.com" 38 | ``` 39 | 40 | ```python 41 | # open the top level catalog 42 | from carbonplan.data import cat 43 | 44 | # extract an entry as a Dask-backed Xarray Dataset 45 | cat.mtbs["raster"](region="conus", resolution="4000m").to_dask() 46 | ``` 47 | 48 | --- 49 | 50 | ## developer documentation 51 | 52 | To run the unit and integration tests for this API, run: 53 | 54 | ```shell 55 | $ pytest -v 56 | ``` 57 | 58 | Catalog entries scan be marked as either _skip_ or _xfail_ by setting the `ci` key in the metadata dictionary: 59 | 60 | ```yaml 61 | foo: 62 | description: "skip this entry in the CI tests" 63 | metadata: 64 | ci: skip 65 | ``` 66 | 67 | ## license 68 | 69 | All the code in this repository is [MIT](https://choosealicense.com/licenses/mit/)-licensed. When possible, the data is licensed using the [CC-BY-4.0](https://choosealicense.com/licenses/cc-by-4.0/) license. We include attribution and additional license information for third party datasets, and we request that you also maintain that attribution if using this data. 70 | 71 | ## about us 72 | 73 | CarbonPlan is a nonprofit organization that uses data and science for climate action. We aim to improve the transparency and scientific integrity of climate solutions with open data and tools. Find out more at [carbonplan.org](https://carbonplan.org/) or get in touch by [opening an issue](https://github.com/carbonplan/data/issues/new) or [sending us an email](mailto:hello@carbonplan.org). 
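
The pattern in the usage section generalizes to any entry in the catalog. The sketch below is illustrative rather than canonical: it assumes network access to one of the public mirrors and that the optional intake drivers referenced by the catalogs (`intake-parquet`, `intake-xarray`) are installed.

```python
import os

# optional: pick a mirror explicitly; the package falls back to the
# Google Cloud Storage mirror when CARBONPLAN_DATA is unset
os.environ.setdefault("CARBONPLAN_DATA", "https://storage.googleapis.com/carbonplan-data")

from carbonplan_data import cat  # the README imports the same catalog via carbonplan.data

# list every entry nested under the master catalog
for name in cat.walk(depth=2):
    print(name)

# tabular example: read one FIA table into a pandas DataFrame
plots = cat.fia["raw_table"](name="plot").read()

# raster example: lazily open the processed MTBS burn-severity mosaic
mtbs = cat.mtbs["raster"](region="conus", resolution="4000m").to_dask()
print(mtbs)
```

Parameter names such as `name="plot"`, `region`, and `resolution` come from the `parameters` blocks of the corresponding catalog files under `carbonplan_data/catalogs/`.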
74 | -------------------------------------------------------------------------------- /carbonplan_data/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | import pathlib 5 | from importlib.metadata import PackageNotFoundError as _PackageNotFoundError 6 | from importlib.metadata import version as _version 7 | 8 | import intake 9 | 10 | try: 11 | version = _version(__name__) 12 | except _PackageNotFoundError: 13 | # package is not installed 14 | version = "unknown" 15 | __version__ = version 16 | 17 | CATALOG_DIR_PATH = pathlib.Path(__file__) 18 | MASTER_CATALOG_PATH = str(CATALOG_DIR_PATH.parent / "catalogs/master.yaml") 19 | KNOWN_DATA_LOCATIONS = [ 20 | "https://storage.googleapis.com/carbonplan-data", 21 | "https://carbonplan-data.s3.us-west-2.amazonaws.com", 22 | ] 23 | 24 | # open master catalog 25 | if "CARBONPLAN_DATA" not in os.environ: 26 | os.environ["CARBONPLAN_DATA"] = "https://storage.googleapis.com/carbonplan-data" 27 | 28 | cat = intake.open_catalog(MASTER_CATALOG_PATH) 29 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/fia.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_parquet 4 | 5 | # TODOs: 6 | # - remove csvs in gcp. 7 | # - add aux data source(s) 8 | 9 | sources: 10 | raw_table: 11 | metadata: 12 | title: Forest Inventory Analysis (raw) 13 | summary: Data on status and trends in forest location, health, growth, mortality, and production. 14 | description: | 15 | The Forest Inventory and Analysis dataset is a nationwide survey of the forest assets of 16 | the United States. The Forest Inventory and Analysis (FIA) research program has been in 17 | existence since mandated by Congress in 1928. FIA's primary objective is to determine 18 | the extent, condition, volume, growth, and use of trees on the Nation's forest land. 19 | tags: [forests] 20 | type: application/parquet 21 | license: Public domain 22 | providers: 23 | - name: USDA Forest Service 24 | description: Data provided by the United States Department of Agriculture Forest Service. 
25 | url: https://www.fia.fs.fed.us/ 26 | driver: parquet 27 | parameters: 28 | name: 29 | description: FIA data product name 30 | type: str 31 | default: plot 32 | allowed: 33 | [ 34 | boundary, 35 | cond, 36 | cond_dwm_calc, 37 | county, 38 | dwm_coarse_woody_debris, 39 | dwm_duff_litter_fuel, 40 | dwm_fine_woody_debris, 41 | dwm_microplot_fuel, 42 | dwm_residual_pile, 43 | dwm_transect_segment, 44 | dwm_visit, 45 | grnd_cvr, 46 | invasive_subplot_spp, 47 | lichen_lab, 48 | lichen_plot_summary, 49 | lichen_visit, 50 | ozone_biosite_summary, 51 | ozone_plot, 52 | ozone_plot_summary, 53 | ozone_species_summary, 54 | ozone_validation, 55 | ozone_visit, 56 | p2veg_subplot_spp, 57 | p2veg_subp_structure, 58 | plot, 59 | plotgeom, 60 | plotsnap, 61 | plot_regen, 62 | pop_estn_unit, 63 | pop_eval, 64 | pop_eval_attribute, 65 | pop_eval_grp, 66 | pop_eval_typ, 67 | pop_plot_stratum_assgn, 68 | pop_stratum, 69 | seedling, 70 | seedling_regen, 71 | sitetree, 72 | soils_erosion, 73 | soils_lab, 74 | soils_sample_loc, 75 | soils_visit, 76 | subplot, 77 | subplot_regen, 78 | subp_cond, 79 | subp_cond_chng_mtrx, 80 | survey, 81 | tree, 82 | tree_grm_begin, 83 | tree_grm_component, 84 | tree_grm_estn, 85 | tree_grm_midpt, 86 | tree_grm_threshold, 87 | tree_regional_biomass, 88 | tree_woodland_stems, 89 | veg_plot_species, 90 | veg_quadrat, 91 | veg_subplot, 92 | veg_subplot_spp, 93 | veg_visit, 94 | ] 95 | args: 96 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/fia/{{ name }}.parquet" 97 | engine: "pyarrow" 98 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/fluxnet.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_parquet 4 | 5 | sources: 6 | raw_aux: 7 | metadata: 8 | title: FLUXNET Auxiliary Data (raw) 9 | summary: Supporting metadata for the FLUXNET dataset. 10 | description: | 11 | The preparation of this FLUXNET Dataset has been possible thanks only to the efforts of 12 | many scientists and technicians around the world and the coordination among teams from 13 | regional networks. The previous versions of FLUXNET Dataset releases are the FLUXNET 14 | Marconi Dataset (2000) and the FLUXNET LaThuile Dataset (2007). The FLUXNET2015 Dataset 15 | includes several improvements to the data quality control protocols and the data 16 | processing pipeline. Examples include close interaction with tower teams to improve data 17 | quality, new methods for uncertainty quantification, use of reanalysis data to fill long 18 | gaps of micrometeorological variable records, among others (see the data processing 19 | pipeline page for details). 20 | tags: [climate, carbon] 21 | type: application/parquet 22 | license: Creative Commons Attribution 4.0 International 23 | providers: 24 | - name: FLUXNET 25 | description: | 26 | FLUXNET is organized through the Regional Networks that contribute data to FLUXNET 27 | datasets available at the FLUXNET webiste (https://fluxnet.org/), hosted at the 28 | Lawrence Berkeley National Laboratory (USA). 
29 | url: https://fluxnet.org/ 30 | parameters: 31 | station: 32 | description: fluxnet station code 33 | type: str 34 | default: it-noe 35 | kind: 36 | description: fluxnet data stream 37 | type: str 38 | default: auxmeteo 39 | allowed: [auxmeteo, auxnee] 40 | driver: parquet 41 | args: 42 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/fluxnet/{{ station }}_{{ kind }}.parquet" 43 | 44 | raw_fullset: 45 | metadata: 46 | title: FLUXNET FULLSET Data (raw) 47 | summary: Global network of micrometeorological flux measurement measuring carbon, energy and water cycles. 48 | description: | 49 | The preparation of this FLUXNET Dataset has been possible thanks only to the efforts of 50 | many scientists and technicians around the world and the coordination among teams from 51 | regional networks. The previous versions of FLUXNET Dataset releases are the FLUXNET 52 | Marconi Dataset (2000) and the FLUXNET LaThuile Dataset (2007). The FLUXNET2015 Dataset 53 | includes several improvements to the data quality control protocols and the data 54 | processing pipeline. Examples include close interaction with tower teams to improve data 55 | quality, new methods for uncertainty quantification, use of reanalysis data to fill long 56 | gaps of micrometeorological variable records, among others (see the data processing 57 | pipeline page for details). 58 | tags: [climate, carbon] 59 | type: application/parquet 60 | license: Creative Commons Attribution 4.0 International 61 | providers: 62 | - name: FLUXNET 63 | description: | 64 | FLUXNET is organized through the Regional Networks that contribute data to FLUXNET 65 | datasets available at the FLUXNET webiste (https://fluxnet.org/), hosted at the 66 | Lawrence Berkeley National Laboratory (USA). 67 | url: https://fluxnet.org/ 68 | parameters: 69 | station: 70 | description: fluxnet station code 71 | type: str 72 | default: it-noe 73 | kind: 74 | description: fluxnet data stream 75 | type: str 76 | default: fullset 77 | allowed: [erai, fullset] 78 | freq: 79 | description: temporal frequency 80 | type: str 81 | default: dd 82 | allowed: [dd, hh, mm, ww, yy] 83 | driver: parquet 84 | args: 85 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/fluxnet/{{ station }}_{{ kind }}_{{ freq }}.parquet" 86 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/gcp.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | raw_table: 3 | metadata: 4 | title: Global Carbon Project 5 | summary: Timeseries of the global carbon budget and carbon emissions. 6 | description: | 7 | The annually updated Global Carbon Budget produced by the Global Carbon Project. 8 | All datasets and modeling output to complete the Global Carbon Budget 2019 are 9 | described in detail in 10 | [Friedlingstein et al. (2019)](https://doi.org/10.5194/essd-11-1783-2019). 11 | tags: [carbon] 12 | type: application/parquet 13 | license: The use of data is conditional on citing the original data sources. 14 | providers: 15 | - name: Integrated Carbon Observation System 16 | description: | 17 | The Integrated Carbon Observation System, ICOS, is a European-wide greenhouse gas 18 | research infrastructure. ICOS produces standardised data on greenhouse gas 19 | concentrations in the atmosphere, as well as on carbon fluxes between the 20 | atmosphere, the earth and oceans. 
21 | url: https://www.icos-cp.eu/global-carbon-budget-2019 22 | parameters: 23 | name: 24 | description: name of GCB dataset 25 | type: str 26 | default: global_carbon_budget 27 | allowed: 28 | [ 29 | global_carbon_budget, 30 | fossil_emissions_by_fuel_type, 31 | land_use_change_emissions, 32 | ocean_sink, 33 | terrestrial_sink, 34 | historical_budget, 35 | consumption_emissions, 36 | territorial_emissions, 37 | transfer_emissions, 38 | ] 39 | driver: parquet 40 | args: 41 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/gcp/{{ name }}.parquet" 42 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/gridmet.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_gridmet: 7 | metadata: 8 | title: gridMET (raw) 9 | summary: High-resolution surface meteorologicaldata covering the conus US from 1979-yesterday. 10 | description: | 11 | gridMET is a dataset of daily high-spatial resolution (~4-km, 1/24th degree) surface 12 | meteorological data covering the contiguous US from 1979-yesterday. 13 | These data can provide important inputs for ecological, agricultural, and 14 | hydrological models. These data are updated daily. gridMET is the preferred naming 15 | convention for these data; however, the data are also known as cited as METDATA. 16 | tags: [climate] 17 | type: application/netcdf 18 | license: Public Domain Mark 1.0 19 | providers: 20 | - name: Climatology Lab, University of California, Merced 21 | description: Data provided by Dr. John Abatzoglou's Climatology Lab at the University of California, Merced. 22 | url: http://www.climatologylab.org 23 | driver: opendap 24 | parameters: 25 | variable: 26 | description: climate variable 27 | type: str 28 | default: pr 29 | allowed: 30 | [ 31 | "pr", 32 | "tmmn", 33 | "tmmx", 34 | "rmax", 35 | "rmin", 36 | "sph", 37 | "srad", 38 | "th", 39 | "vs", 40 | "bi", 41 | "fm100", 42 | "fm1000", 43 | "erc", 44 | "pdsi", 45 | "etr", 46 | "pet", 47 | "vpd", 48 | ] 49 | year: 50 | description: year 51 | type: int 52 | default: 2000 53 | args: 54 | urlpath: http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/{{ variable }}/{{ variable }}_{{ '%04d' % year }}.nc 55 | auth: null 56 | chunks: 57 | lat: 585 58 | lon: 1386 59 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/grids.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | albers4k: 7 | metadata: 8 | title: Albers Equal Area 4km grid 9 | summary: Grid definition for the CONUS/AK 4km Albers Equal Area study area 10 | description: | 11 | This dataset defines the grid used for many of our Albers Equal Area 4km data. 12 | products. It is particularly useful as a target grid when regridding/reprojecting 13 | other datasets to this (common) grid. 14 | tags: [meta] 15 | type: application/zarr 16 | license: Creative Commons Attribution 4.0 International 17 | providers: 18 | - name: CarbonPlan 19 | description: | 20 | CarbonPlan is a registered non-profit public benefit corporation working on 21 | the science and data of carbon removal. 
22 | url: https://carbonplan.org 23 | parameters: 24 | region: 25 | description: conus or ak 26 | type: str 27 | default: conus 28 | allowed: [conus, ak] 29 | driver: zarr 30 | args: 31 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/grids/{{ region }}/4000m/domain.zarr/" 32 | consolidated: True 33 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/maca.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_maca: 7 | metadata: 8 | title: MACA (raw) 9 | summary: Historical and future climate projections derived from CMIP5 using the MACA statistical downscaling technique. 10 | description: | 11 | Multivariate Adaptive Constructed Analogs (MACA) is a statistical method for downscaling 12 | Global Climate Models (GCMs) from their native coarse resolution to a higher spatial 13 | resolution that captures reflects observed patterns of daily near-surface meteorology and 14 | simulated changes in GCMs experiments. 15 | tags: [climate] 16 | type: application/netcdf 17 | license: Creative Commons CC0 1.0 Universal 18 | providers: 19 | - name: Climatology Lab, University of California, Merced 20 | description: Data provided by Dr. John Abatzoglou's Climatology Lab at the University of California, Merced. 21 | url: http://www.climatologylab.org 22 | driver: opendap 23 | parameters: 24 | variable: 25 | description: climate variable 26 | type: str 27 | default: pr 28 | allowed: 29 | [ 30 | "huss", 31 | "pr", 32 | "rhsmin", 33 | "rhsmax", 34 | "rsds", 35 | "tasmax", 36 | "tasmin", 37 | "uas", 38 | "vas", 39 | "vpd", 40 | ] 41 | gcm: 42 | description: climate model 43 | type: str 44 | default: IPSL-CM5A-LR 45 | # allowed: TODO: add list of defaults 46 | scenario: 47 | description: climate scenario 48 | type: str 49 | default: historical_1950_2005 50 | allowed: ["historical_1950_2005", "rcp45_2006_2099", "rcp85_2006_2099"] 51 | args: 52 | urlpath: http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_macav2metdata_{{ variable }}_{{ gcm }}_r1i1p1_{{ scenario }}_CONUS_daily.nc 53 | auth: null 54 | chunks: 55 | lat: 585 56 | lon: 1386 57 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/master.yaml: -------------------------------------------------------------------------------- 1 | description: "CarbonPlan Master Data Catalog" 2 | sources: 3 | gridmet: 4 | name: "gridMET" 5 | description: "Gridded daily surface meteorological data covering the continental US" 6 | metadata: 7 | tags: [climate] 8 | driver: intake.catalog.local.YAMLFileCatalog 9 | args: 10 | path: "{{CATALOG_DIR}}/gridmet.yaml" 11 | 12 | terraclimate: 13 | name: "TerraClimate" 14 | description: "Global gridded monthly climate and hydroclimate data from 1958-present." 15 | metadata: 16 | tags: [climate] 17 | driver: intake.catalog.local.YAMLFileCatalog 18 | args: 19 | path: "{{CATALOG_DIR}}/terraclimate.yaml" 20 | 21 | maca: 22 | name: "MACA" 23 | description: "Statistically downscaled climate data using the MACA method." 
24 | metadata: 25 | tags: [climate] 26 | driver: intake.catalog.local.YAMLFileCatalog 27 | args: 28 | path: "{{CATALOG_DIR}}/maca.yaml" 29 | 30 | fia: 31 | name: "Forest Inventory Analysis (FIA)" 32 | description: "Catalog for data from Forest Inventory Analysis (FIA) database" 33 | metadata: 34 | tags: [forests] 35 | driver: intake.catalog.local.YAMLFileCatalog 36 | args: 37 | path: "{{CATALOG_DIR}}/fia.yaml" 38 | 39 | fluxnet: 40 | name: "FLUXNET" 41 | description: "Catalog for data from the FLUXNET dataset" 42 | metadata: 43 | tags: [climate, carbon] 44 | driver: intake.catalog.local.YAMLFileCatalog 45 | args: 46 | path: "{{CATALOG_DIR}}/fluxnet.yaml" 47 | 48 | gcp: 49 | name: "Global Carbon Project (GCP)" 50 | description: "Catalog for data from the Global Carbon Project" 51 | metadata: 52 | tags: [climate, carbon] 53 | driver: intake.catalog.local.YAMLFileCatalog 54 | args: 55 | path: "{{CATALOG_DIR}}/gcp.yaml" 56 | 57 | mtbs: 58 | name: "Monitoring Trends in Burn Severity (MTBS)" 59 | description: "Catalog for data from the Monitoring Trends in Burn Severity (MTBS) dataset" 60 | metadata: 61 | tags: [forests] 62 | driver: intake.catalog.local.YAMLFileCatalog 63 | args: 64 | path: "{{CATALOG_DIR}}/mtbs.yaml" 65 | 66 | nftd: 67 | name: "National Forest Type Database (NFTD)" 68 | description: "Catalog for data from the National Forest Type Database (NFTD)" 69 | metadata: 70 | tags: [forests] 71 | driver: intake.catalog.local.YAMLFileCatalog 72 | args: 73 | path: "{{CATALOG_DIR}}/nftd.yaml" 74 | 75 | nlcd: 76 | name: "National Land Cover Database (NLCD)" 77 | description: "Catalog for data from the National Land Cover Database (NLCD)" 78 | metadata: 79 | tags: [forests] 80 | driver: intake.catalog.local.YAMLFileCatalog 81 | args: 82 | path: "{{CATALOG_DIR}}/nlcd.yaml" 83 | 84 | projects: 85 | name: "CarbonPlan Project Reports" 86 | description: "CarbonPlan Projects Dataset Catalog" 87 | metadata: 88 | tags: [carbon] 89 | driver: intake.catalog.local.YAMLFileCatalog 90 | args: 91 | path: "{{CATALOG_DIR}}/projects.yaml" 92 | 93 | spawnetal2020: 94 | name: "Global Above- and Belowground Biomass" 95 | description: "Catalog for data from Global Aboveground and Belowground Biomass Carbon Density Maps for the Year 2010 from Spawn et al (2020)" 96 | metadata: 97 | tags: [forests] 98 | driver: intake.catalog.local.YAMLFileCatalog 99 | args: 100 | path: "{{CATALOG_DIR}}/spawnetal2020.yaml" 101 | 102 | grids: 103 | name: "Project Grids" 104 | description: "Catalog grid files and domain definitions." 105 | metadata: 106 | tags: [meta] 107 | driver: intake.catalog.local.YAMLFileCatalog 108 | args: 109 | path: "{{CATALOG_DIR}}/grids.yaml" 110 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/mtbs.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: MTBS (raw) 9 | summary: Annual burn severity mosaics for the continental United States and Alaska. 10 | description: | 11 | Monitoring Trends in Burn Severity (MTBS) is an interagency program whose goal is to 12 | consistently map the burn severity and extent of large fires across all lands of the 13 | United States from 1984 to present. This includes all fires 1000 acres or greater in 14 | the western United States and 500 acres or greater in the eastern Unites States. 
The 15 | extent of coverage includes the continental U.S., Alaska, Hawaii and Puerto Rico. 16 | 17 | The burn severity mosaics consist of thematic raster images of MTBS burn severity 18 | classes for all currently completed MTBS fires for the continental United States, 19 | Alaska, Hawaii and Puerto Rico. Mosaicked burn severity images are compiled annually 20 | for each year by US State and the continental United States. 21 | tags: [forests] 22 | type: image/tiff; application=geotiff; profile=cloud-optimized 23 | license: Public Domain 24 | providers: 25 | - name: Monitoring Trends in Burn Severity 26 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 27 | url: https://www.mtbs.gov/ 28 | parameters: 29 | year: 30 | description: year 31 | type: int 32 | default: 1984 33 | region: 34 | description: mtbs region 35 | type: str 36 | default: conus 37 | allowed: [conus, ak] 38 | driver: rasterio 39 | args: 40 | # urlpath: "https://storage.googleapis.com/carbonplan-data/raw/mtbs/{{ region }}/30m/{{ '%d' % year }}.tif" 41 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/mtbs/{{ region }}/30m/{{ '%d' % year }}.tif" 42 | chunks: { "y": 5120, "x": 5120 } 43 | 44 | raster: 45 | metadata: 46 | title: MTBS (processed) 47 | summary: Annual burn severity mosaics for the continental United States and Alaska. 48 | description: | 49 | Monitoring Trends in Burn Severity (MTBS) is an interagency program whose goal is to 50 | consistently map the burn severity and extent of large fires across all lands of the 51 | United States from 1984 to present. This includes all fires 1000 acres or greater in 52 | the western United States and 500 acres or greater in the eastern Unites States. 53 | 54 | The burn severity mosaics consist of thematic raster images of MTBS burn severity 55 | classes for all currently completed MTBS fires for the continental United States, 56 | Alaska, Hawaii and Puerto Rico. Mosaicked burn severity images are compiled annually 57 | for each year by US State and the continental United States. 58 | 59 | These data have been processed to CarbonPlan's CONUS and Alaska study domains. 60 | tags: [forests] 61 | type: application/zarr 62 | license: Creative Commons Attribution 4.0 International 63 | providers: 64 | - name: Monitoring Trends in Burn Severity 65 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 66 | url: https://www.mtbs.gov/ 67 | parameters: 68 | region: 69 | description: conus or ak 70 | type: str 71 | default: conus 72 | allowed: [conus, ak] 73 | resolution: 74 | description: Pixel resolution in meters 75 | type: str 76 | default: "4000m" 77 | allowed: ["4000m"] 78 | driver: zarr 79 | args: 80 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/mtbs/{{ region }}/{{ resolution }}/raster.zarr" 81 | consolidated: True 82 | 83 | rasterized_perims: 84 | metadata: 85 | title: MTBS Rasterized Fire Perimeters (processed) 86 | summary: Monthly burned area rasters for the continental United States and Alaska. 87 | description: | 88 | Monitoring Trends in Burn Severity (MTBS) is an interagency program whose goal is to 89 | consistently map the burn severity and extent of large fires across all lands of the 90 | United States from 1984 to present. 91 | 92 | TODO... 
93 | 94 | tags: [forests] 95 | type: image/tiff; application=geotiff; profile=cloud-optimized 96 | license: Creative Commons Attribution 4.0 International 97 | providers: 98 | - name: Monitoring Trends in Burn Severity 99 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 100 | url: https://www.mtbs.gov/ 101 | parameters: 102 | region: 103 | description: conus or ak 104 | type: str 105 | default: conus 106 | allowed: [conus, ak] 107 | resolution: 108 | description: Pixel resolution in meters 109 | type: str 110 | default: "30m" 111 | allowed: ["30m"] 112 | size: 113 | description: Fire size 114 | type: str 115 | default: "lf" 116 | allowed: ["lf", "vlf"] 117 | date: 118 | description: "Year and month (format: YYYY.MM)" 119 | type: str 120 | default: "2018.11" 121 | driver: rasterio 122 | args: 123 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/mtbs/{{ region }}/{{ resolution }}/{{ size }}_{{ date }}.tif" 124 | chunks: { "y": 5120, "x": 5120 } 125 | 126 | fod_shp: 127 | metadata: 128 | title: MTBS Occurance (vector data) 129 | summary: Fire occurance location dataset in vector/point format. 130 | description: | 131 | The fire occurrence location dataset is a vector point ESRI shapefile of the centroids of 132 | all currently completed MTBS fires occurring in the continental United States, Alaska, 133 | Hawaii and Puerto Rico. 134 | tags: [fire, forests] 135 | type: application/octet-stream 136 | license: Public Domain 137 | providers: 138 | - name: Monitoring Trends in Burn Severity 139 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 140 | url: https://www.mtbs.gov/ 141 | ci: skip 142 | driver: shapefile 143 | args: 144 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/mtbs/mtbs_fod_pts_data/mtbs_fod_pts_DD.shp" 145 | 146 | perims_shp: 147 | metadata: 148 | title: MTBS Boundaries (vector data) 149 | summary: Burned area boundaries data in vector/polygon format. 150 | description: | 151 | The burned area boundaries dataset is a vector polygon ESRI shapefile of the extent of the 152 | burned areas of all currently completed MTBS fires for the continental United States, 153 | Alaska, Hawaii and Puerto Rico. 154 | tags: [forests] 155 | type: application/octet-stream 156 | license: Public Domain 157 | providers: 158 | - name: Monitoring Trends in Burn Severity 159 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 160 | url: https://www.mtbs.gov/ 161 | ci: skip 162 | driver: shapefile 163 | args: 164 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/mtbs/mtbs_perimeter_data/mtbs_perims_DD.shp" 165 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/nftd.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: National Forest Type Dataset (raw) 9 | summary: Extent, distribution, and forest type composition of the nation’s forests. 10 | description: | 11 | This geospatial dataset was created by the USFS Forest Inventory and Analysis (FIA) program 12 | and the Geospatial Technology and Applications Center (GTAC) to show the extent, 13 | distribution, and forest type composition of the nation’s forests. 
14 | 15 | The dataset was created by modeling forest type from FIA plot data as a function of more 16 | than one hundred geospatially continuous predictor layers. 17 | 18 | This process results in a view of forest type distribution in greater detail than is 19 | possible with the FIA plot data alone. 20 | tags: [forests] 21 | type: image/tiff; application=geotiff; profile=cloud-optimized 22 | license: Public Domain 23 | providers: 24 | - name: USDA Forest Service 25 | description: Data provided by the United States Department of Agriculture Forest Service. 26 | url: https://www.fia.fs.fed.us/ 27 | ci: xfail 28 | parameters: 29 | option: 30 | description: error or raster 31 | type: str 32 | default: raster 33 | allowed: [error, raster] 34 | region: 35 | description: conus or ak 36 | type: str 37 | default: conus 38 | allowed: [conus, ak] 39 | variable: 40 | description: foresttype or forestgroup 41 | type: str 42 | default: foresttype 43 | allowed: [foresttype, forestgroup] 44 | driver: rasterio 45 | args: 46 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/nftd/{{ region }}_{{ variable }}/250m/{{ option }}.tif" 47 | chunks: { "y": 5120, "x": 5120 } 48 | 49 | raster: 50 | metadata: 51 | title: National Forest Type Dataset (processed) 52 | summary: Extent, distribution, and forest type composition of the nation’s forests. 53 | description: | 54 | This geospatial dataset was created by the USFS Forest Inventory and Analysis (FIA) program 55 | and the Geospatial Technology and Applications Center (GTAC) to show the extent, 56 | distribution, and forest type composition of the nation’s forests. 57 | 58 | The dataset was created by modeling forest type from FIA plot data as a function of more 59 | than one hundred geospatially continuous predictor layers. 60 | 61 | This process results in a view of forest type distribution in greater detail than is 62 | possible with the FIA plot data alone. 63 | 64 | These data have been processed to CarbonPlan's CONUS and Alaska study domains. 65 | tags: [forests] 66 | type: image/tiff; application=geotiff; profile=cloud-optimized 67 | license: Public Domain 68 | providers: 69 | - name: USDA Forest Service 70 | description: Data provided by the United States Department of Agriculture Forest Service. 71 | url: https://www.fia.fs.fed.us/ 72 | ci: xfail 73 | parameters: 74 | region: 75 | description: conus or ak 76 | type: str 77 | default: conus 78 | allowed: [conus, ak] 79 | option: 80 | description: group/type [optional _error] 81 | type: str 82 | default: type 83 | allowed: [group, type, group_error, type_error] 84 | resolution: 85 | description: pixel resolution in meters 86 | type: str 87 | default: 4000m 88 | allowed: [250m, 4000m] 89 | driver: rasterio 90 | args: 91 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/nftd/{{ region }}/{{ resolution }}/{{ option }}.tif" 92 | chunks: { "y": 5120, "x": 5120 } 93 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/nlcd.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: National Land Cover Database (raw) 9 | summary: The National Land Cover Database - 2001 to 2016. 10 | description: | 11 | The U.S. Geological Survey (USGS), in partnership with several federal agencies, has 12 | developed and released four National Land Cover Database (NLCD) products over the past 13 | two decades: NLCD 1992, 2001, 2006, and 2011. 
These products provide spatially explicit 14 | and reliable information on the Nation’s land cover and land cover change. To continue 15 | the legacy of NLCD and further establish a long-term monitoring capability for the 16 | Nation’s land resources, the USGS has designed a new generation of NLCD products named 17 | NLCD 2016. The NLCD 2016 design aims to provide innovative, consistent, and robust 18 | methodologies for production of a multi-temporal land cover and land cover change 19 | database from 2001 to 2016 at 2–3-year intervals. Comprehensive research was conducted 20 | and resulted in developed strategies for NLCD 2016: a streamlined process for assembling 21 | and preprocessing Landsat imagery and geospatial ancillary datasets; a multi-source 22 | integrated training data development and decision-tree based land cover classifications; 23 | a temporally, spectrally, and spatially integrated land cover change analysis strategy; 24 | a hierarchical theme-based post-classification and integration protocol for generating 25 | land cover and change products; a continuous fields biophysical parameters modeling 26 | method; and an automated scripted operational system for the NLCD 2016 production. The 27 | performance of the developed strategies and methods were tested in twenty World Reference 28 | System-2 path/row throughout the conterminous U.S. An overall agreement ranging from 29 | 71% to 97% between land cover classification and reference data was achieved for all 30 | tested area and all years. Results from this study confirm the robustness of this 31 | comprehensive and highly automated procedure for NLCD 2016 operational mapping. 32 | tags: [forests] 33 | type: image/tiff; application=geotiff; profile=cloud-optimized 34 | license: Public Domain 35 | providers: 36 | - name: Multi-Resolution Land Characteristics (MRLC) Consortium 37 | description: The Multi-Resolution Land Characteristics (MRLC) consortium is a group of federal agencies who coordinate and generate consistent and relevant land cover information at the national scale for a wide variety of environmental, land management, and modeling applications. 38 | url: https://www.mrlc.gov/ 39 | parameters: 40 | option: 41 | description: year (int) or change 42 | type: str 43 | default: 2016 44 | region: 45 | description: conus or ak 46 | type: str 47 | default: conus 48 | allowed: [conus, ak] 49 | driver: rasterio 50 | args: 51 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/nlcd/{{ region }}/30m/{{ option }}.tif" 52 | chunks: { "y": 5120, "x": 5120 } 53 | 54 | raster: 55 | metadata: 56 | title: National Land Cover Database (processed) 57 | summary: The National Land Cover Database - 2001 to 2016. 58 | description: | 59 | The U.S. Geological Survey (USGS), in partnership with several federal agencies, has 60 | developed and released four National Land Cover Database (NLCD) products over the past 61 | two decades: NLCD 1992, 2001, 2006, and 2011. These products provide spatially explicit 62 | and reliable information on the Nation’s land cover and land cover change. To continue 63 | the legacy of NLCD and further establish a long-term monitoring capability for the 64 | Nation’s land resources, the USGS has designed a new generation of NLCD products named 65 | NLCD 2016. The NLCD 2016 design aims to provide innovative, consistent, and robust 66 | methodologies for production of a multi-temporal land cover and land cover change 67 | database from 2001 to 2016 at 2–3-year intervals. 
Comprehensive research was conducted 68 | and resulted in developed strategies for NLCD 2016: a streamlined process for assembling 69 | and preprocessing Landsat imagery and geospatial ancillary datasets; a multi-source 70 | integrated training data development and decision-tree based land cover classifications; 71 | a temporally, spectrally, and spatially integrated land cover change analysis strategy; 72 | a hierarchical theme-based post-classification and integration protocol for generating 73 | land cover and change products; a continuous fields biophysical parameters modeling 74 | method; and an automated scripted operational system for the NLCD 2016 production. The 75 | performance of the developed strategies and methods were tested in twenty World Reference 76 | System-2 path/row throughout the conterminous U.S. An overall agreement ranging from 77 | 71% to 97% between land cover classification and reference data was achieved for all 78 | tested area and all years. Results from this study confirm the robustness of this 79 | comprehensive and highly automated procedure for NLCD 2016 operational mapping. 80 | 81 | These data have been processed to CarbonPlan's CONUS and Alaska study domains. 82 | tags: [forests] 83 | type: image/tiff; application=geotiff; profile=cloud-optimized 84 | license: Public Domain 85 | providers: 86 | - name: Multi-Resolution Land Characteristics (MRLC) Consortium 87 | description: The Multi-Resolution Land Characteristics (MRLC) consortium is a group of federal agencies who coordinate and generate consistent and relevant land cover information at the national scale for a wide variety of environmental, land management, and modeling applications. 88 | url: https://www.mrlc.gov/ 89 | parameters: 90 | option: 91 | description: year (int) or change 92 | type: str 93 | default: 2016 94 | resolution: 95 | description: pixel resolution in meters 96 | type: str 97 | default: 4000m 98 | allowed: [250m, 4000m] 99 | region: 100 | description: conus or ak 101 | type: str 102 | default: conus 103 | allowed: [conus, ak] 104 | driver: rasterio 105 | args: 106 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/nlcd/{{ region }}/{{ resolution }}/{{ option }}.tif" 107 | chunks: { "y": 5120, "x": 5120 } 108 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/projects.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | reports: 3 | metadata: 4 | title: CarbonPlan Project's Database 5 | summary: Public database of carbon removal project proposals evaluated by CarbonPlan. 6 | description: | 7 | This is a public database of reports on carbon removal project proposals. These reports 8 | reflect our independent analysis of public information. 9 | tags: [carbon] 10 | type: text/csv 11 | license: Creative Commons Attribution 4.0 International 12 | providers: 13 | - name: CarbonPlan 14 | description: | 15 | CarbonPlan is a registered non-profit public benefit corporation working on 16 | the science and data of carbon removal. 
17 | url: https://carbonplan.org 18 | driver: csv 19 | args: 20 | urlpath: "https://api.carbonplan.org/projects.csv" 21 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/spawnetal2020.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: Global Biomass (Spawn and Gibbs, 2020) 9 | summary: Global aboveground and belowground biomass carbon density maps for the year 2010 10 | description: | 11 | This dataset provides temporally consistent and harmonized global maps of aboveground and 12 | belowground biomass carbon density for the year 2010 at a 300-m spatial resolution. The 13 | aboveground biomass map integrates land-cover specific, remotely sensed maps of woody, 14 | grassland, cropland, and tundra biomass. Input maps were amassed from the published 15 | literature and, where necessary, updated to cover the focal extent or time period. The 16 | belowground biomass map similarly integrates matching maps derived from each aboveground 17 | biomass map and land-cover specific empirical models. Aboveground and belowground maps were 18 | then integrated separately using ancillary maps of percent tree cover and landcover and a 19 | rule-based decision tree. Maps reporting the accumulated uncertainty of pixel-level 20 | estimates are also provided. 21 | tags: [biomass, forests] 22 | type: image/tiff; application=geotiff; profile=cloud-optimized 23 | license: Public domain 24 | providers: 25 | - name: Oak Ridge National Laboratory 26 | description: | 27 | The Oak Ridge National Laboratory Distributed Active Archive Center (ORNL DAAC) for 28 | Biogeochemical Dynamics is a NASA Earth Observing System Data and Information System 29 | (EOSDIS) data center managed by the Earth Science Data and Information System (ESDIS) 30 | Project. 31 | url: https://doi.org/10.3334/ORNLDAAC/1763 32 | ci: xfail 33 | parameters: 34 | variable: 35 | description: aboveground, aboveground_uncertainty, belowground, or belowground_uncertainty 36 | type: str 37 | default: aboveground 38 | allowed: 39 | [ 40 | aboveground, 41 | aboveground_uncertainty, 42 | belowground, 43 | belowground_uncertainty, 44 | ] 45 | driver: rasterio 46 | args: 47 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/2010-harmonized-biomass/global/300m/{{ variable }}.tif" 48 | chunks: { "y": 5120, "x": 5120 } 49 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/terraclimate.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: TerraClimate (raw) 9 | summary: Global climate and climaticwater balance data from 1958-2019. 10 | description: | 11 | TerraClimate is a dataset of monthly climate and climatic water balance for global 12 | terrestrial surfaces from 1958-2019. These data provide important inputs for ecological 13 | and hydrological studies at global scales that require high spatial resolution and 14 | time-varying data. All data have monthly temporal resolution and a ~4-km (1/24th degree) 15 | spatial resolution. The data cover the period from 1958-2019. 
16 | tags: [climate] 17 | type: application/netcdf 18 | license: Creative Commons Public Domain (CC0) 19 | providers: 20 | - name: Climatology Lab, University of California, Merced 21 | description: Data provided by Dr. John Abatzoglou's Climatology Lab at the University of California, Merced. 22 | url: http://www.climatologylab.org 23 | driver: zarr 24 | args: 25 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/terraclimate/4000m/raster.zarr" 26 | consolidated: True 27 | 28 | raster: 29 | metadata: 30 | title: TerraClimate (processed) 31 | summary: Climate and climaticwater balance data from 1958-2019. 32 | description: | 33 | TerraClimate is a dataset of monthly climate and climatic water balance for global 34 | terrestrial surfaces from 1958-2019. All data have monthly temporal resolution and a 35 | ~4-km (1/24th degree) spatial resolution. The data cover the period from 1958-2019. 36 | 37 | These data have been processed to CarbonPlan's CONUS and Alaska study domains. 38 | tags: [climate] 39 | type: application/zarr 40 | license: Creative Commons Public Domain (CC0) 41 | providers: 42 | - name: Climatology Lab, University of California, Merced 43 | description: Data provided by Dr. John Abatzoglou's Climatology Lab at the University of California, Merced. 44 | url: http://www.climatologylab.org 45 | ci: skip 46 | parameters: 47 | region: 48 | description: conus or ak 49 | type: str 50 | default: conus 51 | allowed: [conus, ak] 52 | resolution: 53 | description: Pixel resolution in meters 54 | type: str 55 | default: "4000m" 56 | allowed: ["4000m"] 57 | driver: zarr 58 | args: 59 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/terraclimate/{{ region }}/{{ resolution }}/raster.zarr" 60 | consolidated: True 61 | -------------------------------------------------------------------------------- /carbonplan_data/metadata.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import getpass 4 | import os 5 | import socket 6 | import sys 7 | import time 8 | 9 | from . import __version__ 10 | 11 | 12 | def get_cf_global_attrs(**attrs): 13 | if "history" not in attrs: 14 | attrs["history"] = f"Created: {time.ctime(time.time())}" 15 | 16 | if "insitution" not in attrs: 17 | attrs["institution"] = "CarbonPlan" 18 | 19 | if "source" not in attrs: 20 | attrs["source"] = sys.argv[0] 21 | 22 | if "hostname" not in attrs: 23 | attrs["hostname"] = socket.gethostname() 24 | 25 | if "username" not in attrs: 26 | attrs["username"] = os.getenv("JUPYTERHUB_USER", getpass.getuser()) 27 | 28 | if "version" not in attrs: 29 | attrs["version"] = __version__ 30 | 31 | return attrs 32 | -------------------------------------------------------------------------------- /carbonplan_data/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Based on scikit-learn/sklearn/utils/estimator_checks.py 2 | import itertools 3 | from functools import partial 4 | 5 | 6 | def get_entry_params(entry): 7 | user_parameters = entry.describe()["user_parameters"] 8 | if not user_parameters: 9 | return [] 10 | 11 | keys = [p["name"] for p in user_parameters] 12 | try: 13 | values = [p["allowed"] for p in user_parameters] 14 | except KeyError: 15 | return [] 16 | params = [None] 17 | params.extend([dict(zip(keys, p)) for p in itertools.product(*values)]) 18 | return params 19 | 20 | 21 | def _set_check_ids(obj): 22 | """Create pytest ids for checks. 
23 | When `obj` is an intake entry, this returns the pprint version of the 24 | intake entry. When `obj` is a function, the name of the function is 25 | returned with its keyworld arguments. 26 | 27 | Parameters 28 | ---------- 29 | obj : intake entry or function 30 | Items generated by `check_entry` 31 | 32 | Returns 33 | ------- 34 | id : string or None 35 | 36 | See also 37 | -------- 38 | check_entry 39 | """ 40 | if hasattr(obj, "container"): 41 | c = getattr(obj, "_catalog", None) 42 | if c: 43 | name = f"{c.name}.{obj.name}" 44 | else: 45 | name = f"{obj.name}" 46 | return name 47 | if callable(obj): 48 | if not isinstance(obj, partial): 49 | return obj.__name__ 50 | 51 | if not obj.keywords: 52 | return obj.func.__name__ 53 | 54 | kwstring = ",".join([f"{k}={v}" for k, v in obj.keywords.items()]) 55 | return f"{obj.func.__name__}({kwstring})" 56 | 57 | 58 | def parametrize_with_checks(catalog): 59 | """Pytest specific decorator for parametrizing catalog checks. 60 | The `id` of each check is set to be a pprint version of the catalog 61 | and the name of the check with its keyword arguments. 62 | This allows to use `pytest -k` to specify which tests to run:: 63 | pytest test_check_catalogs.py -k check_catalog_metadata 64 | 65 | Parameters 66 | ---------- 67 | catalog : Intake Catalog 68 | Catalog to generated checks for. 69 | 70 | Returns 71 | ------- 72 | decorator : `pytest.mark.parametrize` 73 | 74 | Examples 75 | -------- 76 | >>> from carbonplan.data.tests import parametrize_with_checks 77 | >>> from carbonplan.data import cat 78 | >>> @parametrize_with_checks(cat) 79 | ... def test_catalog(entry, check): 80 | ... check(entry) 81 | ... 82 | 83 | """ 84 | import pytest 85 | 86 | checks_generator = itertools.chain.from_iterable( 87 | check_entry(name, entry) for name, entry in dict(catalog.walk(depth=10)).items() 88 | ) 89 | 90 | checks_with_marks = list( 91 | _mark_xfail_checks(estimator, check, pytest) for estimator, check in checks_generator 92 | ) 93 | 94 | return pytest.mark.parametrize("entry, check", checks_with_marks, ids=_set_check_ids) 95 | 96 | 97 | def _mark_xfail_checks(entry, check, pytest): 98 | # TODO 99 | return entry, check 100 | 101 | 102 | def _yield_all_checks(name, entry): 103 | yield check_entry_metadata 104 | 105 | for params in get_entry_params(entry): 106 | yield partial(check_get_entry_data, params=params) 107 | 108 | 109 | def check_entry(name, entry): 110 | yield from ((entry, partial(check, name)) for check in _yield_all_checks(name, entry)) 111 | 112 | 113 | def check_get_entry_data(name, entry, params=None): 114 | import pytest 115 | 116 | if params is not None: 117 | entry = entry(**params) 118 | else: 119 | entry = entry() 120 | 121 | if entry.container == "catalog": 122 | entry.reload() 123 | elif entry.container in ["xarray", "dataframe"]: 124 | if entry.metadata.get("ci", None) == "skip": 125 | pytest.skip("dataset marked as ci: skip") # TODO: move to _mark_xfail_checks 126 | elif entry.metadata.get("ci", None) == "xfail": 127 | pytest.xfail("dataset marked as ci: xfail") # TODO: move to _mark_xfail_checks 128 | try: 129 | _ = entry.to_dask() 130 | except NotImplementedError: 131 | _ = entry.read() 132 | 133 | 134 | def check_entry_metadata(name, entry): 135 | import pytest 136 | 137 | expected_keys = ["title", "summary", "description", "tags", "license", "providers"] 138 | if entry.container == "catalog": 139 | pytest.skip( 140 | "not checking metadata in top level catalog objects." 
141 | ) # TODO: move to _mark_xfail_checks 142 | for key in expected_keys: 143 | assert key in entry().metadata 144 | -------------------------------------------------------------------------------- /carbonplan_data/tests/test_catalogs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from carbonplan_data import MASTER_CATALOG_PATH, cat 4 | 5 | from . import parametrize_with_checks 6 | 7 | 8 | def test_yaml_catalogs_in_distribution(): 9 | assert os.path.exists(MASTER_CATALOG_PATH) 10 | 11 | 12 | @parametrize_with_checks(cat) 13 | def test_catalog_entries(entry, check): 14 | check(entry) 15 | -------------------------------------------------------------------------------- /carbonplan_data/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import xarray as xr 3 | from zarr.storage import MemoryStore 4 | 5 | from carbonplan_data.utils import get_versions, set_zarr_encoding 6 | 7 | 8 | @pytest.fixture 9 | def temperature(): 10 | ds = xr.tutorial.open_dataset("air_temperature") 11 | return ds 12 | 13 | 14 | def test_set_zarr_encoding(temperature): 15 | store = MemoryStore() 16 | temperature["air"].encoding["foo"] = "bar" 17 | ds = set_zarr_encoding(temperature) 18 | ds.to_zarr(store) 19 | assert "foo" not in ds.air.encoding 20 | assert ds.air.encoding.get("compressor", None) 21 | assert ds.air.encoding.get("_FillValue", None) 22 | 23 | ds = set_zarr_encoding(temperature, float_dtype="float16") 24 | ds.to_zarr(store, mode="w") 25 | assert "f2" in ds.air.dtype.str 26 | 27 | 28 | def test_get_versions(): 29 | versions = get_versions() 30 | assert versions["carbonplan_data"] 31 | -------------------------------------------------------------------------------- /carbonplan_data/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import importlib 4 | import os 5 | import pathlib 6 | import zipfile 7 | 8 | import numpy as np 9 | import validators 10 | import wget 11 | import xarray as xr 12 | import yaml 13 | from numpy.typing import DTypeLike 14 | 15 | root = pathlib.Path(__file__).parents[2] 16 | 17 | # from netCDF4 and netCDF4-python 18 | default_fillvals = { 19 | "S1": "\x00", 20 | "i1": -127, 21 | "u1": 255, 22 | "i2": -32767, 23 | "u2": 65535, 24 | "i4": -2147483647, 25 | "u4": 4294967295, 26 | "i8": -9223372036854775806, 27 | "u8": 18446744073709551614, 28 | "f4": 9.969209968386869e36, 29 | "f8": 9.969209968386869e36, 30 | } 31 | 32 | 33 | def projections(name, region): 34 | if name == "albers": 35 | if region == "conus": 36 | crs = albers_conus_crs() 37 | extent = albers_conus_extent() 38 | elif region == "ak": 39 | crs = albers_ak_crs() 40 | extent = albers_ak_extent() 41 | else: 42 | raise ValueError(f'region "{region}" not found') 43 | else: 44 | raise ValueError(f'projection "{name}" name not found') 45 | return crs, extent 46 | 47 | 48 | def albers_conus_extent(): 49 | return "-2493045.0 177285.0 2342655.0 3310005.0" 50 | 51 | 52 | def albers_conus_crs(): 53 | return ( 54 | 'PROJCS["Albers_Conical_Equal_Area",' 55 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 56 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 57 | "TOWGS84[0,0,0,-0,-0,-0,0]," 58 | 'AUTHORITY["EPSG","6326"]],' 59 | 'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' 60 | 'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' 61 | 'AUTHORITY["EPSG","4326"]],' 62 | 
'PROJECTION["Albers_Conic_Equal_Area"],' 63 | 'PARAMETER["standard_parallel_1",29.5],' 64 | 'PARAMETER["standard_parallel_2",45.5],' 65 | 'PARAMETER["latitude_of_center",23],' 66 | 'PARAMETER["longitude_of_center",-96],' 67 | 'PARAMETER["false_easting",0],' 68 | 'PARAMETER["false_northing",0],' 69 | 'UNIT["meters",1]]' 70 | ) 71 | 72 | 73 | def albers_conus_transform(res=4000): 74 | return [res, 0.0, -2493045.0, 0.0, -res, 3310005.0] 75 | 76 | 77 | def albers_ak_extent(): 78 | return "-2232345.0 344805.0 1494735.0 2380125.0" 79 | 80 | 81 | def albers_ak_crs(): 82 | return ( 83 | 'PROJCS["WGS_1984_Albers",' 84 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 85 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 86 | 'AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],' 87 | 'UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4326"]],' 88 | 'PROJECTION["Albers_Conic_Equal_Area"],' 89 | 'PARAMETER["standard_parallel_1",55],' 90 | 'PARAMETER["standard_parallel_2",65],' 91 | 'PARAMETER["latitude_of_center",50],' 92 | 'PARAMETER["longitude_of_center",-154],' 93 | 'PARAMETER["false_easting",0],' 94 | 'PARAMETER["false_northing",0],' 95 | 'UNIT["metre",1,AUTHORITY["EPSG","9001"]]]' 96 | ) 97 | 98 | 99 | def albers_ak_transform(res=4000): 100 | return [res, 0.0, -2232345.0, 0.0, -res, 2380125.0] 101 | 102 | 103 | def setup(name): 104 | if name == "jeremy": 105 | creds = "/Users/freeman/.config/gcloud/legacy_credentials/jeremy@carbonplan.org/adc.json" 106 | workdir = pathlib.Path("/Users/freeman/workdir/carbonplan-data/") 107 | if name == "joe": 108 | creds = "/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json" 109 | workdir = pathlib.Path("/Users/jhamman/workdir/carbonplan_data_downloads/") 110 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds 111 | 112 | from google.cloud import storage 113 | 114 | storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB 115 | storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB 116 | 117 | def upload(src, target, bucket="carbonplan-data"): 118 | storage_client = storage.Client("carbonplan") 119 | bucket = storage_client.bucket(bucket) 120 | blob = bucket.blob(target) 121 | blob.upload_from_filename(src) 122 | 123 | return workdir, upload 124 | 125 | 126 | def get_sources(): 127 | with open(root / "sources.yaml") as f: 128 | sources = yaml.load(f, Loader=yaml.FullLoader) 129 | 130 | return sources 131 | 132 | 133 | def get_workdir(workdir): 134 | # fallback to cwd 135 | if workdir is None: 136 | workdir = os.getcwd() 137 | 138 | # cast to pathlib obj 139 | if isinstance(workdir, str): 140 | workdir = pathlib.Path(workdir) 141 | 142 | # make sure workdir exists 143 | workdir.mkdir(parents=True, exist_ok=True) 144 | 145 | return workdir 146 | 147 | 148 | def process_sources(name, workdir=None): 149 | sources = get_sources() 150 | workdir = get_workdir(workdir) 151 | 152 | results = {"download": [], "unzip": []} 153 | 154 | for key, dset in sources[name]["data"].items(): 155 | # download 156 | if "download" in dset["actions"]: 157 | for url in dset["urlpath"]: 158 | if not validators.url(url): 159 | raise ValueError(f'url "{url}" not valid') 160 | out = workdir / url.name 161 | if not out.exists(): 162 | print(f"downloading {url}") 163 | wget.download(str(url), out=str(out)) 164 | 165 | results["download"].append(out) 166 | 167 | # unzip 168 | if "unzip" in dset["actions"]: 169 | outdir = workdir / out.stem 170 | if not outdir.exists(): 171 | outdir.mkdir(parents=True) 172 | with zipfile.ZipFile(out, "r") as f: 173 | 
print(f"extracting contents of {out}") 174 | f.extractall(outdir) 175 | 176 | results["unzip"].append(outdir.glob("**/*")) 177 | 178 | return results 179 | 180 | 181 | def set_zarr_encoding( 182 | ds: xr.Dataset, 183 | codec_config: dict | None = None, 184 | float_dtype: DTypeLike | None = None, 185 | int_dtype: DTypeLike | None = None, 186 | ) -> xr.Dataset: 187 | """Set zarr encoding for each variable in the dataset 188 | 189 | Parameters 190 | ---------- 191 | ds : xr.Dataset 192 | Input dataset 193 | codec_config : dict, optional 194 | Dictionary of parameters to pass to numcodecs.get_codec, default is {'id': 'zlib', 'level': 1} 195 | float_dtype : str or dtype, optional 196 | Dtype to cast floating point variables to 197 | 198 | Returns 199 | ------- 200 | ds : xr.Dataset 201 | Output dataset with updated variable encodings 202 | """ 203 | import numcodecs 204 | 205 | ds = ds.copy() 206 | 207 | if codec_config is None: 208 | codec_config = {"id": "zlib", "level": 1} 209 | compressor = numcodecs.get_codec(codec_config) 210 | 211 | for k, da in ds.variables.items(): 212 | # maybe cast float type 213 | if np.issubdtype(da.dtype, np.floating) and float_dtype is not None: 214 | da = da.astype(float_dtype) 215 | 216 | if np.issubdtype(da.dtype, np.integer) and int_dtype is not None: 217 | da = da.astype(int_dtype) 218 | 219 | # remove old encoding 220 | da.encoding.clear() 221 | 222 | # update with new encoding 223 | da.encoding["compressor"] = compressor 224 | try: 225 | del da.atrrs["_FillValue"] 226 | except AttributeError: 227 | pass 228 | da.encoding["_FillValue"] = default_fillvals.get( 229 | da.dtype.str[-2:], None 230 | ) # TODO: handle date/time types 231 | 232 | ds[k] = da 233 | 234 | return ds 235 | 236 | 237 | def get_versions( 238 | packages=[ 239 | "carbonplan", 240 | "carbonplan_data", 241 | "xarray", 242 | "dask", 243 | "numpy", 244 | "scipy", 245 | "fsspec", 246 | "intake", 247 | "rasterio", 248 | "zarr", 249 | ], 250 | ) -> dict[str, str]: 251 | """Helper to fetch commonly used package versions 252 | Parameters 253 | ---------- 254 | packages : list 255 | List of packages to fetch versions for 256 | Returns 257 | ------- 258 | versions : dict 259 | Version dictionary with keys of package names and values of version strings 260 | """ 261 | versions = {"docker_image ": os.getenv("REPO_HASH", None)} 262 | 263 | for p in packages: 264 | try: 265 | mod = importlib.import_module(p) 266 | versions[p] = getattr(mod, "__version__", None) 267 | except ModuleNotFoundError: 268 | versions[p] = None 269 | 270 | return versions 271 | 272 | 273 | def zarr_is_complete(store, check=".zmetadata"): 274 | """Return true if Zarr store is complete""" 275 | return check in store 276 | -------------------------------------------------------------------------------- /ci/environment.yaml: -------------------------------------------------------------------------------- 1 | name: carbonplan-data 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | - dask 7 | - fastparquet 8 | - gcsfs 9 | - gdal 10 | - google-cloud-storage 11 | - intake<=0.7.0 12 | - intake-geopandas 13 | - intake-parquet 14 | - intake-xarray 15 | - libgdal 16 | - netcdf4 17 | - pandas 18 | - pip 19 | - pooch 20 | - pydap 21 | - pytest 22 | - pytest-cov 23 | - pre-commit 24 | - rasterio 25 | - xarray 26 | - zarr 27 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | 
require_ci_to_pass: no 3 | max_report_age: off 4 | 5 | comment: false 6 | 7 | ignore: 8 | - "tests/*.py" 9 | - "setup.py" 10 | 11 | coverage: 12 | precision: 2 13 | round: down 14 | status: 15 | project: 16 | default: 17 | target: 95 18 | informational: true 19 | patch: off 20 | changes: off 21 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=64", "setuptools-scm[toml]>=6.2", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "carbonplan-data" 7 | description = "Preprocessing utilities for CarbonPlan's data catalog" 8 | readme = "README.md" 9 | license = { text = "MIT" } 10 | authors = [{ name = "CarbonPlan", email = "tech@carbonplan.org" }] 11 | requires-python = ">=3.9" 12 | classifiers = [ 13 | "Development Status :: 3 - Alpha", 14 | "License :: OSI Approved :: MIT License", 15 | "Operating System :: OS Independent", 16 | "Intended Audience :: Science/Research", 17 | "Programming Language :: Python", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Topic :: Scientific/Engineering", 24 | ] 25 | dynamic = ["version"] 26 | 27 | dependencies = ["intake<=0.7.0", "validators", "wget", "numpy", "xarray"] 28 | 29 | [project.urls] 30 | repository = "https://github.com/carbonplan/data" 31 | 32 | [tool.setuptools.packages.find] 33 | include = ["carbonplan_data*"] 34 | 35 | [tool.setuptools_scm] 36 | local_scheme = "node-and-date" 37 | fallback_version = "999" 38 | 39 | 40 | [tool.black] 41 | line-length = 100 42 | target-version = ['py39'] 43 | skip-string-normalization = true 44 | 45 | 46 | [tool.ruff] 47 | line-length = 100 48 | target-version = "py39" 49 | builtins = ["ellipsis"] 50 | extend-include = ["*.ipynb"] 51 | # Exclude a variety of commonly ignored directories. 
52 | exclude = [ 53 | ".bzr", 54 | ".direnv", 55 | ".eggs", 56 | ".git", 57 | ".hg", 58 | ".mypy_cache", 59 | ".nox", 60 | ".pants.d", 61 | ".ruff_cache", 62 | ".svn", 63 | ".tox", 64 | ".venv", 65 | "__pypackages__", 66 | "_build", 67 | "buck-out", 68 | "build", 69 | "dist", 70 | "node_modules", 71 | "venv", 72 | ] 73 | [tool.ruff.lint] 74 | per-file-ignores = {} 75 | ignore = [ 76 | "E721", # Comparing types instead of isinstance 77 | "E741", # Ambiguous variable names 78 | "E501", # Conflicts with ruff format 79 | "E722", # Bare except 80 | ] 81 | select = [ 82 | # Pyflakes 83 | "F", 84 | # Pycodestyle 85 | "E", 86 | "W", 87 | # isort 88 | "I", 89 | # Pyupgrade 90 | "UP", 91 | ] 92 | 93 | 94 | [tool.ruff.lint.mccabe] 95 | max-complexity = 18 96 | 97 | [tool.ruff.lint.isort] 98 | known-first-party = ["carbonplan_data"] 99 | 100 | [tool.pytest.ini_options] 101 | console_output_style = "count" 102 | addopts = "--cov=./ --cov-report=xml --verbose" 103 | -------------------------------------------------------------------------------- /scripts/fia/00_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Download FIA Database\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook downloads local copies of the FIA database for processing.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- sources.yaml\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- Local copies of the FIA database\n", 22 | "\n", 23 | "**Notes:**\n", 24 | "\n", 25 | "- No reprojection or processing of the data is done in this notebook.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import pathlib\n", 42 | "import zipfile\n", 43 | "\n", 44 | "import urlpath\n", 45 | "import wget\n", 46 | "import yaml\n", 47 | "\n", 48 | "workdir = pathlib.Path(\"/Users/jhamman/workdir/carbonplan_data_downloads/fia/\")\n", 49 | "workdir.mkdir(parents=True, exist_ok=True)\n", 50 | "workdir" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "with open(\"../../sources.yaml\") as f:\n", 60 | " sources = yaml.load(f, Loader=yaml.FullLoader)[\"fia\"]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "sources" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "for key, dset in sources[\"data\"].items():\n", 79 | " if \"download\" in dset[\"actions\"]:\n", 80 | " for url in dset[\"urlpath\"]:\n", 81 | " url = urlpath.URL(url)\n", 82 | " out = workdir / url.name\n", 83 | " if not out.exists():\n", 84 | " print(f\"downloading {url}\")\n", 85 | " wget.download(str(url), out=str(out))\n", 86 | "\n", 87 | " if \"unzip\" in dset[\"actions\"]:\n", 88 | " outdir = workdir / out.stem\n", 89 | " if not outdir.exists():\n", 90 | " outdir.mkdir(parents=True)\n", 91 | " with zipfile.ZipFile(out, \"r\") as f:\n", 92 | " print(f\"extracting contents of {out}\")\n", 93 | " f.extractall(outdir)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | 
"execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "print(\"done\")" 103 | ] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.7.8" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 4 127 | } 128 | -------------------------------------------------------------------------------- /scripts/fia/01_raw_to_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# FIA to Parquet\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 30, 2020_\n", 12 | "\n", 13 | "This notebook converts FIA csv files to Parquet format and stages them in a\n", 14 | "Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- `ENTIRE` directory\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One Parquet dataset per CSV: `gs://carbonplan-data/raw/fia/.parquet`\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import gcsfs\n", 36 | "import pandas as pd\n", 37 | "\n", 38 | "from carbonplan_data.utils import setup\n", 39 | "\n", 40 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 41 | "fs = gcsfs.GCSFileSystem(\n", 42 | " project=\"carbonplan\",\n", 43 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 44 | ")" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "workdir, upload = setup(\"joe\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "csvs = (workdir / \"fia/ENTIRE\").glob(\"*csv\")" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "import numpy as np\n", 72 | "\n", 73 | "\n", 74 | "def force_float32(fname):\n", 75 | " memmap = fname.stat().st_size > 1e8\n", 76 | "\n", 77 | " df = pd.read_csv(fname, engine=\"c\", low_memory=False, memory_map=memmap)\n", 78 | " for c in df:\n", 79 | " if \"f8\" in df[c].dtype.str:\n", 80 | " df[c] = df[c].astype(np.float32)\n", 81 | "\n", 82 | " return df" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "def exists(blob):\n", 92 | " try:\n", 93 | " f = fs.open(blob, \"rb\")\n", 94 | " f.close()\n", 95 | " return True\n", 96 | " except:\n", 97 | " return False" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "failed = []\n", 107 | "for fname in csvs:\n", 108 | " blob = f\"carbonplan-data/raw/fia/{fname.stem}.parquet\"\n", 109 | " print(fname.stem)\n", 110 | "\n", 111 | " if \"TREE.csv\" in 
str(fname):\n", 112 | " continue\n", 113 | "\n", 114 | " if exists(blob):\n", 115 | " continue\n", 116 | "\n", 117 | " df = force_float32(fname)\n", 118 | "\n", 119 | " print(blob)\n", 120 | "\n", 121 | " try:\n", 122 | " df.to_parquet(\n", 123 | " blob,\n", 124 | " compression=\"gzip\",\n", 125 | " open_with=fs.open,\n", 126 | " row_group_offsets=1000,\n", 127 | " engine=\"fastparquet\",\n", 128 | " )\n", 129 | " # consider using dask dataframe here to write to chunked dataframes here.\n", 130 | " print(\" --> \", blob)\n", 131 | " except Exception as e:\n", 132 | " failed.append((fname, e))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "failed" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# TREE.csv is a special case\n", 151 | "\n", 152 | "import dask.dataframe as dd\n", 153 | "\n", 154 | "row_group_offsets = 1000\n", 155 | "dtype = {\n", 156 | " \"AGENTCD\": \"float64\",\n", 157 | " \"CULL\": \"float64\",\n", 158 | " \"P2A_GRM_FLG\": \"object\",\n", 159 | " \"TREECLCD\": \"float64\",\n", 160 | " \"TREEHISTCD\": \"float64\",\n", 161 | " \"MODIFIED_IN_INSTANCE\": \"float64\",\n", 162 | " \"GST_PNWRS\": \"object\",\n", 163 | " \"SPGRPCD\": \"float64\",\n", 164 | " \"DIAHTCD\": \"float64\",\n", 165 | " \"SUBCYCLE\": \"float64\",\n", 166 | " \"CAVITY_USE_PNWRS\": \"object\",\n", 167 | "}\n", 168 | "\n", 169 | "blob = \"TREE.parquet\"\n", 170 | "\n", 171 | "df = dd.read_csv(\n", 172 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/fia/ENTIRE/TREE.csv\",\n", 173 | " dtype=dtype,\n", 174 | ")\n", 175 | "\n", 176 | "df.to_parquet(\"gs://carbonplan-data/raw/fia/TREE.parquet\")" 177 | ] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "Python 3", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.8.2" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 4 201 | } 202 | -------------------------------------------------------------------------------- /scripts/fluxnet/01_raw_to_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# FLUXNET to Parquet\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), August 7, 2020_\n", 12 | "\n", 13 | "This notebook converts FLUXNET csv files to Parquet format and stages them in a\n", 14 | "Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- `fluxnet` directory\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One Parquet dataset per CSV: `gs://carbonplan-data/raw/fluxnet/.parquet`\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import pathlib\n", 36 | "\n", 37 | "import dask.dataframe as dd\n", 38 | "import gcsfs\n", 39 | "import pandas as pd\n", 40 | "from fsspec.implementations.zip import ZipFileSystem\n", 
41 | "from tqdm import tqdm\n", 42 | "\n", 43 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 44 | "fs = gcsfs.GCSFileSystem(\n", 45 | " project=\"carbonplan\",\n", 46 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 47 | ")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "workdir = pathlib.Path(\"/Users/jhamman/workdir/carbonplan_data_downloads/\")" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "storage_options = {\"token\": fs.session.credentials, \"project\": \"carbonplan\"}" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "jupyter": { 73 | "outputs_hidden": true 74 | } 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "zips = (workdir / \"fluxnet\").glob(\"*zip\")\n", 79 | "\n", 80 | "\n", 81 | "def make_fname(stem):\n", 82 | " p = stem.lower().split(\"_\")\n", 83 | " if \"AUX\" in stem:\n", 84 | " name = \"_\".join([p[1], *p[3:4]])\n", 85 | " else:\n", 86 | " name = \"_\".join([p[1], *p[3:5]])\n", 87 | " return name\n", 88 | "\n", 89 | "\n", 90 | "for zipfile in tqdm(zips):\n", 91 | " print(zipfile)\n", 92 | "\n", 93 | " zipfs = ZipFileSystem(zipfile, mode=\"r\")\n", 94 | " csvs = zipfs.glob(\"*csv\")\n", 95 | "\n", 96 | " for csv in csvs:\n", 97 | " fname = pathlib.PosixPath(csv)\n", 98 | " name = make_fname(fname.stem)\n", 99 | " blob = blob = f\"gcs://carbonplan-data/raw/fluxnet/{name}.parquet\"\n", 100 | "\n", 101 | " df = pd.read_csv(zipfs.open(csv, mode=\"rb\"))\n", 102 | " ddf = dd.from_pandas(df, chunksize=1000).repartition(partition_size=\"50MB\")\n", 103 | " ddf.to_parquet(blob, storage_options=storage_options)\n", 104 | "\n", 105 | " print(\"--> \", blob)" 106 | ] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.8.2" 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 4 130 | } 131 | -------------------------------------------------------------------------------- /scripts/gcp/01_raw_to_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Global Carbon Project to Parquet\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), August 17, 2020_\n", 12 | "\n", 13 | "This notebook converts faw Excel files from the Global Carbon Project to Parquet\n", 14 | "format and stages them in a Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- `gcp` directory\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One Parquet dataset per Excel sheet:\n", 23 | " `gs://carbonplan-data/raw/gcp/.parquet`\n", 24 | "\n", 25 | "**Notes:**\n", 26 | "\n", 27 | "- No reprojection or processing of the data is done in this notebook.\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import 
dask.dataframe as dd\n", 37 | "import gcsfs\n", 38 | "import pandas as pd" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 48 | "fs = gcsfs.GCSFileSystem(\n", 49 | " project=\"carbonplan\",\n", 50 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 51 | ")\n", 52 | "\n", 53 | "storage_options = {\"token\": fs.session.credentials, \"project\": \"carbonplan\"}\n", 54 | "\n", 55 | "\n", 56 | "def process(fname, target, **open_kwargs):\n", 57 | " df = pd.read_excel(fname, **open_kwargs)\n", 58 | " df = df.loc[:, ~df.columns.str.contains(\"^Unnamed\")]\n", 59 | " df = dd.from_pandas(df, npartitions=1)\n", 60 | " df.to_parquet(target, engine=\"fastparquet\", storage_options=storage_options)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## National Carbon Emissions\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "fname = (\n", 77 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/gcp/National_Carbon_Emissions_2019v1.0.xlsx\"\n", 78 | ")\n", 79 | "\n", 80 | "# Territorial Emissions\n", 81 | "target = \"gs://carbonplan-data/raw/gcp/consumption_emissions.parquet\"\n", 82 | "open_kwargs = dict(sheet_name=\"Territorial Emissions\", skiprows=16, index_col=0)\n", 83 | "process(fname, target, **open_kwargs)\n", 84 | "\n", 85 | "# Consumption Emissions\n", 86 | "target = \"gs://carbonplan-data/raw/gcp/territorial_emissions.parquet\"\n", 87 | "open_kwargs = dict(sheet_name=\"Consumption Emissions\", skiprows=8, index_col=0)\n", 88 | "process(fname, target, **open_kwargs)\n", 89 | "\n", 90 | "# Emissions Transfers\n", 91 | "target = \"gs://carbonplan-data/raw/gcp/transfer_emissions.parquet\"\n", 92 | "open_kwargs = dict(sheet_name=\"Emissions Transfers\", skiprows=8, index_col=0)\n", 93 | "process(fname, target, **open_kwargs)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## Global Carbon Budget\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "fname = \"/Users/jhamman/workdir/carbonplan_data_downloads/gcp/raw_gcb_Global_Carbon_Budget_2019v1.0.xlsx\"\n", 110 | "\n", 111 | "# Global Carbon Budget\n", 112 | "target = \"gs://carbonplan-data/raw/gcp/global_carbon_budget.parquet\"\n", 113 | "open_kwargs = dict(sheet_name=\"Global Carbon Budget\", skiprows=18, index_col=0)\n", 114 | "process(fname, target, **open_kwargs)\n", 115 | "\n", 116 | "# Fossil Emissions by Fuel Type\n", 117 | "target = \"gs://carbonplan-data/raw/gcp/fossil_emissions_by_fuel_type.parquet\"\n", 118 | "open_kwargs = dict(sheet_name=\"Fossil Emissions by Fuel Type\", skiprows=12, index_col=0)\n", 119 | "process(fname, target, **open_kwargs)\n", 120 | "\n", 121 | "# Land-Use Change Emissions\n", 122 | "target = \"gs://carbonplan-data/raw/gcp/land_use_change_emissions.parquet\"\n", 123 | "open_kwargs = dict(sheet_name=\"Land-Use Change Emissions\", skiprows=25, index_col=0)\n", 124 | "process(fname, target, **open_kwargs)\n", 125 | "\n", 126 | "# Ocean Sink\n", 127 | "target = \"gs://carbonplan-data/raw/gcp/ocean_sink.parquet\"\n", 128 | "open_kwargs = dict(sheet_name=\"Ocean Sink\", skiprows=22, index_col=0)\n", 129 | 
"process(fname, target, **open_kwargs)\n", 130 | "\n", 131 | "# Terrestrial Sink\n", 132 | "target = \"gs://carbonplan-data/raw/gcp/terrestrial_sink.parquet\"\n", 133 | "open_kwargs = dict(sheet_name=\"Terrestrial Sink\", skiprows=23, index_col=0)\n", 134 | "process(fname, target, **open_kwargs)\n", 135 | "\n", 136 | "# Historical Budget\n", 137 | "target = \"gs://carbonplan-data/raw/gcp/historical_budget.parquet\"\n", 138 | "open_kwargs = dict(sheet_name=\"Historical Budget\", skiprows=14, index_col=0)\n", 139 | "process(fname, target, **open_kwargs)" 140 | ] 141 | } 142 | ], 143 | "metadata": { 144 | "kernelspec": { 145 | "display_name": "Python 3", 146 | "language": "python", 147 | "name": "python3" 148 | }, 149 | "language_info": { 150 | "codemirror_mode": { 151 | "name": "ipython", 152 | "version": 3 153 | }, 154 | "file_extension": ".py", 155 | "mimetype": "text/x-python", 156 | "name": "python", 157 | "nbconvert_exporter": "python", 158 | "pygments_lexer": "ipython3", 159 | "version": "3.8.2" 160 | } 161 | }, 162 | "nbformat": 4, 163 | "nbformat_minor": 4 164 | } 165 | -------------------------------------------------------------------------------- /scripts/global-biomass/01_biomass_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Convert Global Biomass data to COGs\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook converts Global Aboveground and Belowground Biomass Carbon Density\n", 14 | "Maps for the Year 2010 to COG format.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- local copy of biomass rasters\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- Local copies of biomass rasters in COG format\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "\n", 37 | "from rio_cogeo.cogeo import cog_translate\n", 38 | "from rio_cogeo.profiles import cog_profiles\n", 39 | "\n", 40 | "from carbonplan_data.utils import setup\n", 41 | "\n", 42 | "# This is the COG profile:\n", 43 | "dst_profile = cog_profiles.get(\"deflate\")" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "workdir, upload = setup(\"joe\")" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "keys = {\n", 62 | " \"aboveground_biomass_carbon_2010\": \"aboveground\",\n", 63 | " \"aboveground_biomass_carbon_2010_uncertainty\": \"aboveground_uncertainty\",\n", 64 | " \"belowground_biomass_carbon_2010\": \"belowground\",\n", 65 | " \"belowground_biomass_carbon_2010_uncertainty\": \"belowground_uncertainty\",\n", 66 | "}\n", 67 | "\n", 68 | "for skey, tkey in keys.items():\n", 69 | " # raw file\n", 70 | " source = workdir / f\"Global_Maps_C_Density_2010_1763/data/{skey}.tif\"\n", 71 | "\n", 72 | " # local target\n", 73 | " target = \"./raster.tif\"\n", 74 | "\n", 75 | " # This is where we'll write the COGs when we're done\n", 76 | " cloud_target = f\"raw/2010-harmonized-biomass/global/300m/{tkey}.tif\"\n", 77 | "\n", 78 | " # translate to COG\n", 79 | " cog_translate(source, target, 
dst_profile)\n", 80 | "\n", 81 | " # Upload to GCS\n", 82 | " upload(target, cloud_target)\n", 83 | "\n", 84 | " # Remove temporary file\n", 85 | " os.remove(target)" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 3", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.7.8" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 4 110 | } 111 | -------------------------------------------------------------------------------- /scripts/gridmet/01_gridmet_to_zarr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# gridMET to Zarr\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook converts the raw gridMET dataset to Zarr format.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- intake catalog: `climate.gridmet_opendap`\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- Cloud copy of gridMET\n", 22 | "\n", 23 | "**Notes:**\n", 24 | "\n", 25 | "- No reprojection or processing of the data is done in this notebook.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import gcsfs\n", 35 | "import xarray as xr\n", 36 | "from numcodecs.zlib import Zlib\n", 37 | "\n", 38 | "fs = gcsfs.GCSFileSystem(\n", 39 | " project=\"carbonplan\",\n", 40 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 41 | ")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "years = list(range(1979, 2021))\n", 51 | "variables = [\n", 52 | " \"pr\",\n", 53 | " \"tmmn\",\n", 54 | " \"tmmx\",\n", 55 | " \"rmax\",\n", 56 | " \"rmin\",\n", 57 | " \"sph\",\n", 58 | " \"srad\",\n", 59 | " \"th\",\n", 60 | " \"vs\",\n", 61 | " \"bi\",\n", 62 | " \"fm100\",\n", 63 | " \"fm1000\",\n", 64 | " \"erc\",\n", 65 | " \"pdsi\",\n", 66 | " \"etr\",\n", 67 | " \"pet\",\n", 68 | " \"vpd\",\n", 69 | "]" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "source_pattern = \"https://www.northwestknowledge.net/metdata/data/{var}_{year}.nc\"" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "jupyter": { 86 | "outputs_hidden": true 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "ds_list = []\n", 92 | "for v in variables:\n", 93 | " print(v)\n", 94 | " ds_list.append(xr.concat([source(variable=v, year=y).to_dask() for y in years], dim=\"day\")) # noqa" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "ds = xr.merge(ds_list, compat=\"override\")\n", 104 | "ds[\"crs\"] = ds_list[0][\"crs\"]\n", 105 | "ds" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "ds.nbytes / 1e9" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | 
"execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "mapper = fs.get_mapper(\"carbonplan-data/raw/gridmet/4km/raster.zarr\")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "ds = ds.chunk({\"day\": 1000, \"lat\": 256, \"lon\": 256})\n", 133 | "ds" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "encoding = {v: {\"compressor\": Zlib(4)} for v in ds.variables}\n", 143 | "encoding" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "future = ds.to_zarr(mapper, mode=\"w\", encoding=encoding, compute=False)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "from dask.diagnostics import ProgressBar\n", 162 | "\n", 163 | "with ProgressBar():\n", 164 | " future.compute(scheduler=\"threading\")" 165 | ] 166 | } 167 | ], 168 | "metadata": { 169 | "kernelspec": { 170 | "display_name": "Python 3", 171 | "language": "python", 172 | "name": "python3" 173 | }, 174 | "language_info": { 175 | "codemirror_mode": { 176 | "name": "ipython", 177 | "version": 3 178 | }, 179 | "file_extension": ".py", 180 | "mimetype": "text/x-python", 181 | "name": "python", 182 | "nbconvert_exporter": "python", 183 | "pygments_lexer": "ipython3", 184 | "version": "3.8.2" 185 | } 186 | }, 187 | "nbformat": 4, 188 | "nbformat_minor": 4 189 | } 190 | -------------------------------------------------------------------------------- /scripts/iiasa/01_raw_to_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# IIASA to Parquet\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), July 1, 2020_\n", 12 | "\n", 13 | "This notebook converts IIASA CSV and DAT files to Parquet format and stages them\n", 14 | "in a Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- various data files downloaded from IIASA website (manual process).\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One Parquet dataset per local data file:\n", 23 | " `gs://carbonplan-data-restricted/raw/iiasa/.parquet`\n", 24 | "\n", 25 | "**Notes:**\n", 26 | "\n", 27 | "- No reprojection or processing of the data is done in this notebook.\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import pathlib\n", 37 | "\n", 38 | "import gcsfs\n", 39 | "import pandas as pd\n", 40 | "\n", 41 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 42 | "fs = gcsfs.GCSFileSystem(\n", 43 | " project=\"carbonplan\",\n", 44 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 45 | ")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "source_dir = pathlib.Path(\"../../carbonplan_data/iiasa/SSP_CMIP6_201811.csv/\")\n", 55 | "blob_prefix = \"carbonplan-data-restricted/raw/iiasa/SSP_CMIP6_201811\"\n", 56 | "csvs = source_dir.glob(\"*csv\")\n", 57 | "\n", 58 | "for csv in csvs:\n", 59 | " blob = 
f\"{blob_prefix}/{csv.stem.lower()}.parquet\"\n", 60 | " print(blob)\n", 61 | "\n", 62 | " df = pd.read_csv(csv)\n", 63 | " df.to_parquet(blob, compression=\"gzip\", open_with=fs.open, engine=\"fastparquet\")" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "source_dir = pathlib.Path(\"../../carbonplan_data/iiasa/SSP_IAM_V2_201811.csv/\")\n", 73 | "blob_prefix = \"carbonplan-data-restricted/raw/iiasa/SSP_IAM_V2_201811\"\n", 74 | "csvs = source_dir.glob(\"*csv\")\n", 75 | "\n", 76 | "for csv in csvs:\n", 77 | " blob = f\"{blob_prefix}/{csv.stem.lower()}.parquet\"\n", 78 | " print(blob)\n", 79 | "\n", 80 | " df = pd.read_csv(csv)\n", 81 | " df.to_parquet(blob, compression=\"gzip\", open_with=fs.open, engine=\"fastparquet\")" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "source = \"../../carbonplan_data/iiasa/SspDb_compare_regions_2013-06-12.csv\"\n", 91 | "blob = \"carbonplan-data-restricted/raw/iiasa/SspDb_compare_regions_2013-06-12.parquet\"\n", 92 | "df = pd.read_csv(source)\n", 93 | "df.to_parquet(blob, compression=\"gzip\", open_with=fs.open, engine=\"fastparquet\")" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "source = \"../../carbonplan_data/iiasa/SspDb_country_data_2013-06-12.csv\"\n", 103 | "blob = \"carbonplan-data-restricted/raw/iiasa/SspDb_country_data_2013-06-12.parquet\"\n", 104 | "df = pd.read_csv(source)\n", 105 | "df.to_parquet(blob, compression=\"gzip\", open_with=fs.open, engine=\"fastparquet\")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "# TODO:\n", 113 | "\n", 114 | "- write parser for RCP DAT files." 
115 | ] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.8.2" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 4 139 | } 140 | -------------------------------------------------------------------------------- /scripts/mtbs/01_raw_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS to Cloud Optimized GeoTIFF\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 5, 2020_\n", 12 | "\n", 13 | "This notebook converts MTBS 30m yearly rasters to Cloud Optimized GeoTIFF and\n", 14 | "stages them in a Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- `DATA.zip` from MTBS website\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One COG per year: `gs://carbonplan-data/raw/MTBS/30m//raster.tif`\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import io\n", 36 | "import os.path\n", 37 | "\n", 38 | "import gcsfs\n", 39 | "from fsspec.implementations import zip\n", 40 | "from rasterio.io import MemoryFile\n", 41 | "from rio_cogeo.cogeo import cog_translate\n", 42 | "from rio_cogeo.profiles import cog_profiles\n", 43 | "\n", 44 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 45 | "fs = gcsfs.GCSFileSystem(\n", 46 | " project=\"carbonplan\",\n", 47 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 48 | ")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "The input for this script is a zip file called `DATA.zip`. 
This was downloaded\n", 56 | "from: https://www.mtbs.gov/direct-download Specifically, it came from:\n", 57 | "\n", 58 | "```\n", 59 | " - [select] Burn Severity Mosaics\n", 60 | " -> [select] Continental U.S.\n", 61 | " -> [click] all years\n", 62 | " -> [click] Download 34 Files\n", 63 | "```\n", 64 | "\n", 65 | "This file does not need to be un-zipped for the rest of the script to run.\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# raw zip file\n", 75 | "raw_zips = \"~/Downloads/DATA.zip\"\n", 76 | "\n", 77 | "# This is where we'll write the COGs when we're done\n", 78 | "bucket = \"carbonplan-data/raw/MTBS/30m/\"\n", 79 | "\n", 80 | "# This is the COG profile:\n", 81 | "dst_profile = cog_profiles.get(\"deflate\")" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "def translate(fo, out_file):\n", 91 | " \"\"\"translate a file object (`fo`) to cloud optimized geotiff\n", 92 | "\n", 93 | " the resulting COG is written to the filesystem (`fs`) defined above.\n", 94 | " \"\"\"\n", 95 | " dst_profile = cog_profiles.get(\"deflate\")\n", 96 | " with MemoryFile() as mem_dst:\n", 97 | " # Important, we pass `mem_dst.name` as output dataset path\n", 98 | " cog_translate(fo, mem_dst.name, dst_profile, in_memory=True)\n", 99 | " print(f\"writing to {out_file}\")\n", 100 | " with fs.open(out_file, \"wb\") as f:\n", 101 | " f.write(mem_dst.read())" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# iterate through the zip file, extracting individual years\n", 111 | "# write only files with `tif` or `htm` suffixes to the cloud bucket\n", 112 | "# Warning: this step takes a while to run, go get some coffee.\n", 113 | "root = zip.ZipFileSystem(raw_zips).get_mapper(\"composite_data\")\n", 114 | "for key in root:\n", 115 | " year = key.split(\"/\")[1]\n", 116 | " sub = io.BytesIO(root[key])\n", 117 | " r2 = zip.ZipFileSystem(sub).get_mapper(\"\")\n", 118 | "\n", 119 | " for fname in r2:\n", 120 | " if fname.endswith(\"tif\"):\n", 121 | " fo = io.BytesIO(r2[fname])\n", 122 | " out_name = os.path.join(bucket, f\"{year}.tif\")\n", 123 | " translate(fo, out_name)\n", 124 | " elif fname.endswith(\"htm\"):\n", 125 | " out_name = os.path.join(bucket, f\"{year}.htm\")\n", 126 | " with fs.open(out_name, \"wb\") as f:\n", 127 | " f.write(r2[fname])\n", 128 | " else:\n", 129 | " continue\n", 130 | " print(f\"done with {out_name}\")" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.8.2" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 4 155 | } 156 | -------------------------------------------------------------------------------- /scripts/mtbs/02_downsampling_and_reprojection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS downsampling and reprojection\n", 10 | 
"\n", 11 | "_by Joe Hamman (CarbonPlan), August 5, 2020_\n", 12 | "\n", 13 | "This notebook downsamples and reprojects MTBS 250m yearly rasters stored in\n", 14 | "Cloud Optimized GeoTIFF into 250m and 4000m GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- COG outputs from `01_mtbs_to_cogs.ipynb`\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- COG outputs after downsampling and reprojection\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Source CRS and projection extent come from MTBS\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from carbonplan_data.utils import projections, setup\n", 45 | "\n", 46 | "workdir, upload = setup(\"joe\")\n", 47 | "workdir" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from rio_cogeo.cogeo import cog_translate\n", 57 | "from rio_cogeo.profiles import cog_profiles\n", 58 | "\n", 59 | "dst_profile = cog_profiles.get(\"deflate\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def get_files(region):\n", 69 | " return [\n", 70 | " {\n", 71 | " \"source\": workdir / f\"mtbs/{region}_foresttype/250m/raster.tif\",\n", 72 | " \"target\": f\"processed/nftd/{region}/{resolution}m/type.tif\",\n", 73 | " },\n", 74 | " {\n", 75 | " \"source\": workdir / f\"nftd/{region}_forestgroup/250m/raster.tif\",\n", 76 | " \"target\": f\"processed/nftd/{region}/{resolution}m/group.tif\",\n", 77 | " },\n", 78 | " {\n", 79 | " \"source\": workdir / f\"nftd/{region}_foresttype/250m/error.tif\",\n", 80 | " \"target\": f\"processed/nftd/{region}/{resolution}m/type_error.tif\",\n", 81 | " },\n", 82 | " {\n", 83 | " \"source\": workdir / f\"nftd/{region}_forestgroup/250m/error.tif\",\n", 84 | " \"target\": f\"processed/nftd/{region}/{resolution}m/group_error.tif\",\n", 85 | " },\n", 86 | " ]" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "for resolution in [250, 4000]:\n", 96 | " for region in [\"ak\", \"conus\"]:\n", 97 | " files = get_files(region)\n", 98 | " crs, extent = projections(\"albers\", region)\n", 99 | " for f in files:\n", 100 | " if \"error\" in str(f[\"source\"]):\n", 101 | " resampling = \"average\"\n", 102 | " elif resolution == 4000:\n", 103 | " resampling = \"mode\"\n", 104 | " else:\n", 105 | " resampling = \"near\"\n", 106 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}\").format(\n", 107 | " crs,\n", 108 | " extent,\n", 109 | " resolution,\n", 110 | " resolution,\n", 111 | " resampling,\n", 112 | " f[\"source\"],\n", 113 | " \"./raster.tif\",\n", 114 | " )\n", 115 | " os.system(cmd)\n", 116 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 117 | " upload(\"./raster.tif\", f[\"target\"])\n", 118 | " os.remove(\"./raster.tif\")" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "Python 3", 125 | "language": "python", 126 | "name": "python3" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": 
"python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.8.2" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 4 143 | } 144 | -------------------------------------------------------------------------------- /scripts/mtbs/02_mtbs_to_zarr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS to Zarr\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 17, 2020_\n", 12 | "\n", 13 | "This notebook converts MTBS 30m yearly rasters stored in Cloud Optimized GeoTIFF\n", 14 | "and stages them in a single Zarr archive.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- COG outputs from `01_raw_to_cogs.ipynb`\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- 1 Zarr archive:\n", 23 | " `gs://carbonplan-data/processed/MTBS/raster.zarr//`\n", 24 | "\n", 25 | "**Notes:**\n", 26 | "\n", 27 | "- In the process of processing this dataset, we found that the behavior in\n", 28 | " rasterio's `reproject` function was sensitive to the package version for\n", 29 | " rasterio and/or gdal. Versions we found to work were\n", 30 | " `rasterio=1.0.25,gdal=2.4.2`. Versions that we found to fail were\n", 31 | " `rasterio=1.1.5,gdal=3.1.0`\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import os\n", 41 | "\n", 42 | "import gcsfs\n", 43 | "import numpy as np\n", 44 | "import rasterio\n", 45 | "import xarray as xr\n", 46 | "from numcodecs.zlib import Zlib\n", 47 | "from rasterio import Affine\n", 48 | "from rasterio.crs import CRS\n", 49 | "from rasterio.warp import Resampling, reproject, transform\n", 50 | "\n", 51 | "scratch = os.environ[\"SCRATCH\"]" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "def base_crs():\n", 61 | " return (\n", 62 | " 'PROJCS[\"Albers_Conical_Equal_Area\",'\n", 63 | " 'GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",'\n", 64 | " 'SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],'\n", 65 | " \"TOWGS84[0,0,0,-0,-0,-0,0],\"\n", 66 | " 'AUTHORITY[\"EPSG\",\"6326\"]],'\n", 67 | " 'PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],'\n", 68 | " 'UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],'\n", 69 | " 'AUTHORITY[\"EPSG\",\"4326\"]],'\n", 70 | " 'PROJECTION[\"Albers_Conic_Equal_Area\"],'\n", 71 | " 'PARAMETER[\"standard_parallel_1\",29.5],'\n", 72 | " 'PARAMETER[\"standard_parallel_2\",45.5],'\n", 73 | " 'PARAMETER[\"latitude_of_center\",23],'\n", 74 | " 'PARAMETER[\"longitude_of_center\",-96],'\n", 75 | " 'PARAMETER[\"false_easting\",0],'\n", 76 | " 'PARAMETER[\"false_northing\",0],'\n", 77 | " 'UNIT[\"meters\",1]]'\n", 78 | " )" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def make_dst_band(src_band, src_resolution):\n", 88 | " left = -2493045.0\n", 89 | " right = 2342655.0\n", 90 | " top = 3310005.0\n", 91 | " bottom = 177285.0\n", 92 | " dst_transform = Affine(30.0, 0.0, left, 0.0, -30.0, top)\n", 93 | " dst_resolution = dst_transform[0]\n", 94 | " dst_transform = dst_transform * Affine.scale(\n", 95 | " src_resolution / dst_resolution, src_resolution / dst_resolution\n", 96 | " )\n", 97 | " dst_crs = CRS.from_wkt(base_crs())\n", 98 | "\n", 99 | " 
dst_shape = [\n", 100 | " round((top - bottom) / src_resolution),\n", 101 | " round((right - left) / src_resolution),\n", 102 | " ]\n", 103 | "\n", 104 | " dst_band = np.zeros(dst_shape, np.float32)\n", 105 | " return dst_band, dst_transform, dst_crs, dst_shape\n", 106 | "\n", 107 | "\n", 108 | "def calc_coords(shape, trans, crs):\n", 109 | " ny, nx = shape\n", 110 | "\n", 111 | " # crs coords\n", 112 | " x, _ = trans * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5)\n", 113 | " _, y = trans * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5)\n", 114 | "\n", 115 | " # convert to lat/lon\n", 116 | " xs, ys = np.meshgrid(x, y)\n", 117 | " lon, lat = transform(crs, {\"init\": \"EPSG:4326\"}, xs.flatten(), ys.flatten())\n", 118 | "\n", 119 | " return {\n", 120 | " \"x\": xr.DataArray(x, dims=(\"x\",)),\n", 121 | " \"y\": xr.DataArray(y, dims=(\"y\",)),\n", 122 | " \"lat\": xr.DataArray(np.asarray(lat).reshape((ny, nx)), dims=(\"y\", \"x\")),\n", 123 | " \"lon\": xr.DataArray(np.asarray(lon).reshape((ny, nx)), dims=(\"y\", \"x\")),\n", 124 | " }\n", 125 | "\n", 126 | "\n", 127 | "def prepare_mtbs(year, resolution, return_ds=True):\n", 128 | " src_path = f\"gs://carbonplan-data/raw/MTBS/30m/{year}.tif\"\n", 129 | " with rasterio.open(src_path, \"r\") as src_raster:\n", 130 | " src_transform = src_raster.meta[\"transform\"]\n", 131 | " src_crs = src_raster.meta[\"crs\"]\n", 132 | " src_band = src_raster.read(1)\n", 133 | " src_resolution = resolution\n", 134 | "\n", 135 | " dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(src_band, src_resolution)\n", 136 | " print(\"calc_coords\")\n", 137 | " coords = calc_coords(dst_shape, dst_transform, dst_crs)\n", 138 | "\n", 139 | " src_nodata = 6\n", 140 | " if resolution == 30:\n", 141 | " resampling = Resampling.nearest\n", 142 | " elif resolution > 30:\n", 143 | " resampling = Resampling.average\n", 144 | " # set moderate or high burn severity to 1 and others to 1\n", 145 | " src_band_tmp = ((src_band == 3) | (src_band == 4)).astype(\"uint8\")\n", 146 | " # set masked regions to nodata value\n", 147 | " src_band_tmp[src_band == src_nodata] = src_nodata\n", 148 | " src_band = src_band_tmp\n", 149 | " dst_band = dst_band.astype(\"float32\") # convert to float for averaging\n", 150 | "\n", 151 | " print(\"reproject\")\n", 152 | " # this seems to require rasterio=1.0.25 and gdal=2.4.2\n", 153 | " reproject(\n", 154 | " src_band,\n", 155 | " dst_band,\n", 156 | " src_transform=src_transform,\n", 157 | " src_crs=src_crs,\n", 158 | " dst_transform=dst_transform,\n", 159 | " dst_crs=dst_crs,\n", 160 | " resampling=resampling,\n", 161 | " src_nodata=src_nodata,\n", 162 | " dst_nodata=src_raster.meta[\"nodata\"],\n", 163 | " )\n", 164 | "\n", 165 | " meta = src_raster.meta\n", 166 | " meta.update(\n", 167 | " width=dst_shape[0],\n", 168 | " height=dst_shape[1],\n", 169 | " dtype=str(dst_band.dtype),\n", 170 | " crs=dst_crs.to_wkt(),\n", 171 | " transform=list(dst_transform),\n", 172 | " nodata=src_raster.meta[\"nodata\"],\n", 173 | " )\n", 174 | "\n", 175 | " varname = f\"{year}\"\n", 176 | " chunks = {\"x\": 512, \"y\": 512}\n", 177 | " ds = xr.DataArray(dst_band, dims=(\"y\", \"x\"), attrs=meta).to_dataset(name=varname)\n", 178 | " ds = ds.assign_coords(coords).chunk(chunks)\n", 179 | "\n", 180 | " if return_ds:\n", 181 | " return ds\n", 182 | " else:\n", 183 | " fs = gcsfs.GCSFileSystem(project=\"carbonplan\", token=\"cloud\", requester_pays=True)\n", 184 | " mapper = fs.get_mapper(scratch + f\"/MTBS.{year}.{resolution}m.zarr\")\n", 185 | " 
ds.to_zarr(store=mapper, mode=\"w\", encoding={varname: {\"compressor\": Zlib()}})" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "years = list(range(1984, 2018))\n", 195 | "\n", 196 | "dsets = [prepare_mtbs(y, 4000) for y in years]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "varname = \"burned_area\"\n", 206 | "da = xr.merge(dsets).to_array(dim=\"time\", name=varname)\n", 207 | "da[\"time\"] = da.time.astype(int)\n", 208 | "ds = da.to_dataset()\n", 209 | "ds[varname].attrs.update(dsets[0][\"1984\"].attrs)\n", 210 | "ds" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "fs = gcsfs.GCSFileSystem(project=\"carbonplan\", token=\"cloud\", requester_pays=True)\n", 220 | "mapper = fs.get_mapper(\"carbonplan-data/processed/MTBS/raster.zarr\")\n", 221 | "\n", 222 | "ds.to_zarr(\n", 223 | " store=mapper,\n", 224 | " group=\"4000m\",\n", 225 | " mode=\"w\",\n", 226 | " encoding={varname: {\"compressor\": Zlib()}},\n", 227 | ")" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "ds[varname].sum(\"time\").plot(robust=True)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "ds[varname]" 246 | ] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": "Python 3", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.7.8" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 4 270 | } 271 | -------------------------------------------------------------------------------- /scripts/mtbs/04_mtbs_perims_to_raster.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS Perimeters to Zarr\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), November 3, 2020_\n", 12 | "\n", 13 | "This notebook converts MTBS fire perimeters to monthly burned area rasters\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- MTBS fire perimeters shapefile\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- 1 Zarr archive:\n", 22 | " `gs://carbonplan-data/processed/mtbs/conus/{res}m/monthly_perims_raster.zarr`\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Text defining large and very large fires from Barbero et al. (2015):\n", 27 | " > The Monitoring Trends in Burn Severity (MTBS) data- base was used to acquire\n", 28 | " > fire location, fire discovery date and burned area for LFs over the\n", 29 | " > contiguous US from 1984 to 2010. 
We excluded fires smaller than 404ha and\n", 30 | " > further eliminated 'unburned to low' burned area for each fire as classified\n", 31 | " > by MTBS to more accurately portray the true area burned (Kolden et al 2012).\n", 32 | " > While the definition of VLFs is subjective and likely geographically\n", 33 | " > dependent, we define VLFs as fires whose size exceeds the 90th percentile\n", 34 | " > (5073 ha) of MTBS fires greater than 404 ha (n = 927) (figure 1(b)) and LF\n", 35 | " > as fires whose size was below the 90th percentile but greater than 404 ha (n\n", 36 | " > = 8343)(figure 1(c)).\n" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import geopandas\n", 46 | "import hvplot.pandas # noqa\n", 47 | "import numpy as np\n", 48 | "import pandas as pd\n", 49 | "import rasterio\n", 50 | "import xarray as xr\n", 51 | "from carbonplan.data import cat\n", 52 | "from rasterio.features import rasterize" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "months = pd.date_range(\"1984-01\", \"2018-12\", freq=\"MS\")" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# mask = cat.nlcd.raster.read().squeeze(drop=True)\n", 71 | "\n", 72 | "region = \"conus\"\n", 73 | "\n", 74 | "mask = rasterio.open(cat.mtbs.raw_raster._urlpath)\n", 75 | "transform = mask.transform\n", 76 | "shape = mask.shape\n", 77 | "src_profile = mask.profile\n", 78 | "\n", 79 | "# TODO: replace with intake use\n", 80 | "perims = geopandas.GeoDataFrame.from_file(\"mtbs_perimeter_data/mtbs_perims_DD/mtbs_perims_DD.shp\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# note we set all start days to 1 (so we can easily group by month later)\n", 90 | "dates = pd.DatetimeIndex(\n", 91 | " [pd.to_datetime(f\"{r.Year}-{r.StartMonth}-1\") for _, r in perims.iterrows()]\n", 92 | ")\n", 93 | "perims.index = dates\n", 94 | "perims = perims.sort_index()\n", 95 | "perims[\"ha\"] = perims[\"Acres\"] * 0.40468564224\n", 96 | "perims[\"ym\"] = dates\n", 97 | "perims = perims.to_crs(crs=mask.crs)\n", 98 | "perims" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "pattern = \"Wild*|Out*|Unknown|Complex\"\n", 108 | "perims = perims[perims.Fire_Type.str.contains(pattern)]\n", 109 | "\n", 110 | "perims_lf = perims[perims.ha.between(404, 5073)]\n", 111 | "perims_vlf = perims[perims.ha > 5073]\n", 112 | "perims_vlf" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "def rasterize_geom(geoms):\n", 122 | " r = rasterize(\n", 123 | " [(geom, 1) for geom in geoms],\n", 124 | " out_shape=shape,\n", 125 | " transform=transform,\n", 126 | " fill=0,\n", 127 | " merge_alg=rasterio.enums.MergeAlg.replace,\n", 128 | " all_touched=True,\n", 129 | " dtype=rasterio.uint8,\n", 130 | " )\n", 131 | " return r" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "perims_vlf[[\"ha\", \"geometry\", \"ym\"]][\"2018\":\"2018\"].to_crs(\"EPSG:4326\").hvplot(\n", 141 | " c=\"ha\", geo=True, coastline=True\n", 
142 | ")" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "from rasterio.io import MemoryFile\n", 152 | "from rio_cogeo.cogeo import cog_translate\n", 153 | "from rio_cogeo.profiles import cog_profiles" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "from gcsfs import GCSFileSystem\n", 163 | "\n", 164 | "\n", 165 | "def copy_to_fs(source, dst, fs):\n", 166 | " with open(source, \"rb\") as fsource:\n", 167 | " with fs.open(dst, \"wb\") as fdst:\n", 168 | " fdst.write(fsource.read())\n", 169 | "\n", 170 | "\n", 171 | "def numpy_to_cog(data, out_fname=\"temp_cog.tif\"):\n", 172 | " with MemoryFile() as memfile:\n", 173 | " with memfile.open(**src_profile) as mem:\n", 174 | " # Populate the input file with numpy array\n", 175 | " mem.write(r, indexes=1)\n", 176 | "\n", 177 | " dst_profile = cog_profiles.get(\"deflate\")\n", 178 | " cog_translate(\n", 179 | " mem,\n", 180 | " out_fname,\n", 181 | " dst_profile,\n", 182 | " in_memory=True,\n", 183 | " quiet=True,\n", 184 | " )\n", 185 | "\n", 186 | "\n", 187 | "fs = GCSFileSystem()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# unocomment to start over\n", 197 | "# paths = fs.glob('carbonplan-data/processed/mtbs/conus/30m/*f_????.??.tif')\n", 198 | "# fs.rm(paths)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# make an empty file we can copy to each month without any fires\n", 208 | "r = np.zeros(shape, dtype=rasterio.uint8)\n", 209 | "numpy_to_cog(r, \"empty_cog.tif\")" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "dst_profile = cog_profiles.get(\"deflate\")\n", 219 | "\n", 220 | "for month in months:\n", 221 | " for name, df in [(\"lf\", perims_lf), (\"vlf\", perims_vlf)]:\n", 222 | " out_fname = (\n", 223 | " f\"carbonplan-data/processed/mtbs/{region}/30m/{name}_{month.strftime('%Y.%m')}.tif\"\n", 224 | " )\n", 225 | "\n", 226 | " if fs.exists(out_fname):\n", 227 | " print(f\"{out_fname} exists, skipping...\")\n", 228 | " continue\n", 229 | "\n", 230 | " try:\n", 231 | " geom = df.loc[[month]].geometry\n", 232 | " print(geom)\n", 233 | " print(f\"rasterizing {month}\")\n", 234 | " r = rasterize_geom(geom)\n", 235 | " numpy_to_cog(r, \"temp_cog.tif\")\n", 236 | " copy_to_fs(\"temp_cog.tif\", out_fname, fs)\n", 237 | " except (KeyError, ValueError) as e:\n", 238 | " print(f\"raised error: {e}\")\n", 239 | " print(f\"copying empty cog to {out_fname}\")\n", 240 | " copy_to_fs(\"empty_cog.tif\", out_fname, fs)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "import intake\n", 250 | "from dask.diagnostics import ProgressBar\n", 251 | "\n", 252 | "cat2 = intake.open_catalog(\n", 253 | " \"https://raw.githubusercontent.com/carbonplan/data/master/carbonplan_data/catalogs/mtbs.yaml\"\n", 254 | ")\n", 255 | "dates = [f\"2018.{m:02d}\" for m in range(1, 13)]\n", 256 | "da = xr.concat(\n", 257 | " [cat2.rasterized_perims(size=\"vlf\", date=d).to_dask().squeeze(drop=True) for d in dates],\n", 258 | " dim=xr.Variable(\"time\", dates),\n", 259 
| ")\n", 260 | "\n", 261 | "with ProgressBar():\n", 262 | " da_sum = da.sum(\"time\").coarsen(x=133, y=133, boundary=\"trim\").mean().load()\n", 263 | "da_sum" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "da_sum.where(da_sum).plot(vmax=0.01, vmin=0, cmap=\"Greys\")" 273 | ] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | "display_name": "Python 3", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.8.5" 293 | } 294 | }, 295 | "nbformat": 4, 296 | "nbformat_minor": 4 297 | } 298 | -------------------------------------------------------------------------------- /scripts/mtbs/05_monthly_downsampling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS monthly downsampling and reprojection\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), August 5, 2020_\n", 12 | "\n", 13 | "This notebook downsamples and reprojects monthly MTBS 30m rasters stored in\n", 14 | "Cloud Optimized GeoTIFF 4000m GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- Monthly COGs\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- COG outputs after downsampling and reprojection\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Source CRS and projection extent come from MTBS\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from carbonplan_data.utils import projections, setup\n", 45 | "\n", 46 | "workdir, upload = setup(\"jeremy\")\n", 47 | "workdir" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from rio_cogeo.cogeo import cog_translate\n", 57 | "from rio_cogeo.profiles import cog_profiles\n", 58 | "\n", 59 | "dst_profile = cog_profiles.get(\"deflate\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def get_file(region, fire, year, month):\n", 69 | " return {\n", 70 | " \"source\": (\n", 71 | " workdir / f\"processed/mtbs/{region}/30m/{fire}_{year}.{month:02g}.tif\"\n", 72 | " ).as_posix(),\n", 73 | " \"target\": f\"processed/mtbs/{region}/4000m/tif/{fire}.{month:02g}.tif\",\n", 74 | " }" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "resolution = 4000\n", 84 | "\n", 85 | "for year in [1984]:\n", 86 | " for month in [1]:\n", 87 | " for fire in [\"vlf\"]:\n", 88 | " for region in [\"ak\", \"conus\"]:\n", 89 | " f = get_file(region, fire, year, month)\n", 90 | " crs, extent = projections(\"albers\", region)\n", 91 | " resampling = \"average\"\n", 92 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}\").format(\n", 93 | " crs,\n", 94 | " extent,\n", 95 | " resolution,\n", 96 | " resolution,\n", 
97 | " resampling,\n", 98 | " f[\"source\"],\n", 99 | " \"./raster.tif\",\n", 100 | " )\n", 101 | " os.system(cmd)\n", 102 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 103 | " upload(\"./raster.tif\", f[\"target\"])\n", 104 | " os.remove(\"./raster.tif\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "files = [get_file(\"conus\", \"vlf\", 1984, month)[\"source\"] for month in range(12)]" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "resolution = 4000\n", 123 | "\n", 124 | "for year in [2017]:\n", 125 | " for region in [\"conus\"]:\n", 126 | " for fire in [\"vlf\"]:\n", 127 | " files = [get_file(\"conus\", \"vlf\", year, month + 1)[\"source\"] for month in range(12)]\n", 128 | " crs, extent = projections(\"albers\", region)\n", 129 | " resampling = \"sum\"\n", 130 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}\").format(\n", 131 | " crs,\n", 132 | " extent,\n", 133 | " resolution,\n", 134 | " resolution,\n", 135 | " resampling,\n", 136 | " \" \".join(files),\n", 137 | " \"./raster.tif\",\n", 138 | " )\n", 139 | " print(cmd)\n", 140 | " os.system(cmd)\n", 141 | " # cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 142 | " # upload(\"./raster.tif\", f[\"target\"])\n", 143 | " # os.remove(\"./raster.tif\")" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "import rasterio\n", 153 | "\n", 154 | "r = rasterio.open(\"/Users/freeman/workdir/carbonplan-data/processed/mtbs/conus/30m/vlf_2018.09.tif\")\n", 155 | "im = r.read(1)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "r = rasterio.open(\"./raster.tif\")\n", 165 | "im = r.read(1)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "%matplotlib inline" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "from showit import image" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "im.sum()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "image(im, size=12, clim=(0, 1))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "from carbonplan_forests import load" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "mtbs = load.mtbs(store=\"local\")" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "before = mtbs.groupby(\"time.year\").sum().sel(year=2018)[\"vlf\"]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "image(1 - before, size=12, clim=(0, 1))" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | 
"kernelspec": { 243 | "display_name": "Python 3", 244 | "language": "python", 245 | "name": "python3" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython3", 257 | "version": "3.7.9" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 4 262 | } 263 | -------------------------------------------------------------------------------- /scripts/mtbs/05_monthly_mtbs_to_zarr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import rasterio 4 | import xarray as xr 5 | from numcodecs.zlib import Zlib 6 | from rasterio import Affine 7 | from rasterio.crs import CRS 8 | from rasterio.warp import Resampling, reproject, transform 9 | 10 | 11 | def base_crs(): 12 | return ( 13 | 'PROJCS["Albers_Conical_Equal_Area",' 14 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 15 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 16 | "TOWGS84[0,0,0,-0,-0,-0,0]," 17 | 'AUTHORITY["EPSG","6326"]],' 18 | 'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' 19 | 'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' 20 | 'AUTHORITY["EPSG","4326"]],' 21 | 'PROJECTION["Albers_Conic_Equal_Area"],' 22 | 'PARAMETER["standard_parallel_1",29.5],' 23 | 'PARAMETER["standard_parallel_2",45.5],' 24 | 'PARAMETER["latitude_of_center",23],' 25 | 'PARAMETER["longitude_of_center",-96],' 26 | 'PARAMETER["false_easting",0],' 27 | 'PARAMETER["false_northing",0],' 28 | 'UNIT["meters",1]]' 29 | ) 30 | 31 | 32 | def make_dst_band(src_band, src_resolution): 33 | left = -2493045.0 34 | right = 2342655.0 35 | top = 3310005.0 36 | bottom = 177285.0 37 | dst_transform = Affine(30.0, 0.0, left, 0.0, -30.0, top) 38 | dst_resolution = dst_transform[0] 39 | dst_transform = dst_transform * Affine.scale( 40 | src_resolution / dst_resolution, src_resolution / dst_resolution 41 | ) 42 | dst_crs = CRS.from_wkt(base_crs()) 43 | dst_shape = [ 44 | round((top - bottom) / src_resolution), 45 | round((right - left) / src_resolution), 46 | ] 47 | dst_band = np.zeros(dst_shape, np.float32) 48 | return dst_band, dst_transform, dst_crs, dst_shape 49 | 50 | 51 | def calc_coords(shape, trans, crs): 52 | ny, nx = shape 53 | x, _ = trans * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) 54 | _, y = trans * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) 55 | xs, ys = np.meshgrid(x, y) 56 | lon, lat = transform(crs, {"init": "EPSG:4326"}, xs.flatten(), ys.flatten()) 57 | 58 | return { 59 | "x": xr.DataArray(x, dims=("x",)), 60 | "y": xr.DataArray(y, dims=("y",)), 61 | "lat": xr.DataArray(np.asarray(lat).reshape((ny, nx)), dims=("y", "x")), 62 | "lon": xr.DataArray(np.asarray(lon).reshape((ny, nx)), dims=("y", "x")), 63 | } 64 | 65 | 66 | src_nodata = 6 67 | resampling = Resampling.average 68 | resolution = 4000 69 | years = np.arange(1984, 2019) 70 | months = np.arange(1, 13) 71 | 72 | for year in years: 73 | print(f"starting year {year}") 74 | src_path_year = f"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/severity/{year}.tif" 75 | 76 | with rasterio.open(src_path_year, "r") as src_raster_year: 77 | src_transform = src_raster_year.meta["transform"] 78 | src_crs = src_raster_year.meta["crs"] 79 | src_band_year = src_raster_year.read(1) 80 | src_resolution = resolution 81 | 82 | dst_band, dst_transform, dst_crs, dst_shape = 
make_dst_band(src_band_year, src_resolution) 83 | coords = calc_coords(dst_shape, dst_transform, dst_crs) 84 | 85 | for month in months: 86 | print(f"processing month {month}") 87 | varname = f"{year}.{month:02n}" 88 | src_path_month = ( 89 | f"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/area/{varname}.tif" 90 | ) 91 | 92 | with rasterio.open(src_path_month, "r") as src_raster_month: 93 | src_band_month = src_raster_month.read(1) 94 | src_band_tmp = src_band_month * ( 95 | (src_band_year == 3) | (src_band_year == 4) 96 | ).astype("uint8") 97 | src_band_tmp[src_band_year == src_nodata] = src_nodata 98 | src_band_month = src_band_tmp 99 | 100 | dst_band, dst_transform, dst_crs, dst_shape = make_dst_band( 101 | src_band_year, src_resolution 102 | ) 103 | dst_band = dst_band.astype("float32") 104 | 105 | # this seems to require rasterio=1.0.25 and gdal=2.4.2 106 | reproject( 107 | src_band_month, 108 | dst_band, 109 | src_transform=src_transform, 110 | src_crs=src_crs, 111 | dst_transform=dst_transform, 112 | dst_crs=dst_crs, 113 | resampling=resampling, 114 | src_nodata=src_nodata, 115 | dst_nodata=src_raster_year.meta["nodata"], 116 | ) 117 | 118 | meta = src_raster_year.meta 119 | meta.update( 120 | width=dst_shape[0], 121 | height=dst_shape[1], 122 | dtype=str(dst_band.dtype), 123 | crs=dst_crs.to_wkt(), 124 | transform=list(dst_transform), 125 | nodata=src_raster_year.meta["nodata"], 126 | ) 127 | 128 | chunks = {"x": 512, "y": 512} 129 | ds = xr.DataArray(dst_band, dims=("y", "x"), attrs=meta).to_dataset(name=varname) 130 | ds = ds.assign_coords(coords).chunk(chunks) 131 | 132 | ds.to_zarr(f"{varname}.zarr", mode="w", encoding={varname: {"compressor": Zlib()}}) 133 | 134 | results = [] 135 | for year in years: 136 | for month in months: 137 | varname = f"{year}.{month:02n}" 138 | ds = xr.open_zarr(f"{varname}.zarr") 139 | ds = ds.chunk({"x": 1209, "y": 783}) 140 | results.append(ds[varname]) 141 | 142 | dates = pd.date_range("1984-01", "2018-12", freq="MS") 143 | ds = xr.concat(results, xr.Variable("time", dates)) 144 | ds.name = "monthly" 145 | ds["x"] = range(len(ds["x"])) 146 | ds["y"] = range(len(ds["y"])) 147 | ds = ds.to_dataset() 148 | chunks = {"time": 1, "x": 1209, "y": 783} 149 | ds = ds.chunk(chunks) 150 | 151 | ds.to_zarr( 152 | "/Users/freeman/workdir/carbonplan-data/processed/mtbs/conus/4000m/monthly.zarr", 153 | mode="w", 154 | encoding={"monthly": {"compressor": Zlib()}}, 155 | consolidated=True, 156 | ) 157 | -------------------------------------------------------------------------------- /scripts/mtbs/06_annual_downsampling.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from rio_cogeo.cogeo import cog_translate 5 | from rio_cogeo.profiles import cog_profiles 6 | 7 | from carbonplan_data.utils import projections, setup 8 | 9 | dst_profile = cog_profiles.get("deflate") 10 | 11 | 12 | workdir, upload = setup("jeremy") 13 | workdir 14 | 15 | resolution = 30 16 | 17 | for region in ["conus"]: 18 | for year in np.arange(1984, 2019): 19 | source = (workdir / f"raw/mtbs/{region}/30m/{year}.tif").as_posix() 20 | print(source) 21 | crs, extent = projections("albers", region) 22 | resampling = "nearest" 23 | cmd = ("gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}").format( 24 | crs, 25 | extent, 26 | resolution, 27 | resolution, 28 | resampling, 29 | source, 30 | "./raster.tif", 31 | ) 32 | os.system(cmd) 33 | cog_translate("./raster.tif", f"./{year}.tif", dst_profile) 34 | 
os.remove("./raster.tif") 35 | -------------------------------------------------------------------------------- /scripts/mtbs/06_annual_mtbs_to_zarr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rasterio 3 | import xarray as xr 4 | from numcodecs.zlib import Zlib 5 | from rasterio import Affine 6 | from rasterio.crs import CRS 7 | from rasterio.warp import Resampling, reproject, transform 8 | 9 | 10 | def base_crs(): 11 | return ( 12 | 'PROJCS["Albers_Conical_Equal_Area",' 13 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 14 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 15 | "TOWGS84[0,0,0,-0,-0,-0,0]," 16 | 'AUTHORITY["EPSG","6326"]],' 17 | 'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' 18 | 'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' 19 | 'AUTHORITY["EPSG","4326"]],' 20 | 'PROJECTION["Albers_Conic_Equal_Area"],' 21 | 'PARAMETER["standard_parallel_1",29.5],' 22 | 'PARAMETER["standard_parallel_2",45.5],' 23 | 'PARAMETER["latitude_of_center",23],' 24 | 'PARAMETER["longitude_of_center",-96],' 25 | 'PARAMETER["false_easting",0],' 26 | 'PARAMETER["false_northing",0],' 27 | 'UNIT["meters",1]]' 28 | ) 29 | 30 | 31 | def make_dst_band(src_band, src_resolution): 32 | left = -2493045.0 33 | right = 2342655.0 34 | top = 3310005.0 35 | bottom = 177285.0 36 | dst_transform = Affine(30.0, 0.0, left, 0.0, -30.0, top) 37 | dst_resolution = dst_transform[0] 38 | dst_transform = dst_transform * Affine.scale( 39 | src_resolution / dst_resolution, src_resolution / dst_resolution 40 | ) 41 | dst_crs = CRS.from_wkt(base_crs()) 42 | dst_shape = [ 43 | round((top - bottom) / src_resolution), 44 | round((right - left) / src_resolution), 45 | ] 46 | dst_band = np.zeros(dst_shape, np.float32) 47 | return dst_band, dst_transform, dst_crs, dst_shape 48 | 49 | 50 | def calc_coords(shape, trans, crs): 51 | ny, nx = shape 52 | x, _ = trans * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) 53 | _, y = trans * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) 54 | xs, ys = np.meshgrid(x, y) 55 | lon, lat = transform(crs, {"init": "EPSG:4326"}, xs.flatten(), ys.flatten()) 56 | 57 | return { 58 | "x": xr.DataArray(x, dims=("x",)), 59 | "y": xr.DataArray(y, dims=("y",)), 60 | "lat": xr.DataArray(np.asarray(lat).reshape((ny, nx)), dims=("y", "x")), 61 | "lon": xr.DataArray(np.asarray(lon).reshape((ny, nx)), dims=("y", "x")), 62 | } 63 | 64 | 65 | src_nodata = 6 66 | resampling = Resampling.average 67 | resolution = 4000 68 | years = np.arange(1984, 2019) 69 | months = np.arange(1, 13) 70 | 71 | for year in years: 72 | print(f"starting year {year}") 73 | src_path_year = f"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/severity/{year}.tif" 74 | 75 | with rasterio.open(src_path_year, "r") as src_raster_year: 76 | src_transform = src_raster_year.meta["transform"] 77 | src_crs = src_raster_year.meta["crs"] 78 | src_band_year = src_raster_year.read(1) 79 | src_resolution = resolution 80 | 81 | dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(src_band_year, src_resolution) 82 | coords = calc_coords(dst_shape, dst_transform, dst_crs) 83 | 84 | for month in months: 85 | print(f"processing month {month}") 86 | varname = f"{year}.{month:02n}" 87 | src_path_month = ( 88 | f"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/area/{varname}.tif" 89 | ) 90 | 91 | with rasterio.open(src_path_month, "r") as src_raster_month: 92 | if month == 1: 93 | src_band_month = src_raster_month.read(1) 94 | else: 95 | 
src_band_month += src_raster_month.read(1) 96 | 97 | src_band_month[src_band_month > 1] = 1 98 | src_band_tmp = src_band_month * ((src_band_year == 3) | (src_band_year == 4)).astype( 99 | "uint8" 100 | ) 101 | src_band_tmp[src_band_year == src_nodata] = src_nodata 102 | 103 | dst_band = dst_band.astype("float32") 104 | 105 | # this seems to require rasterio=1.0.25 and gdal=2.4.2 106 | reproject( 107 | src_band_tmp, 108 | dst_band, 109 | src_transform=src_transform, 110 | src_crs=src_crs, 111 | dst_transform=dst_transform, 112 | dst_crs=dst_crs, 113 | resampling=resampling, 114 | src_nodata=src_nodata, 115 | dst_nodata=src_raster_year.meta["nodata"], 116 | ) 117 | 118 | meta = src_raster_year.meta 119 | meta.update( 120 | width=dst_shape[0], 121 | height=dst_shape[1], 122 | dtype=str(dst_band.dtype), 123 | crs=dst_crs.to_wkt(), 124 | transform=list(dst_transform), 125 | nodata=src_raster_year.meta["nodata"], 126 | ) 127 | 128 | chunks = {"x": 512, "y": 512} 129 | ds = xr.DataArray(dst_band, dims=("y", "x"), attrs=meta).to_dataset(name=f"{year}") 130 | ds = ds.assign_coords(coords).chunk(chunks) 131 | 132 | ds.to_zarr(f"{year}.zarr", mode="w", encoding={f"{year}": {"compressor": Zlib()}}) 133 | 134 | # results = [] 135 | # for year in years: 136 | # varname = f'{year}' 137 | # ds = xr.open_zarr(f'{varname}.zarr') 138 | # results.append(ds[varname]) 139 | 140 | # dates = pd.date_range('1984', '2018', freq='YS') 141 | # ds = xr.concat(results, xr.Variable('time', dates)) 142 | # ds.name = 'annual' 143 | # ds['x'] = range(len(ds['x'])) 144 | # ds['y'] = range(len(ds['y'])) 145 | # ds = ds.to_dataset() 146 | # chunks = ({'time': 1, 'x': 1209, 'y': 783}) 147 | # ds = ds.chunk(chunks) 148 | # ds.to_zarr( 149 | # '/Users/freeman/workdir/carbonplan-data/processed/mtbs/conus/4000m/annual.zarr', 150 | # mode='w', encoding={'annual': {'compressor': Zlib()}} 151 | # ) 152 | -------------------------------------------------------------------------------- /scripts/mtbs/README.md: -------------------------------------------------------------------------------- 1 | # MTBS Burned Area 2 | -------------------------------------------------------------------------------- /scripts/mtbs/prepare.py: -------------------------------------------------------------------------------- 1 | import rasterio 2 | import zarr 3 | from numcodecs.zlib import Zlib 4 | from numpy import zeros 5 | from rasterio import Affine 6 | from rasterio.crs import CRS 7 | from rasterio.warp import Resampling, reproject 8 | 9 | RAW_PATH = "/Users/freeman/data/treeplan/raw/" 10 | PROCESSED_PATH = "/Users/freeman/github/carbonplan/data/processed/" 11 | 12 | 13 | def base_crs(): 14 | return ( 15 | 'PROJCS["Albers_Conical_Equal_Area",' 16 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 17 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 18 | "TOWGS84[0,0,0,-0,-0,-0,0]," 19 | 'AUTHORITY["EPSG","6326"]],' 20 | 'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' 21 | 'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' 22 | 'AUTHORITY["EPSG","4326"]],' 23 | 'PROJECTION["Albers_Conic_Equal_Area"],' 24 | 'PARAMETER["standard_parallel_1",29.5],' 25 | 'PARAMETER["standard_parallel_2",45.5],' 26 | 'PARAMETER["latitude_of_center",23],' 27 | 'PARAMETER["longitude_of_center",-96],' 28 | 'PARAMETER["false_easting",0],' 29 | 'PARAMETER["false_northing",0],' 30 | 'UNIT["meters",1]]' 31 | ) 32 | 33 | 34 | def make_dst_band(src_band, src_resolution): 35 | left = -2493045.0 36 | right = 2342655.0 37 | top = 3310005.0 38 | bottom = 177285.0 39 | 
dst_transform = Affine(30.0, 0.0, left, 0.0, -30.0, top) 40 | dst_resolution = dst_transform[0] 41 | dst_transform = dst_transform * Affine.scale( 42 | src_resolution / dst_resolution, src_resolution / dst_resolution 43 | ) 44 | dst_crs = CRS.from_wkt(base_crs()) 45 | 46 | dst_shape = [ 47 | round((top - bottom) / src_resolution), 48 | round((right - left) / src_resolution), 49 | ] 50 | 51 | dst_band = zeros(dst_shape, src_band.dtype) 52 | return dst_band, dst_transform, dst_crs, dst_shape 53 | 54 | 55 | def prepare_mtbs(year, resolution): 56 | src_path = RAW_PATH + f"MTBS/{year}/mtbs_CONUS_{year}.tif" 57 | src_raster = rasterio.open(src_path) 58 | src_transform = src_raster.meta["transform"] 59 | src_crs = src_raster.meta["crs"] 60 | src_band = src_raster.read(1) 61 | src_resolution = resolution 62 | 63 | dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(src_band, src_resolution) 64 | 65 | if resolution == 30: 66 | resampling = Resampling.nearest 67 | elif resolution > 30: 68 | resampling = Resampling.average 69 | # set moderate or high burn severity to 1 and others to 1 70 | src_band_tmp = ((src_band == 3) | (src_band == 4)).astype("uint8") 71 | # set masked regions to nodata value 72 | src_band_tmp[src_band == 6] = 6 73 | src_band = src_band_tmp 74 | dst_band = dst_band.astype("float32") # convert to float for averaging 75 | src_nodata = 6 76 | 77 | reproject( 78 | src_band, 79 | dst_band, 80 | src_transform=src_transform, 81 | src_crs=src_crs, 82 | dst_transform=dst_transform, 83 | dst_crs=dst_crs, 84 | resampling=resampling, 85 | src_nodata=src_nodata, 86 | dst_nodata=src_raster.meta["nodata"], 87 | ) 88 | 89 | meta = src_raster.meta 90 | meta.update( 91 | width=dst_shape[0], 92 | height=dst_shape[1], 93 | dtype=str(dst_band.dtype), 94 | crs=dst_crs.to_wkt(), 95 | transform=list(dst_transform), 96 | nodata=src_raster.meta["nodata"], 97 | ) 98 | 99 | store = zarr.open(PROCESSED_PATH + f"MTBS.{year}.{resolution}m.zarr", "w") 100 | store.attrs.put(meta) 101 | store.array("0", dst_band, chunks=(512, 512), compressor=Zlib()) 102 | 103 | 104 | years = ["%s" % (d + 1984) for d in range(2018 - 1984)] 105 | [prepare_mtbs(year, 500) for year in years] 106 | -------------------------------------------------------------------------------- /scripts/nftd/00_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Download NFTD\n", 10 | "\n", 11 | "_by Jeremy Freeman (CarbonPlan), August 1, 2020_\n", 12 | "\n", 13 | "This notebook downloads local copies of NFTD (National Forest Type Dataset)\n", 14 | "datasets for processing.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- sources.yaml\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- Local copies of NFTD data\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import pathlib\n", 36 | "import zipfile\n", 37 | "\n", 38 | "import urlpath\n", 39 | "import wget\n", 40 | "import yaml\n", 41 | "\n", 42 | "workdir = pathlib.Path(\"/Users/freeman/workdir/carbonplan_data_downloads/nftd/\")\n", 43 | "workdir.mkdir(parents=True, exist_ok=True)\n", 44 | "workdir" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | 
"outputs": [], 52 | "source": [ 53 | "with open(\"../../sources.yaml\") as f:\n", 54 | " sources = yaml.load(f, Loader=yaml.FullLoader)[\"usfs\"]" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "for key, dset in sources[\"data\"].items():\n", 64 | " if \"download\" in dset[\"actions\"]:\n", 65 | " for url in dset[\"urlpath\"]:\n", 66 | " url = urlpath.URL(url)\n", 67 | " out = workdir / url.name\n", 68 | " if not out.exists():\n", 69 | " print(f\"downloading {url}\")\n", 70 | " wget.download(str(url), out=str(out))\n", 71 | "\n", 72 | " if \"unzip\" in dset[\"actions\"]:\n", 73 | " outdir = workdir / out.stem\n", 74 | " if not outdir.exists():\n", 75 | " outdir.mkdir(parents=True)\n", 76 | " with zipfile.ZipFile(out, \"r\") as f:\n", 77 | " print(f\"extracting contents of {out}\")\n", 78 | " f.extractall(outdir)" 79 | ] 80 | } 81 | ], 82 | "metadata": { 83 | "kernelspec": { 84 | "display_name": "Python 3", 85 | "language": "python", 86 | "name": "python3" 87 | }, 88 | "language_info": { 89 | "codemirror_mode": { 90 | "name": "ipython", 91 | "version": 3 92 | }, 93 | "file_extension": ".py", 94 | "mimetype": "text/x-python", 95 | "name": "python", 96 | "nbconvert_exporter": "python", 97 | "pygments_lexer": "ipython3", 98 | "version": "3.7.6" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /scripts/nftd/01_nftd_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Convert NFTD to COGs\n", 10 | "\n", 11 | "_by Jeremy Freeman (CarbonPlan), August 1, 2020_\n", 12 | "\n", 13 | "This notebook converts local copies of NFTD datasets to cloud optimized\n", 14 | "GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- downloaded files from UFSF raster gateway\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- Cloud optimized GeoTIFFs\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "import pathlib\n", 37 | "\n", 38 | "from google.cloud import storage\n", 39 | "from rio_cogeo.cogeo import cog_translate\n", 40 | "from rio_cogeo.profiles import cog_profiles" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB\n", 50 | "storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "dst_profile = cog_profiles.get(\"deflate\")\n", 60 | "os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = (\n", 61 | " \"/Users/freeman/.config/gcloud/legacy_credentials/jeremy@carbonplan.org/adc.json\"\n", 62 | ")\n", 63 | "\n", 64 | "\n", 65 | "def upload(src, target, bucket=\"carbonplan-data\"):\n", 66 | " storage_client = storage.Client(\"carbonplan\")\n", 67 | " bucket = storage_client.bucket(bucket)\n", 68 | " blob = bucket.blob(target)\n", 69 | " blob.upload_from_filename(src)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | 
"metadata": {}, 75 | "source": [ 76 | "## Iterate over datasets\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# conus forest group\n", 86 | "workdir = pathlib.Path(\"/Users/freeman/workdir/carbonplan_data_downloads/nftd/\")\n", 87 | "source = workdir / \"conus_forestgroup\" / \"conus_forestgroup.img\"\n", 88 | "target = workdir / \"conus_forestgroup\" / \"raster.tif\"\n", 89 | "cloud = \"raw/nftd/conus_forestgroup/30m/raster.tif\"" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "files = [\n", 99 | " {\n", 100 | " \"source\": workdir / \"conus_forestgroup\" / \"conus_forestgroup.img\",\n", 101 | " \"target\": workdir / \"conus_forestgroup\" / \"raster.tif\",\n", 102 | " \"cloud\": \"raw/nftd/conus_forestgroup/250m/raster.tif\",\n", 103 | " },\n", 104 | " {\n", 105 | " \"source\": workdir / \"conus_forestgroup\" / \"conus_forestgroup_error.img\",\n", 106 | " \"target\": workdir / \"conus_forestgroup\" / \"error.tif\",\n", 107 | " \"cloud\": \"raw/nftd/conus_forestgroup/250m/error.tif\",\n", 108 | " },\n", 109 | " {\n", 110 | " \"source\": workdir / \"conus_forest-type\" / \"conus_foresttype.img\",\n", 111 | " \"target\": workdir / \"conus_forest-type\" / \"raster.tif\",\n", 112 | " \"cloud\": \"raw/nftd/conus_foresttype/250m/raster.tif\",\n", 113 | " },\n", 114 | " {\n", 115 | " \"source\": workdir / \"conus_forest-type\" / \"conus_foresttype_error.img\",\n", 116 | " \"target\": workdir / \"conus_forest-type\" / \"error.tif\",\n", 117 | " \"cloud\": \"raw/nftd/conus_foresttype/250m/error.tif\",\n", 118 | " },\n", 119 | " {\n", 120 | " \"source\": workdir / \"ak_forestgroup\" / \"alaska_forestgroup_63360.img\",\n", 121 | " \"target\": workdir / \"ak_forestgroup\" / \"raster.tif\",\n", 122 | " \"cloud\": \"raw/nftd/ak_forestgroup/250m/raster.tif\",\n", 123 | " },\n", 124 | " {\n", 125 | " \"source\": workdir / \"ak_forestgroup\" / \"alaska_forestgroup_confidence_63360.img\",\n", 126 | " \"target\": workdir / \"ak_forestgroup\" / \"error.tif\",\n", 127 | " \"cloud\": \"raw/nftd/ak_forestgroup/250m/error.tif\",\n", 128 | " },\n", 129 | " {\n", 130 | " \"source\": workdir / \"ak_forest-type\" / \"alaska_foresttype_63360.img\",\n", 131 | " \"target\": workdir / \"ak_forest-type\" / \"raster.tif\",\n", 132 | " \"cloud\": \"raw/nftd/ak_foresttype/250m/raster.tif\",\n", 133 | " },\n", 134 | " {\n", 135 | " \"source\": workdir / \"ak_forest-type\" / \"alaska_foresttype_confidence_63360.img\",\n", 136 | " \"target\": workdir / \"ak_forest-type\" / \"error.tif\",\n", 137 | " \"cloud\": \"raw/nftd/ak_foresttype/250m/error.tif\",\n", 138 | " },\n", 139 | "]" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# convert to cogs\n", 149 | "[cog_translate(f[\"source\"], f[\"target\"], dst_profile) for f in files]" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "# upload to cloud storage\n", 159 | "[upload(f[\"target\"], f[\"cloud\"]) for f in files]" 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python 3", 166 | "language": "python", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | 
"file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.8.2" 180 | } 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 4 184 | } 185 | -------------------------------------------------------------------------------- /scripts/nftd/02_downsampling_and_reprojection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# NFTD downsampling and reprojection\n", 10 | "\n", 11 | "_by Jeremy Freeman (CarbonPlan), August 2, 2020_\n", 12 | "\n", 13 | "This notebook downsamples and reprojects NFTD 250m yearly rasters stored in\n", 14 | "Cloud Optimized GeoTIFF into 250m and 4000m GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- COG outputs from `01_nftd_to_cogs.ipynb`\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- COG outputs after downsampling and reprojection\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Source CRS and projection extent come from NLCD\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from carbonplan_data.utils import projections, setup\n", 45 | "\n", 46 | "workdir, upload = setup(\"jeremy\")" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import rasterio\n", 56 | "from rio_cogeo.cogeo import cog_translate\n", 57 | "from rio_cogeo.profiles import cog_profiles\n", 58 | "\n", 59 | "dst_profile = cog_profiles.get(\"deflate\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def get_files(region, resolution):\n", 69 | " return [\n", 70 | " {\n", 71 | " \"source\": workdir / f\"raw/nftd/{region}_foresttype/250m/raster.tif\",\n", 72 | " \"target\": f\"processed/nftd/{region}/{resolution}m/type.tif\",\n", 73 | " },\n", 74 | " {\n", 75 | " \"source\": workdir / f\"raw/nftd/{region}_forestgroup/250m/raster.tif\",\n", 76 | " \"target\": f\"processed/nftd/{region}/{resolution}m/group.tif\",\n", 77 | " },\n", 78 | " {\n", 79 | " \"source\": workdir / f\"raw/nftd/{region}_foresttype/250m/error.tif\",\n", 80 | " \"target\": f\"processed/nftd/{region}/{resolution}m/type_error.tif\",\n", 81 | " },\n", 82 | " {\n", 83 | " \"source\": workdir / f\"raw/nftd/{region}_forestgroup/250m/error.tif\",\n", 84 | " \"target\": f\"processed/nftd/{region}/{resolution}m/group_error.tif\",\n", 85 | " },\n", 86 | " ]" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "for resolution in [250, 4000]:\n", 96 | " for region in [\"ak\", \"conus\"]:\n", 97 | " files = get_files(region, resolution)\n", 98 | " crs, extent = projections(\"albers\", region)\n", 99 | " for f in files:\n", 100 | " if \"error\" in str(f[\"source\"]):\n", 101 | " resampling = \"average\"\n", 102 | " elif resolution == 4000:\n", 103 | " resampling = \"mode\"\n", 104 | " else:\n", 105 | " resampling = \"near\"\n", 106 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}\").format(\n", 107 | " crs,\n", 108 | " 
extent,\n", 109 | " resolution,\n", 110 | " resolution,\n", 111 | " resampling,\n", 112 | " f[\"source\"],\n", 113 | " \"./raster.tif\",\n", 114 | " )\n", 115 | " os.system(cmd)\n", 116 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 117 | " upload(\"./raster.tif\", f[\"target\"])\n", 118 | " os.remove(\"./raster.tif\")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### downsample using thresholding\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "groups = [\n", 135 | " 100,\n", 136 | " 120,\n", 137 | " 140,\n", 138 | " 160,\n", 139 | " 180,\n", 140 | " 200,\n", 141 | " 220,\n", 142 | " 240,\n", 143 | " 260,\n", 144 | " 280,\n", 145 | " 300,\n", 146 | " 320,\n", 147 | " 340,\n", 148 | " 360,\n", 149 | " 370,\n", 150 | " 380,\n", 151 | " 400,\n", 152 | " 500,\n", 153 | " 600,\n", 154 | " 700,\n", 155 | " 800,\n", 156 | " 900,\n", 157 | " 910,\n", 158 | " 920,\n", 159 | " 940,\n", 160 | " 950,\n", 161 | " 980,\n", 162 | " 990,\n", 163 | "]" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "for resolution in [4000]:\n", 173 | " for region in [\"ak\", \"conus\"]:\n", 174 | " src = rasterio.open(workdir / f\"processed/nftd/{region}/250m/group.tif\")\n", 175 | " band = src.read(1)\n", 176 | " profile = src.profile\n", 177 | " profile[\"dtype\"] = \"uint8\"\n", 178 | "\n", 179 | " for group in groups:\n", 180 | " print(f\"region {region} cat {group}\")\n", 181 | " crs, extent = projections(\"albers\", region)\n", 182 | " out = (band == group).astype(rasterio.uint8)\n", 183 | " resampling = \"average\"\n", 184 | "\n", 185 | " with rasterio.open(\"./thresholded.tif\", \"w\", **profile) as dst:\n", 186 | " dst.write(out, 1)\n", 187 | "\n", 188 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} -ot Float32 {} {}\").format(\n", 189 | " crs,\n", 190 | " extent,\n", 191 | " resolution,\n", 192 | " resolution,\n", 193 | " resampling,\n", 194 | " \"./thresholded.tif\",\n", 195 | " \"./raster.tif\",\n", 196 | " )\n", 197 | "\n", 198 | " os.system(cmd)\n", 199 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 200 | " upload(\n", 201 | " \"./raster.tif\",\n", 202 | " f\"processed/nftd/{region}/{resolution}m/group_g{group}.tif\",\n", 203 | " )\n", 204 | " os.remove(\"./thresholded.tif\")\n", 205 | " os.remove(\"./raster.tif\")" 206 | ] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python 3", 212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.7.8" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 4 230 | } 231 | -------------------------------------------------------------------------------- /scripts/nlcd/00_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Download NLCD\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook downloads local copies of MLRC 
datasets for processing.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- sources.yaml\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- Local copies of MLRC data\n", 22 | "\n", 23 | "**Notes:**\n", 24 | "\n", 25 | "- No reprojection or processing of the data is done in this notebook.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import pathlib\n", 35 | "import zipfile\n", 36 | "\n", 37 | "import urlpath\n", 38 | "import wget\n", 39 | "import yaml\n", 40 | "\n", 41 | "workdir = pathlib.Path(\"/Users/jhamman/workdir/carbonplan_data_downloads/mlrc/\")\n", 42 | "workdir.mkdir(parents=True, exist_ok=True)\n", 43 | "workdir" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "with open(\"../../sources.yaml\") as f:\n", 53 | " sources = yaml.load(f, Loader=yaml.FullLoader)[\"mlrc\"]" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "sources" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "for key, dset in sources[\"data\"].items():\n", 72 | " if \"download\" in dset[\"actions\"]:\n", 73 | " for url in dset[\"urlpath\"]:\n", 74 | " url = urlpath.URL(url)\n", 75 | " out = workdir / url.name\n", 76 | " if not out.exists():\n", 77 | " print(f\"downloading {url}\")\n", 78 | " wget.download(str(url), out=str(out))\n", 79 | "\n", 80 | " if \"unzip\" in dset[\"actions\"]:\n", 81 | " outdir = workdir / out.stem\n", 82 | " if not outdir.exists():\n", 83 | " outdir.mkdir(parents=True)\n", 84 | " with zipfile.ZipFile(out, \"r\") as f:\n", 85 | " print(f\"extracting contents of {out}\")\n", 86 | " f.extractall(outdir)" 87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Python 3", 93 | "language": "python", 94 | "name": "python3" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 3 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython3", 106 | "version": "3.7.8" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 4 111 | } 112 | -------------------------------------------------------------------------------- /scripts/nlcd/01_nlcd_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Convert NLCD to COG\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook converts local copies of NLDC rasters to COG\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- local copies of NLCD data\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- COGs published to cloud storage\n", 22 | "\n", 23 | "**Notes:**\n", 24 | "\n", 25 | "- No reprojection or processing of the data is done in this notebook.\n", 26 | "- Includes both conus (L48) and alaska (AK)\n", 27 | "- Paths here need to be harmonized as it was run in two different local\n", 28 | " environments\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "import 
pathlib\n", 39 | "\n", 40 | "from google.cloud import storage\n", 41 | "from rio_cogeo.cogeo import cog_translate\n", 42 | "from rio_cogeo.profiles import cog_profiles\n", 43 | "\n", 44 | "storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB\n", 45 | "storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# This is the COG profile:\n", 55 | "dst_profile = cog_profiles.get(\"deflate\")\n", 56 | "os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = (\n", 57 | " \"/Users/freeman/.config/gcloud/legacy_credentials/jeremy@carbonplan.org/adc.json\"\n", 58 | ")\n", 59 | "\n", 60 | "\n", 61 | "def upload(src, target, bucket=\"carbonplan-data\"):\n", 62 | " storage_client = storage.Client(\"carbonplan\")\n", 63 | " bucket = storage_client.bucket(bucket)\n", 64 | " blob = bucket.blob(target)\n", 65 | " blob.upload_from_filename(src)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## NLCD_Land_Cover_Change_Index_L48_20190424\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# raw file\n", 82 | "source = \"/Users/jhamman/workdir/carbonplan_data_downloads/mlrc/NLCD_Land_Cover_Change_Index_L48_20190424/NLCD_Land_Cover_Change_Index_L48_20190424.img\"\n", 83 | "\n", 84 | "# local target\n", 85 | "target = \"./raster.tif\"\n", 86 | "\n", 87 | "# This is where we'll write the COGs when we're done\n", 88 | "cloud_target = \"raw/mlrc/NLCD_Land_Cover_Change_Index_L48_20190424/30m/raster.tif\"" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# translate to COG\n", 98 | "cog_translate(source, target, dst_profile)\n", 99 | "\n", 100 | "# Upload to GCS\n", 101 | "upload(target, cloud_target)\n", 102 | "\n", 103 | "# Remove temporary file\n", 104 | "os.remove(target)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## NLCD_Land_Cover_L48_20190424_full_zip\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "target = \"./raster.tif\"\n", 121 | "\n", 122 | "for year in [2001, 2004, 2006, 2008, 2011, 2013, 2016]:\n", 123 | " print(year)\n", 124 | " source = f\"/Users/jhamman/workdir/carbonplan_data_downloads/mlrc/NLCD_Land_Cover_L48_20190424_full_zip/NLCD_{year}_Land_Cover_L48_20190424.img\"\n", 125 | " cloud_target = f\"raw/mlrc/NLCD_Land_Cover_L48_20190424_full/30m/{year}.tif\"\n", 126 | "\n", 127 | " # translate to COG\n", 128 | " cog_translate(source, target, dst_profile)\n", 129 | "\n", 130 | " # Upload to GCS\n", 131 | " upload(target, cloud_target)\n", 132 | "\n", 133 | " # Remove temporary file\n", 134 | " os.remove(target)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## NLCD_Land_Cover_AK_20200213\n" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "workdir = pathlib.Path(\"/Users/freeman/workdir/carbonplan_data_downloads/nlcd/\")\n", 151 | "\n", 152 | "target = \"./raster.tif\"\n", 153 | "\n", 154 | "for year in [2016]:\n", 155 | " print(year)\n", 156 | " source = workdir / 
f\"NLCD_{year}_Land_Cover_AK_20200213/NLCD_{year}_Land_Cover_AK_20200213.img\"\n", 157 | "    cloud_target = f\"raw/nlcd/NLCD_Land_Cover_AK_20200213/30m/{year}.tif\"\n", 158 | "\n", 159 | "    # translate to COG\n", 160 | "    cog_translate(source, target, dst_profile)\n", 161 | "\n", 162 | "    # Upload to GCS\n", 163 | "    upload(target, cloud_target)\n", 164 | "\n", 165 | "    # Remove temporary file\n", 166 | "    os.remove(target)" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.7.8" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 4 191 | } 192 | -------------------------------------------------------------------------------- /scripts/nlcd/README.md: -------------------------------------------------------------------------------- 1 | # National Land Cover Database (NLCD) 2 | -------------------------------------------------------------------------------- /scripts/prism/00_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pathlib\n", 10 | "import zipfile\n", 11 | "\n", 12 | "import wget\n", 13 | "\n", 14 | "from carbonplan_data.utils import setup\n", 15 | "\n", 16 | "workdir, upload = setup(\"joe\")" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "files = [\n", 26 | "    (\n", 27 | "        \"http://services.nacse.org/prism/data/public/normals/800m/ppt/14\",\n", 28 | "        f\"{workdir}/ppt_normal.zip\",\n", 29 | "    ),\n", 30 | "    (\n", 31 | "        \"http://services.nacse.org/prism/data/public/normals/800m/tmean/14\",\n", 32 | "        f\"{workdir}/tmean_normal.zip\",\n", 33 | "    ),\n", 34 | "    (\n", 35 | "        \"https://prism.oregonstate.edu/projects/public/alaska/grids/tmean/PRISM_tmean_ak_30yr_normal_800mM1_annual_asc.zip\",\n", 36 | "        f\"{workdir}/PRISM_tmean_ak_30yr_normal_800mM1_annual_asc.zip\",\n", 37 | "    ),\n", 38 | "    (\n", 39 | "        \"https://prism.oregonstate.edu/projects/public/alaska/grids/ppt/PRISM_ppt_ak_30yr_normal_800mM1_annual_asc.zip\",\n", 40 | "        f\"{workdir}/PRISM_ppt_ak_30yr_normal_800mM1_annual_asc.zip\",\n", 41 | "    ),\n", 42 | "]\n", 43 | "\n", 44 | "for src, dst in files:\n", 45 | "    dst = pathlib.Path(dst)\n", 46 | "    if not dst.exists():\n", 47 | "        wget.download(src, out=str(dst))\n", 48 | "\n", 49 | "    outdir = workdir / dst.stem\n", 50 | "    with zipfile.ZipFile(dst, \"r\") as f:\n", 51 | "        # print(f\"extracting contents of {dst}\")\n", 52 | "        print(outdir)\n", 53 | "        f.extractall(outdir)" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "Python 3", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.9.2" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 4 78 | } 79 | 
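Note: the download notebooks above (`scripts/nlcd/00_download.ipynb` and `scripts/prism/00_download.ipynb`) repeat the same check-then-download-then-unzip pattern. A shared helper along the following lines could replace both loops; this is only a sketch that assumes the same `wget` and `zipfile` dependencies the notebooks already use, and the `fetch_and_extract` name is hypothetical (it does not exist in `carbonplan_data.utils`).

```python
import pathlib
import zipfile

import wget


def fetch_and_extract(url, dst, unzip=True):
    """Hypothetical helper mirroring the loops in the 00_download notebooks."""
    dst = pathlib.Path(dst)
    dst.parent.mkdir(parents=True, exist_ok=True)

    # only download when a local copy is missing
    if not dst.exists():
        wget.download(url, out=str(dst))

    if not unzip:
        return dst

    # extract alongside the archive, keyed by the archive's stem
    outdir = dst.parent / dst.stem
    if not outdir.exists():
        outdir.mkdir(parents=True)
        with zipfile.ZipFile(dst, "r") as f:
            f.extractall(outdir)
    return outdir
```

Each notebook would then reduce to a loop over its URL list, e.g. `fetch_and_extract(src, workdir / name)`.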
-------------------------------------------------------------------------------- /scripts/prism/01_prism_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Convert PRISM to COG\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), February 16, 2021_\n", 12 | "\n", 13 | "This notebook converts local copies of PRISM normal rasters to COG\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- local copies of PRISM data\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- COGs published to cloud storage\n", 22 | "\n", 23 | "**Notes:**\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import os\n", 33 | "\n", 34 | "import xarray as xr\n", 35 | "from google.cloud import storage\n", 36 | "from rio_cogeo.cogeo import cog_translate\n", 37 | "from rio_cogeo.profiles import cog_profiles\n", 38 | "\n", 39 | "from carbonplan_data.utils import setup\n", 40 | "\n", 41 | "storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB\n", 42 | "storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# This is the COG profile:\n", 52 | "dst_profile = cog_profiles.get(\"deflate\")\n", 53 | "\n", 54 | "workdir, upload = setup(\"joe\")" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## PRISM normals (CONUS and AK)\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "sources = [\n", 71 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/PRISM_tmean_ak_30yr_normal_800mM1_annual_asc/ak_tmean_1981_2010.14.txt\",\n", 72 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/PRISM_ppt_ak_30yr_normal_800mM1_annual_asc/ak_ppt_1981_2010.14.txt\",\n", 73 | "]\n", 74 | "\n", 75 | "crs = \"+proj=latlong +ellps=GRS80 +datum=NAD83\"\n", 76 | "\n", 77 | "for source in sources:\n", 78 | " target = source.replace(\"txt\", \"tif\")\n", 79 | " da = xr.open_rasterio(source)\n", 80 | " da = da.rio.set_crs(crs)\n", 81 | " da.rio.to_raster(target)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "sources = [\n", 91 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/ppt_normal/PRISM_ppt_30yr_normal_800mM2_annual_bil.bil\",\n", 92 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/tmean_normal/PRISM_tmean_30yr_normal_800mM2_annual_bil.bil\",\n", 93 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/PRISM_tmean_ak_30yr_normal_800mM1_annual_asc/ak_tmean_1981_2010.14.tif\",\n", 94 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/PRISM_ppt_ak_30yr_normal_800mM1_annual_asc/ak_ppt_1981_2010.14.tif\",\n", 95 | "]\n", 96 | "\n", 97 | "# local target\n", 98 | "target = \"./raster.tif\"\n", 99 | "\n", 100 | "for source in sources:\n", 101 | " for var in [\"ppt\", \"tmean\"]:\n", 102 | " if var in source:\n", 103 | " break\n", 104 | "\n", 105 | " if \"ak\" in source:\n", 106 | " region = \"ak\"\n", 107 | " else:\n", 108 | " region = \"conus\"\n", 109 | "\n", 110 | " # This is where we'll write the COGs when we're done\n", 111 | " cloud_target = f\"raw/prism/normals/{region}/800m/{var}.tif\"\n", 112 | 
"\n", 113 | " # translate to COG\n", 114 | " cog_translate(source, target, dst_profile)\n", 115 | "\n", 116 | " # Upload to GCS\n", 117 | " upload(target, cloud_target)\n", 118 | "\n", 119 | " # Remove temporary file\n", 120 | " os.remove(target)" 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 3 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython3", 140 | "version": "3.9.2" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 4 145 | } 146 | -------------------------------------------------------------------------------- /scripts/prism/02_downsample_and_reproject.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# PRISM downsampling and reprojection\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), February 16, 2021_\n", 12 | "\n", 13 | "This notebook downsamples and reprojects PRISM 300m climate normal rasters\n", 14 | "stored in Cloud Optimized GeoTIFF into 4000m GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- COG outputs from `01_prism_to_cogs.ipynb`\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- COG outputs after downsampling and reprojection\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Source CRS and projection extent come from NLCD\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "\n", 37 | "import fsspec\n", 38 | "from rio_cogeo.cogeo import cog_translate\n", 39 | "from rio_cogeo.profiles import cog_profiles\n", 40 | "\n", 41 | "from carbonplan_data.utils import projections, setup\n", 42 | "\n", 43 | "dst_profile = cog_profiles.get(\"deflate\")\n", 44 | "\n", 45 | "workdir, upload = setup(\"joe\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "fs = fsspec.get_filesystem_class(\"gs\")()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "resampling = \"average\"\n", 64 | "resolution = 4000\n", 65 | "\n", 66 | "for region in [\"conus\", \"ak\"]:\n", 67 | " crs, extent = projections(\"albers\", region)\n", 68 | "\n", 69 | " for var in [\"ppt\", \"tmean\"]:\n", 70 | " print(region, var)\n", 71 | "\n", 72 | " source = f\"carbonplan-data/raw/prism/normals/{region}/800m/{var}.tif\"\n", 73 | " cloud_target = f\"processed/prism/normals/{region}/{resolution}m/{var}.tif\"\n", 74 | "\n", 75 | " local_source = \"./local_source.tif\"\n", 76 | " fs.get_file(source, local_source)\n", 77 | "\n", 78 | " cmd = (\n", 79 | " \"gdalwarp\",\n", 80 | " \"-t_srs\",\n", 81 | " f\"'{crs}'\",\n", 82 | " \"-te\",\n", 83 | " extent,\n", 84 | " \"-tr\",\n", 85 | " f\"{resolution} {resolution}\",\n", 86 | " \"-r\",\n", 87 | " resampling,\n", 88 | " local_source,\n", 89 | " \"./raster.tif\",\n", 90 | " )\n", 91 | " print(\" \".join(cmd))\n", 92 | " os.system(\" \".join(cmd))\n", 93 | "\n", 94 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 95 | " upload(\"./raster.tif\", 
cloud_target)\n", 96 | "\n", 97 | "        os.remove(\n", 98 | "            \"raster.tif\",\n", 99 | "        )\n", 100 | "        os.remove(\n", 101 | "            \"local_source.tif\",\n", 102 | "        )" 103 | ] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.9.2" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 4 127 | } 128 | -------------------------------------------------------------------------------- /scripts/statsgo/01_raw_to_tif.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rasterio 3 | 4 | grid = "mu_grid.e00" 5 | var = "awc" 6 | band = "awc.bsq" 7 | 8 | src_raster = rasterio.open(grid) 9 | profile = src_raster.profile 10 | src_resolution = 1000 11 | 12 | tmp_band = src_raster.read(1) 13 | dtype = "uint16" if var == "bd" else "uint8" 14 | f_band = np.fromfile(band, dtype=dtype) 15 | w = src_raster.meta["width"] 16 | h = src_raster.meta["height"] 17 | src_band = f_band.reshape(int(f_band.shape[0] / (w * h)), h, w) 18 | 19 | if var == "hsgpct": 20 |     src_band = np.argmax(src_band, axis=0).astype(dtype) 21 | 22 | if len(src_band.shape) == 3: 23 |     src_band = src_band[0] 24 | 25 | src_band[tmp_band == -2147483647] = 255 26 | 27 | profile.update(dtype=rasterio.uint8, driver="GTiff", nodata=255) 28 | 29 | with rasterio.open("awc.tif", "w", **profile) as dst: 30 |     dst.write(src_band.astype(rasterio.uint8), 1) 31 | -------------------------------------------------------------------------------- /scripts/terraclimate/01_terraclimate_aux_fileds_to_zarr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# TERRACLIMATE Aux fields to Zarr\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook converts the raw TERRACLIMATE auxiliary fields to Zarr format.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "**Outputs:**\n", 18 | "\n", 19 | "- Cloud copy of TERRACLIMATE Aux fields\n", 20 | "\n", 21 | "**Notes:**\n", 22 | "\n", 23 | "- No reprojection or processing of the data is done in this notebook.\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import fsspec\n", 33 | "import xarray as xr\n", 34 | "import zarr" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# options\n", 44 | "name = \"terraclimate\"\n", 45 | "target_location = f\"gs://carbonplan-data/raw/{name}/4000m/raster.zarr\"\n", 46 | "\n", 47 | "\n", 48 | "awc_uri = \"https://github.com/abatz/WATERBALANCE/raw/master/terraclimate_awc.nc\"\n", 49 | "dem_uri = \"http://thredds.northwestknowledge.net:8080/thredds/dodsC/TERRACLIMATE_ALL/layers/terraclim_dem.nc\"" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "with fsspec.open(awc_uri).open() as f:\n", 59 | "    awc = xr.open_dataset(f).load()" 60 | ] 61 | }, 62 | { 63 | "cell_type": 
"code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "dem = xr.open_dataset(dem_uri).load()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "mapper = fsspec.get_mapper(target_location)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "ds_append = xr.merge([dem, awc]).chunk({\"lat\": 1440, \"lon\": 1440})" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "ds_append.to_zarr(mapper, mode=\"a\")" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "zarr.consolidate_metadata(mapper)" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.7.8" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 4 129 | } 130 | -------------------------------------------------------------------------------- /scripts/terraclimate/02_terraclimate_regrid.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Regridding TERRACLIMATE with xesmf\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook regrids the raw TERRACLIMATE dataset to the CONUS 4000m grid using xesmf.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "**Outputs:**\n", 18 | "\n", 19 | "- Cloud copy of TERRACLIMATE regridded to the CONUS 4000m grid\n", 20 | "\n", 21 | "**Notes:**\n", 22 | "\n", 23 | "- All variables are currently regridded with bilinear interpolation; conservative regridding is temporarily disabled (see the variables cell below).\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "%pip install -U xarray==0.16.0 --no-deps" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import fsspec\n", 42 | "import numpy as np\n", 43 | "import xarray as xr\n", 44 | "import xesmf as xe\n", 45 | "from dask.diagnostics import ProgressBar" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "variables = {\n", 55 | "    # 'conservative': [\n", 56 | "    #     \"aet\",\n", 57 | "    #     \"def\",\n", 58 | "    #     \"pet\",\n", 59 | "    #     \"ppt\",\n", 60 | "    #     \"q\",\n", 61 | "    #     \"srad\",\n", 62 | "    # ],\n", 63 | "    \"bilinear\": [\n", 64 | "        \"tmax\",\n", 65 | "        \"tmin\",\n", 66 | "        \"pdsi\",\n", 67 | "        \"vap\",\n", 68 | "        \"vpd\",\n", 69 | "        \"ws\",\n", 70 | "        \"soil\",\n", 71 | "        \"swe\",\n", 72 | "        # move to conservative after scrable is fixed\n", 73 | "        \"aet\",\n", 74 | "        \"def\",\n", 75 | "        \"pet\",\n", 76 | "        \"ppt\",\n", 77 | "        \"q\",\n", 78 | "        \"srad\",\n", 79 | "        \"awc\",\n", 80 | "        \"elevation\",\n", 81 | "    ]\n", 82 | "}" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 
89 | "outputs": [], 90 | "source": [ 91 | "# options\n", 92 | "name = \"terraclimate\"\n", 93 | "raw_location = \"gs://carbonplan-data/raw/terraclimate/4000m/raster.zarr\"\n", 94 | "target_grid = \"gs://carbonplan-data/processed/grids/conus/4000m/domain.zarr\"\n", 95 | "# getting weird errors when writing to carbonplan-data\n", 96 | "target_location = f\"gs://carbonplan-data/processed/{name}/conus/4000m/raster.zarr\"" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "mapper = fsspec.get_mapper(target_grid)\n", 106 | "target_ds = xr.open_zarr(mapper, consolidated=True) # .rename({'xc': 'lon', 'yc': 'lat'})\n", 107 | "target_ds" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "mapper = fsspec.get_mapper(raw_location)\n", 117 | "ds = xr.open_zarr(mapper, consolidated=True)\n", 118 | "ds" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "step = 360 / 8640 + 1e-9\n", 128 | "global_grid = xe.util.grid_global(step, step)\n", 129 | "global_grid = global_grid.isel(y=slice(None, None, -1)).isel(y_b=slice(None, None, -1))\n", 130 | "global_grid[\"lat_b\"].values = np.clip(global_grid[\"lat_b\"].values, -90, 90)\n", 131 | "display(global_grid)\n", 132 | "\n", 133 | "# check that this grid is a drop in replacement for the source grid\n", 134 | "assert np.abs(global_grid.lat.isel(x=0).values - ds.lat.values).max() < 1e-5\n", 135 | "assert np.abs(global_grid.lon.isel(y=0).values - ds.lon.values).max() < 1e-5\n", 136 | "assert np.abs(global_grid.lat).max().item() <= 90\n", 137 | "assert np.abs(global_grid.lat_b).max().item() <= 90\n", 138 | "\n", 139 | "# rename grid variables\n", 140 | "source_ds = ds.rename({\"lon\": \"x\", \"lat\": \"y\"}).assign_coords(coords=global_grid.coords)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "regridders = {}\n", 150 | "\n", 151 | "for method in variables:\n", 152 | " regridders[method] = xe.Regridder(source_ds, target_ds, method, reuse_weights=True)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "temp = []\n", 162 | "for method, var_list in variables.items():\n", 163 | " regridder = regridders[method]\n", 164 | " temp.append(regridder(ds[var_list].chunk({\"lat\": -1, \"lon\": -1})))\n", 165 | "\n", 166 | "ds_out = xr.merge(temp, compat=\"override\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "ds_out" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "# fs = fsspec.get_filesystem_class('gs')()\n", 185 | "# fs.rm(target_location, recursive=True)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "from multiprocessing.pool import ThreadPool\n", 195 | "\n", 196 | "import dask\n", 197 | "\n", 198 | "with dask.config.set(scheduler=\"threads\", pool=ThreadPool(3)):\n", 199 | " with ProgressBar():\n", 200 | " mapper2 = fsspec.get_mapper(target_location)\n", 201 | " 
ds_out.to_zarr(mapper2, mode=\"w\", consolidated=True)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "import zarr\n", 211 | "\n", 212 | "mapper2 = fsspec.get_mapper(target_location)\n", 213 | "\n", 214 | "\n", 215 | "zarr.consolidate_metadata(mapper2)" 216 | ] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.8.6" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 4 240 | } 241 | -------------------------------------------------------------------------------- /scripts/worldclim/01_raw_to_zarr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "- https://www.worldclim.org/data/worldclim21.html\n", 9 | "- Citation:\n", 10 | "\n", 11 | " Fick, S.E. and R.J. Hijmans, 2017. WorldClim 2: new 1km spatial resolution\n", 12 | " climate surfaces for global land areas. International Journal of Climatology\n", 13 | " 37 (12): 4302-4315.\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "1", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import fsspec\n", 24 | "import xarray as xr" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "2", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "month_vars = [\"prec\", \"srad\", \"tavg\", \"tmax\", \"tmin\", \"vapr\", \"wind\"]\n", 35 | "\n", 36 | "months = xr.Variable(\"month\", list(range(1, 13)))\n", 37 | "\n", 38 | "ds = xr.Dataset()\n", 39 | "\n", 40 | "for var in month_vars:\n", 41 | " ds[var] = xr.concat(\n", 42 | " [\n", 43 | " xr.open_rasterio(\n", 44 | " f\"gs://carbonplan-scratch/worldclim-raw/wc2.1_30s_{var}/wc2.1_30s_{var}_{m:02d}.tif\",\n", 45 | " chunks={\"x\": 8192, \"y\": 8192},\n", 46 | " ).squeeze(drop=True)\n", 47 | " for m in months.values\n", 48 | " ],\n", 49 | " dim=months,\n", 50 | " compat=\"override\",\n", 51 | " coords=\"minimal\",\n", 52 | " )" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "3", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "ds" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "id": "4", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "ds[\"elev\"] = xr.open_rasterio(\n", 73 | " \"gs://carbonplan-scratch/worldclim-raw/wc2.1_30s_elev.tif\",\n", 74 | " chunks={\"x\": 8192, \"y\": 8192},\n", 75 | ").squeeze(drop=True)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "5", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "bio_names = {\n", 86 | " \"BIO1\": \"Annual Mean Temperature\",\n", 87 | " \"BIO2\": \"Mean Diurnal Range (Mean of monthly (max temp - min temp))\",\n", 88 | " \"BIO3\": \"Isothermality (BIO2/BIO7) (×100)\",\n", 89 | " \"BIO4\": \"Temperature Seasonality (standard deviation ×100)\",\n", 90 | " \"BIO5\": \"Max Temperature of Warmest Month\",\n", 91 | " \"BIO6\": \"Min Temperature of Coldest 
Month\",\n", 92 | " \"BIO7\": \"Temperature Annual Range (BIO5-BIO6)\",\n", 93 | " \"BIO8\": \"Mean Temperature of Wettest Quarter\",\n", 94 | " \"BIO9\": \"Mean Temperature of Driest Quarter\",\n", 95 | " \"BIO10\": \"Mean Temperature of Warmest Quarter\",\n", 96 | " \"BIO11\": \"Mean Temperature of Coldest Quarter\",\n", 97 | " \"BIO12\": \"Annual Precipitation\",\n", 98 | " \"BIO13\": \"Precipitation of Wettest Month\",\n", 99 | " \"BIO14\": \"Precipitation of Driest Month\",\n", 100 | " \"BIO15\": \"Precipitation Seasonality (Coefficient of Variation)\",\n", 101 | " \"BIO16\": \"Precipitation of Wettest Quarter\",\n", 102 | " \"BIO17\": \"Precipitation of Driest Quarter\",\n", 103 | " \"BIO18\": \"Precipitation of Warmest Quarter\",\n", 104 | " \"BIO19\": \"Precipitation of Coldest Quarter\",\n", 105 | "}\n", 106 | "\n", 107 | "\n", 108 | "for bionum in range(1, 20):\n", 109 | " ds[f\"BIO{bionum:02d}\"] = xr.open_rasterio(\n", 110 | " f\"gs://carbonplan-scratch/worldclim-raw/wc2.1_30s_bio/wc2.1_30s_bio_{bionum}.tif\",\n", 111 | " chunks={\"x\": 8192, \"y\": 8192},\n", 112 | " ).squeeze(drop=True)\n", 113 | " ds[f\"BIO{bionum:02d}\"].attrs[\"description\"] = bio_names[f\"BIO{bionum:d}\"]" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "id": "6", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "mapper = fsspec.get_mapper(\"gs://carbonplan-data/raw/worldclim/30s/raster.zarr\")\n", 124 | "\n", 125 | "ds.to_zarr(mapper, consolidated=True)" 126 | ] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python [conda env:notebook] *", 132 | "language": "python", 133 | "name": "conda-env-notebook-py" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.8.8" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 5 150 | } 151 | -------------------------------------------------------------------------------- /sources.yaml: -------------------------------------------------------------------------------- 1 | mtbs: 2 | description: Raw datasets from MTBS 3 | metadata: 4 | url: https://www.mtbs.gov/ 5 | data: 6 | burned_area_extent_shapefile: 7 | actions: [download, unzip] 8 | urlpath: 9 | - https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/MTBS_Fire/data/composite_data/burned_area_extent_shapefile/mtbs_perimeter_data.zip 10 | fod_pt_shapefile: 11 | actions: [wget, unzip] 12 | urlpath: 13 | - https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/MTBS_Fire/data/composite_data/fod_pt_shapefile/mtbs_fod_pts_data.zip 14 | 15 | mlrc: 16 | description: Raw datasets from MRLC 17 | metadata: 18 | url: https://www.mrlc.gov/data 19 | data: 20 | NLCD_Land_Cover_Change_Index_L48_20190424: 21 | actions: [download, unzip] 22 | urlpath: 23 | - https://s3-us-west-2.amazonaws.com/mrlc/NLCD_Land_Cover_Change_Index_L48_20190424.zip 24 | NLCD_Land_Cover_L48_20190424_full_zip: 25 | actions: [wget, unzip] 26 | urlpath: 27 | - https://s3-us-west-2.amazonaws.com/mrlc/NLCD_Land_Cover_L48_20190424_full_zip.zip 28 | NLCD_Land_Cover_AK_20200213: 29 | actions: [download, unzip] 30 | urlpath: 31 | - https://s3-us-west-2.amazonaws.com/mrlc/NLCD_2001_Land_Cover_AK_20200213.zip 32 | - https://s3-us-west-2.amazonaws.com/mrlc/NLCD_2011_Land_Cover_AK_20200213.zip 33 | - 
https://s3-us-west-2.amazonaws.com/mrlc/NLCD_2016_Land_Cover_AK_20200213.zip 34 |     nlcd_treecanopy_2019_08_31: 35 |       actions: [download, unzip] 36 |       urlpath: 37 |         - https://s3-us-west-2.amazonaws.com/mrlc/nlcd_2011_treecanopy_2019_08_31.zip 38 |         - https://s3-us-west-2.amazonaws.com/mrlc/nlcd_2016_treecanopy_2019_08_31.zip 39 | 40 | usfs: 41 |   description: Raw raster datasets from the US Forest Service raster gateway 42 |   metadata: 43 |     url: https://data.fs.usda.gov/geodata/rastergateway/ 44 |   data: 45 |     conus_forestgroup: 46 |       actions: [download, unzip] 47 |       urlpath: 48 |         - https://data.fs.usda.gov/geodata/rastergateway/forest_type/conus_forestgroup.zip 49 |     conus_foresttype: 50 |       actions: [download, unzip] 51 |       urlpath: 52 |         - https://data.fs.usda.gov/geodata/rastergateway/forest_type/conus_forest-type.zip 53 |     ak_forestgroup: 54 |       actions: [download, unzip] 55 |       urlpath: 56 |         - https://data.fs.usda.gov/geodata/rastergateway/forest_type/ak_forestgroup.zip 57 |     ak_foresttype: 58 |       actions: [download, unzip] 59 |       urlpath: 60 |         - https://data.fs.usda.gov/geodata/rastergateway/forest_type/ak_forest-type.zip 61 | 62 | fia: 63 |   description: Raw datasets from the Forest Inventory and Analysis (FIA) program 64 |   metadata: 65 |     url: https://apps.fs.usda.gov/fia/datamart/datamart.html 66 |   data: 67 |     entire: 68 |       actions: [download, unzip] 69 |       urlpath: 70 |         - https://apps.fs.usda.gov/fia/datamart/CSV/ENTIRE.zip 71 | 72 | gcp: 73 |   description: Raw datasets from the Global Carbon Budget. 74 |   metadata: 75 |     url: https://www.icos-cp.eu/global-carbon-budget-2019 76 |   data: 77 |     global_budget_2019: 78 |       actions: [manual] 79 |       urlpath: 80 |         - https://doi.org/10.18160/GCP-2019 81 |     national_emissions_2019: 82 |       actions: [manual] 83 |       urlpath: 84 |         - https://doi.org/10.18160/GCP-2019 85 | 86 | iiasa: 87 |   description: Raw SSP and RCP scenario datasets from the IIASA database. 88 |   metadata: 89 |     url: 90 |   data: 91 |     SSP_IAM_V2_201811: 92 |       actions: [manual] 93 |       urlpath: 94 |         - https://tntcat.iiasa.ac.at/SspDb/download/iam_v2/SSP_IAM_V2_201811.csv.zip 95 |     SSP_CMIP6_201811: 96 |       actions: [manual] 97 |       urlpath: 98 |         - https://tntcat.iiasa.ac.at/SspDb/download/cmip6/SSP_CMIP6_201811.csv.zip 99 |         - https://tntcat.iiasa.ac.at/SspDb/download/cmip6/cmip6_iam_model_region_mapping.xlsx 100 |     SSP_BASIC_ELEMENTS: 101 |       actions: [manual] 102 |       urlpath: 103 |         - https://tntcat.iiasa.ac.at/SspDb/download/basic_elements/SspDb_compare_regions_2013-06-12.csv.zip 104 |         - https://tntcat.iiasa.ac.at/SspDb/download/basic_elements/SspDb_country_data_2013-06-12.csv.zip 105 |     RCP_CMIP5: 106 |       actions: [manual] 107 |       urlpath: 108 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/PICNTRL_MIDYR_CONC.zip 109 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/PRE2005_MIDYR_CONC.zip 110 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/RCP3PD_MIDYR_CONC.zip 111 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/RCP45_MIDYR_CONC.zip 112 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/RCP6_MIDYR_CONC.zip 113 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/RCP85_MIDYR_CONC.zip 114 | --------------------------------------------------------------------------------
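Note: the download notebooks key off the `actions` lists in `sources.yaml` — for example, `scripts/nlcd/00_download.ipynb` only fetches entries whose actions include `download`, so the two entries marked `[wget, unzip]` are skipped by that loop. A quick consistency check can flag such entries before a download run; the snippet below is an illustrative sketch only and is not part of the repository.

```python
import yaml

# load the catalog exactly as the download notebooks do
with open("sources.yaml") as f:
    sources = yaml.load(f, Loader=yaml.FullLoader)

for group, spec in sources.items():
    for name, dset in spec.get("data", {}).items():
        actions = dset.get("actions", [])
        urls = dset.get("urlpath", [])
        # entries marked "manual" are fetched by hand; everything else should
        # carry a "download" action or the notebooks' loops will skip them
        if "manual" not in actions and "download" not in actions:
            print(f"{group}/{name}: actions {actions} will be skipped by the download loops")
        if not urls:
            print(f"{group}/{name}: no urlpath listed")
```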