├── .github ├── dependabot.yml └── workflows │ ├── main.yaml │ └── pypi-release.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── .prettierrc.toml ├── LICENSE ├── MANIFEST.in ├── README.md ├── carbonplan_data ├── __init__.py ├── catalogs │ ├── fia.yaml │ ├── fluxnet.yaml │ ├── gcp.yaml │ ├── gridmet.yaml │ ├── grids.yaml │ ├── maca.yaml │ ├── master.yaml │ ├── mtbs.yaml │ ├── nftd.yaml │ ├── nlcd.yaml │ ├── projects.yaml │ ├── spawnetal2020.yaml │ └── terraclimate.yaml ├── metadata.py ├── tests │ ├── __init__.py │ ├── test_catalogs.py │ └── test_utils.py └── utils.py ├── ci └── environment.yaml ├── codecov.yml ├── pyproject.toml ├── scripts ├── fia │ ├── 00_download.ipynb │ ├── 01_raw_to_parquet.ipynb │ └── 01_raw_to_parquet_part2.ipynb ├── fluxnet │ ├── 01_raw_to_parquet.ipynb │ └── download.sh ├── gcp │ └── 01_raw_to_parquet.ipynb ├── glas │ └── 01_cache_glas_data.ipynb ├── global-biomass │ └── 01_biomass_to_cogs.ipynb ├── gridmet │ └── 01_gridmet_to_zarr.ipynb ├── grids │ └── make_grid.ipynb ├── iiasa │ └── 01_raw_to_parquet.ipynb ├── mtbs │ ├── 01_raw_to_cogs.ipynb │ ├── 02_downsampling_and_reprojection.ipynb │ ├── 02_mtbs_to_zarr.ipynb │ ├── 03_mtbs_to_zarr.ipynb │ ├── 04_mtbs_perims_to_raster.ipynb │ ├── 05_monthly_downsampling.ipynb │ ├── 05_monthly_mtbs_to_zarr.ipynb │ ├── 05_monthly_mtbs_to_zarr.py │ ├── 06_annual_downsampling.py │ ├── 06_annual_mtbs_to_zarr.py │ ├── README.md │ └── prepare.py ├── nftd │ ├── 00_download.ipynb │ ├── 01_nftd_to_cogs.ipynb │ └── 02_downsampling_and_reprojection.ipynb ├── nlcd │ ├── 00_download.ipynb │ ├── 01_nlcd_to_cogs.ipynb │ ├── 02_downsampling_and_reprojection.ipynb │ └── README.md ├── prism │ ├── 00_download.ipynb │ ├── 01_prism_to_cogs.ipynb │ └── 02_downsample_and_reproject.ipynb ├── statsgo │ └── 01_raw_to_tif.py ├── terraclimate │ ├── 01_terraclimate_aux_fileds_to_zarr.ipynb │ ├── 01_terraclimate_to_zarr3.ipynb │ ├── 02_terraclimate_regrid.ipynb │ └── 02_terraclimate_to_fiaplots.ipynb └── worldclim │ └── 01_raw_to_zarr.ipynb └── sources.yaml /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | groups: 8 | actions: 9 | patterns: 10 | - "*" 11 | -------------------------------------------------------------------------------- /.github/workflows/main.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.ref }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | test: 15 | name: test-py${{ matrix.python }}-${{matrix.CARBONPLAN_DATA}} 16 | runs-on: ubuntu-latest 17 | defaults: 18 | run: 19 | shell: bash -l {0} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python: ["3.9", "3.10", "3.11", "3.12"] 24 | CARBONPLAN_DATA: ["gs://carbonplan-data"] 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Install Conda environment from environment.yml 28 | uses: mamba-org/setup-micromamba@v2 29 | with: 30 | # environment-file is not assumed anymore 31 | environment-file: ci/environment.yaml 32 | create-args: >- 33 | python=${{ matrix.python-version }} 34 | - name: Install package 35 | run: | 36 | python -m pip install -e . 
37 | - name: Conda list information 38 | run: | 39 | conda env list 40 | conda list 41 | - name: Run tests 42 | env: 43 | CARBONPLAN_DATA: ${{ matrix.CARBONPLAN_DATA }} 44 | run: | 45 | python -m pytest 46 | -------------------------------------------------------------------------------- /.github/workflows/pypi-release.yaml: -------------------------------------------------------------------------------- 1 | name: Build and Upload carbonplan_data to PyPI 2 | on: 3 | release: 4 | types: 5 | - published 6 | # Runs for pull requests should be disabled other than for testing purposes 7 | # pull_request: 8 | # branches: 9 | # - main 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | build-artifacts: 16 | runs-on: ubuntu-latest 17 | if: github.repository == 'carbonplan/data' 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 22 | - uses: actions/setup-python@v5.4.0 23 | name: Install Python 24 | with: 25 | python-version: "3.11" 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install build twine 31 | git clean -xdf 32 | git restore -SW . 33 | 34 | # This step is only necessary for testing purposes and for TestPyPI 35 | - name: Fix up version string for TestPyPI 36 | if: ${{ !startsWith(github.ref, 'refs/tags') }} 37 | run: | 38 | # Change setuptools-scm local_scheme to "no-local-version" so the 39 | # local part of the version isn't included, making the version string 40 | # compatible with PyPI. 41 | sed --in-place "s/node-and-date/no-local-version/g" pyproject.toml 42 | 43 | - name: Build tarball and wheels 44 | run: | 45 | python -m build 46 | - name: Check built artifacts 47 | run: | 48 | python -m twine check --strict dist/* 49 | pwd 50 | if [ -f dist/carbonplan_data-0.0.0.tar.gz ]; then 51 | echo "❌ INVALID VERSION NUMBER" 52 | exit 1 53 | else 54 | echo "✅ Looks good" 55 | fi 56 | - uses: actions/upload-artifact@v4 57 | with: 58 | name: releases 59 | path: dist 60 | 61 | test-built-dist: 62 | needs: build-artifacts 63 | runs-on: ubuntu-latest 64 | steps: 65 | - uses: actions/setup-python@v5.4.0 66 | name: Install Python 67 | with: 68 | python-version: "3.11" 69 | - uses: actions/download-artifact@v4 70 | with: 71 | name: releases 72 | path: dist 73 | - name: List contents of built dist 74 | run: | 75 | ls -ltrh 76 | ls -ltrh dist 77 | - name: Verify the built dist/wheel is valid 78 | run: | 79 | python -m pip install --upgrade pip 80 | python -m pip install dist/carbonplan_data*.whl 81 | python -c "from carbonplan_data.utils import get_versions; print(get_versions())" 82 | - name: Publish package to TestPyPI 83 | uses: pypa/gh-action-pypi-publish@v1.12.4 84 | with: 85 | password: ${{ secrets.TEST_PYPI_TOKEN }} 86 | repository-url: https://test.pypi.org/legacy/ 87 | # verbose: true 88 | 89 | upload-to-pypi: 90 | needs: test-built-dist 91 | if: github.event_name == 'release' 92 | runs-on: ubuntu-latest 93 | steps: 94 | - uses: actions/download-artifact@v4 95 | with: 96 | name: releases 97 | path: dist 98 | - name: Publish package to PyPI 99 | uses: pypa/gh-action-pypi-publish@v1.12.4 100 | with: 101 | password: ${{ secrets.PYPI_TOKEN }} 102 | # verbose: true 103 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | processed/ 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 
| 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | carbonplan_data/_version.py 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # web 136 | .next 137 | node_modules 138 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autoupdate_schedule: monthly 3 | 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v5.0.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: end-of-file-fixer 10 | - id: check-docstring-first 11 | - id: check-json 12 | - id: check-yaml 13 | - id: debug-statements 14 | - id: mixed-line-ending 15 | 16 | - repo: https://github.com/astral-sh/ruff-pre-commit 17 | rev: "v0.11.8" 18 | hooks: 19 | - id: ruff 20 | args: ["--fix"] 21 | - id: ruff-format 22 | 23 | - repo: https://github.com/pre-commit/mirrors-prettier 24 | rev: v4.0.0-alpha.8 25 | hooks: 26 | - id: prettier 27 | 28 | - repo: https://github.com/kynan/nbstripout 29 | rev: 0.8.1 30 | hooks: 31 | - id: nbstripout 32 | 33 | - repo: https://github.com/pre-commit/mirrors-prettier 34 | rev: "v4.0.0-alpha.8" 35 | hooks: 36 | - id: prettier 37 | name: prettier-markdown 38 | entry: prettier --write --parser mdx 39 | files: "\\.(\ 40 | |md|markdown|mdown|mkdn\ 41 | 
|mdx\ 42 | )$" 43 | -------------------------------------------------------------------------------- /.prettierrc.toml: -------------------------------------------------------------------------------- 1 | tabWidth = 2 2 | semi = false 3 | singleQuote = true 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 carbonplan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include carbonplan_data/catalogs * 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 4 | 5 | <!-- CarbonPlan monogram (header image) --> 6 | 7 |

9 | 10 | # carbonplan / data 11 | 12 | **data catalog and curation** 13 | 14 | [![CI](https://github.com/carbonplan/data/actions/workflows/main.yaml/badge.svg)](https://github.com/carbonplan/data/actions/workflows/main.yaml) 15 | [![PyPI](https://img.shields.io/pypi/v/carbonplan-data)](https://pypi.org/project/carbonplan-data/) 16 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) 17 | 18 | This repository includes our main data catalog as well as our pre-processing utilities. 19 | 20 | ## install 21 | 22 | ```shell 23 | python -m pip install carbonplan[data] 24 | ``` 25 | 26 | ## usage 27 | 28 | The CarbonPlan data archives are currently mirrored on Google Cloud Storage (US-Central) and 29 | AWS (us-west-2). Set the `CARBONPLAN_DATA` environment variable before using the 30 | Intake catalog below: 31 | 32 | ```shell 33 | # google (us-central) 34 | export CARBONPLAN_DATA="https://storage.googleapis.com/carbonplan-data" 35 | # or 36 | # s3 (us-west-2) 37 | export CARBONPLAN_DATA="https://carbonplan-data.s3.us-west-2.amazonaws.com" 38 | ``` 39 | 40 | ```python 41 | # open the top level catalog 42 | from carbonplan.data import cat 43 | 44 | # extract an entry as a Dask-backed Xarray Dataset 45 | cat.mtbs["raster"](region="conus", resolution="4000m").to_dask() 46 | ``` 47 | 48 | --- 49 | 50 | ## developer documentation 51 | 52 | To run the unit and integration tests for this API, run: 53 | 54 | ```shell 55 | $ pytest -v 56 | ``` 57 | 58 | Catalog entries scan be marked as either _skip_ or _xfail_ by setting the `ci` key in the metadata dictionary: 59 | 60 | ```yaml 61 | foo: 62 | description: "skip this entry in the CI tests" 63 | metadata: 64 | ci: skip 65 | ``` 66 | 67 | ## license 68 | 69 | All the code in this repository is [MIT](https://choosealicense.com/licenses/mit/)-licensed. When possible, the data is licensed using the [CC-BY-4.0](https://choosealicense.com/licenses/cc-by-4.0/) license. We include attribution and additional license information for third party datasets, and we request that you also maintain that attribution if using this data. 70 | 71 | ## about us 72 | 73 | CarbonPlan is a nonprofit organization that uses data and science for climate action. We aim to improve the transparency and scientific integrity of climate solutions with open data and tools. Find out more at [carbonplan.org](https://carbonplan.org/) or get in touch by [opening an issue](https://github.com/carbonplan/data/issues/new) or [sending us an email](mailto:hello@carbonplan.org). 
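
The pattern in the usage section generalizes to any entry in the catalog. The sketch below is illustrative rather than canonical: it assumes network access to one of the public mirrors and that the optional intake drivers referenced by the catalogs (`intake-parquet`, `intake-xarray`) are installed.

```python
import os

# optional: pick a mirror explicitly; the package falls back to the
# Google Cloud Storage mirror when CARBONPLAN_DATA is unset
os.environ.setdefault("CARBONPLAN_DATA", "https://storage.googleapis.com/carbonplan-data")

from carbonplan_data import cat  # the README imports the same catalog via carbonplan.data

# list every entry nested under the master catalog
for name in cat.walk(depth=2):
    print(name)

# tabular example: read one FIA table into a pandas DataFrame
plots = cat.fia["raw_table"](name="plot").read()

# raster example: lazily open the processed MTBS burn-severity mosaic
mtbs = cat.mtbs["raster"](region="conus", resolution="4000m").to_dask()
print(mtbs)
```

Parameter names such as `name="plot"`, `region`, and `resolution` come from the `parameters` blocks of the corresponding catalog files under `carbonplan_data/catalogs/`.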
74 | -------------------------------------------------------------------------------- /carbonplan_data/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | import pathlib 5 | from importlib.metadata import PackageNotFoundError as _PackageNotFoundError 6 | from importlib.metadata import version as _version 7 | 8 | import intake 9 | 10 | try: 11 | version = _version(__name__) 12 | except _PackageNotFoundError: 13 | # package is not installed 14 | version = "unknown" 15 | __version__ = version 16 | 17 | CATALOG_DIR_PATH = pathlib.Path(__file__) 18 | MASTER_CATALOG_PATH = str(CATALOG_DIR_PATH.parent / "catalogs/master.yaml") 19 | KNOWN_DATA_LOCATIONS = [ 20 | "https://storage.googleapis.com/carbonplan-data", 21 | "https://carbonplan-data.s3.us-west-2.amazonaws.com", 22 | ] 23 | 24 | # open master catalog 25 | if "CARBONPLAN_DATA" not in os.environ: 26 | os.environ["CARBONPLAN_DATA"] = "https://storage.googleapis.com/carbonplan-data" 27 | 28 | cat = intake.open_catalog(MASTER_CATALOG_PATH) 29 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/fia.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_parquet 4 | 5 | # TODOs: 6 | # - remove csvs in gcp. 7 | # - add aux data source(s) 8 | 9 | sources: 10 | raw_table: 11 | metadata: 12 | title: Forest Inventory Analysis (raw) 13 | summary: Data on status and trends in forest location, health, growth, mortality, and production. 14 | description: | 15 | The Forest Inventory and Analysis dataset is a nationwide survey of the forest assets of 16 | the United States. The Forest Inventory and Analysis (FIA) research program has been in 17 | existence since mandated by Congress in 1928. FIA's primary objective is to determine 18 | the extent, condition, volume, growth, and use of trees on the Nation's forest land. 19 | tags: [forests] 20 | type: application/parquet 21 | license: Public domain 22 | providers: 23 | - name: USDA Forest Service 24 | description: Data provided by the United States Department of Agriculture Forest Service. 
25 | url: https://www.fia.fs.fed.us/ 26 | driver: parquet 27 | parameters: 28 | name: 29 | description: FIA data product name 30 | type: str 31 | default: plot 32 | allowed: 33 | [ 34 | boundary, 35 | cond, 36 | cond_dwm_calc, 37 | county, 38 | dwm_coarse_woody_debris, 39 | dwm_duff_litter_fuel, 40 | dwm_fine_woody_debris, 41 | dwm_microplot_fuel, 42 | dwm_residual_pile, 43 | dwm_transect_segment, 44 | dwm_visit, 45 | grnd_cvr, 46 | invasive_subplot_spp, 47 | lichen_lab, 48 | lichen_plot_summary, 49 | lichen_visit, 50 | ozone_biosite_summary, 51 | ozone_plot, 52 | ozone_plot_summary, 53 | ozone_species_summary, 54 | ozone_validation, 55 | ozone_visit, 56 | p2veg_subplot_spp, 57 | p2veg_subp_structure, 58 | plot, 59 | plotgeom, 60 | plotsnap, 61 | plot_regen, 62 | pop_estn_unit, 63 | pop_eval, 64 | pop_eval_attribute, 65 | pop_eval_grp, 66 | pop_eval_typ, 67 | pop_plot_stratum_assgn, 68 | pop_stratum, 69 | seedling, 70 | seedling_regen, 71 | sitetree, 72 | soils_erosion, 73 | soils_lab, 74 | soils_sample_loc, 75 | soils_visit, 76 | subplot, 77 | subplot_regen, 78 | subp_cond, 79 | subp_cond_chng_mtrx, 80 | survey, 81 | tree, 82 | tree_grm_begin, 83 | tree_grm_component, 84 | tree_grm_estn, 85 | tree_grm_midpt, 86 | tree_grm_threshold, 87 | tree_regional_biomass, 88 | tree_woodland_stems, 89 | veg_plot_species, 90 | veg_quadrat, 91 | veg_subplot, 92 | veg_subplot_spp, 93 | veg_visit, 94 | ] 95 | args: 96 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/fia/{{ name }}.parquet" 97 | engine: "pyarrow" 98 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/fluxnet.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_parquet 4 | 5 | sources: 6 | raw_aux: 7 | metadata: 8 | title: FLUXNET Auxiliary Data (raw) 9 | summary: Supporting metadata for the FLUXNET dataset. 10 | description: | 11 | The preparation of this FLUXNET Dataset has been possible thanks only to the efforts of 12 | many scientists and technicians around the world and the coordination among teams from 13 | regional networks. The previous versions of FLUXNET Dataset releases are the FLUXNET 14 | Marconi Dataset (2000) and the FLUXNET LaThuile Dataset (2007). The FLUXNET2015 Dataset 15 | includes several improvements to the data quality control protocols and the data 16 | processing pipeline. Examples include close interaction with tower teams to improve data 17 | quality, new methods for uncertainty quantification, use of reanalysis data to fill long 18 | gaps of micrometeorological variable records, among others (see the data processing 19 | pipeline page for details). 20 | tags: [climate, carbon] 21 | type: application/parquet 22 | license: Creative Commons Attribution 4.0 International 23 | providers: 24 | - name: FLUXNET 25 | description: | 26 | FLUXNET is organized through the Regional Networks that contribute data to FLUXNET 27 | datasets available at the FLUXNET webiste (https://fluxnet.org/), hosted at the 28 | Lawrence Berkeley National Laboratory (USA). 
29 | url: https://fluxnet.org/ 30 | parameters: 31 | station: 32 | description: fluxnet station code 33 | type: str 34 | default: it-noe 35 | kind: 36 | description: fluxnet data stream 37 | type: str 38 | default: auxmeteo 39 | allowed: [auxmeteo, auxnee] 40 | driver: parquet 41 | args: 42 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/fluxnet/{{ station }}_{{ kind }}.parquet" 43 | 44 | raw_fullset: 45 | metadata: 46 | title: FLUXNET FULLSET Data (raw) 47 | summary: Global network of micrometeorological flux measurement measuring carbon, energy and water cycles. 48 | description: | 49 | The preparation of this FLUXNET Dataset has been possible thanks only to the efforts of 50 | many scientists and technicians around the world and the coordination among teams from 51 | regional networks. The previous versions of FLUXNET Dataset releases are the FLUXNET 52 | Marconi Dataset (2000) and the FLUXNET LaThuile Dataset (2007). The FLUXNET2015 Dataset 53 | includes several improvements to the data quality control protocols and the data 54 | processing pipeline. Examples include close interaction with tower teams to improve data 55 | quality, new methods for uncertainty quantification, use of reanalysis data to fill long 56 | gaps of micrometeorological variable records, among others (see the data processing 57 | pipeline page for details). 58 | tags: [climate, carbon] 59 | type: application/parquet 60 | license: Creative Commons Attribution 4.0 International 61 | providers: 62 | - name: FLUXNET 63 | description: | 64 | FLUXNET is organized through the Regional Networks that contribute data to FLUXNET 65 | datasets available at the FLUXNET webiste (https://fluxnet.org/), hosted at the 66 | Lawrence Berkeley National Laboratory (USA). 67 | url: https://fluxnet.org/ 68 | parameters: 69 | station: 70 | description: fluxnet station code 71 | type: str 72 | default: it-noe 73 | kind: 74 | description: fluxnet data stream 75 | type: str 76 | default: fullset 77 | allowed: [erai, fullset] 78 | freq: 79 | description: temporal frequency 80 | type: str 81 | default: dd 82 | allowed: [dd, hh, mm, ww, yy] 83 | driver: parquet 84 | args: 85 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/fluxnet/{{ station }}_{{ kind }}_{{ freq }}.parquet" 86 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/gcp.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | raw_table: 3 | metadata: 4 | title: Global Carbon Project 5 | summary: Timeseries of the global carbon budget and carbon emissions. 6 | description: | 7 | The annually updated Global Carbon Budget produced by the Global Carbon Project. 8 | All datasets and modeling output to complete the Global Carbon Budget 2019 are 9 | described in detail in 10 | [Friedlingstein et al. (2019)](https://doi.org/10.5194/essd-11-1783-2019). 11 | tags: [carbon] 12 | type: application/parquet 13 | license: The use of data is conditional on citing the original data sources. 14 | providers: 15 | - name: Integrated Carbon Observation System 16 | description: | 17 | The Integrated Carbon Observation System, ICOS, is a European-wide greenhouse gas 18 | research infrastructure. ICOS produces standardised data on greenhouse gas 19 | concentrations in the atmosphere, as well as on carbon fluxes between the 20 | atmosphere, the earth and oceans. 
21 | url: https://www.icos-cp.eu/global-carbon-budget-2019 22 | parameters: 23 | name: 24 | description: name of GCB dataset 25 | type: str 26 | default: global_carbon_budget 27 | allowed: 28 | [ 29 | global_carbon_budget, 30 | fossil_emissions_by_fuel_type, 31 | land_use_change_emissions, 32 | ocean_sink, 33 | terrestrial_sink, 34 | historical_budget, 35 | consumption_emissions, 36 | territorial_emissions, 37 | transfer_emissions, 38 | ] 39 | driver: parquet 40 | args: 41 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/gcp/{{ name }}.parquet" 42 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/gridmet.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_gridmet: 7 | metadata: 8 | title: gridMET (raw) 9 | summary: High-resolution surface meteorologicaldata covering the conus US from 1979-yesterday. 10 | description: | 11 | gridMET is a dataset of daily high-spatial resolution (~4-km, 1/24th degree) surface 12 | meteorological data covering the contiguous US from 1979-yesterday. 13 | These data can provide important inputs for ecological, agricultural, and 14 | hydrological models. These data are updated daily. gridMET is the preferred naming 15 | convention for these data; however, the data are also known as cited as METDATA. 16 | tags: [climate] 17 | type: application/netcdf 18 | license: Public Domain Mark 1.0 19 | providers: 20 | - name: Climatology Lab, University of California, Merced 21 | description: Data provided by Dr. John Abatzoglou's Climatology Lab at the University of California, Merced. 22 | url: http://www.climatologylab.org 23 | driver: opendap 24 | parameters: 25 | variable: 26 | description: climate variable 27 | type: str 28 | default: pr 29 | allowed: 30 | [ 31 | "pr", 32 | "tmmn", 33 | "tmmx", 34 | "rmax", 35 | "rmin", 36 | "sph", 37 | "srad", 38 | "th", 39 | "vs", 40 | "bi", 41 | "fm100", 42 | "fm1000", 43 | "erc", 44 | "pdsi", 45 | "etr", 46 | "pet", 47 | "vpd", 48 | ] 49 | year: 50 | description: year 51 | type: int 52 | default: 2000 53 | args: 54 | urlpath: http://thredds.northwestknowledge.net:8080/thredds/dodsC/MET/{{ variable }}/{{ variable }}_{{ '%04d' % year }}.nc 55 | auth: null 56 | chunks: 57 | lat: 585 58 | lon: 1386 59 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/grids.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | albers4k: 7 | metadata: 8 | title: Albers Equal Area 4km grid 9 | summary: Grid definition for the CONUS/AK 4km Albers Equal Area study area 10 | description: | 11 | This dataset defines the grid used for many of our Albers Equal Area 4km data. 12 | products. It is particularly useful as a target grid when regridding/reprojecting 13 | other datasets to this (common) grid. 14 | tags: [meta] 15 | type: application/zarr 16 | license: Creative Commons Attribution 4.0 International 17 | providers: 18 | - name: CarbonPlan 19 | description: | 20 | CarbonPlan is a registered non-profit public benefit corporation working on 21 | the science and data of carbon removal. 
22 | url: https://carbonplan.org 23 | parameters: 24 | region: 25 | description: conus or ak 26 | type: str 27 | default: conus 28 | allowed: [conus, ak] 29 | driver: zarr 30 | args: 31 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/grids/{{ region }}/4000m/domain.zarr/" 32 | consolidated: True 33 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/maca.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_maca: 7 | metadata: 8 | title: MACA (raw) 9 | summary: Historical and future climate projections derived from CMIP5 using the MACA statistical downscaling technique. 10 | description: | 11 | Multivariate Adaptive Constructed Analogs (MACA) is a statistical method for downscaling 12 | Global Climate Models (GCMs) from their native coarse resolution to a higher spatial 13 | resolution that captures reflects observed patterns of daily near-surface meteorology and 14 | simulated changes in GCMs experiments. 15 | tags: [climate] 16 | type: application/netcdf 17 | license: Creative Commons CC0 1.0 Universal 18 | providers: 19 | - name: Climatology Lab, University of California, Merced 20 | description: Data provided by Dr. John Abatzoglou's Climatology Lab at the University of California, Merced. 21 | url: http://www.climatologylab.org 22 | driver: opendap 23 | parameters: 24 | variable: 25 | description: climate variable 26 | type: str 27 | default: pr 28 | allowed: 29 | [ 30 | "huss", 31 | "pr", 32 | "rhsmin", 33 | "rhsmax", 34 | "rsds", 35 | "tasmax", 36 | "tasmin", 37 | "uas", 38 | "vas", 39 | "vpd", 40 | ] 41 | gcm: 42 | description: climate model 43 | type: str 44 | default: IPSL-CM5A-LR 45 | # allowed: TODO: add list of defaults 46 | scenario: 47 | description: climate scenario 48 | type: str 49 | default: historical_1950_2005 50 | allowed: ["historical_1950_2005", "rcp45_2006_2099", "rcp85_2006_2099"] 51 | args: 52 | urlpath: http://thredds.northwestknowledge.net:8080/thredds/dodsC/agg_macav2metdata_{{ variable }}_{{ gcm }}_r1i1p1_{{ scenario }}_CONUS_daily.nc 53 | auth: null 54 | chunks: 55 | lat: 585 56 | lon: 1386 57 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/master.yaml: -------------------------------------------------------------------------------- 1 | description: "CarbonPlan Master Data Catalog" 2 | sources: 3 | gridmet: 4 | name: "gridMET" 5 | description: "Gridded daily surface meteorological data covering the continental US" 6 | metadata: 7 | tags: [climate] 8 | driver: intake.catalog.local.YAMLFileCatalog 9 | args: 10 | path: "{{CATALOG_DIR}}/gridmet.yaml" 11 | 12 | terraclimate: 13 | name: "TerraClimate" 14 | description: "Global gridded monthly climate and hydroclimate data from 1958-present." 15 | metadata: 16 | tags: [climate] 17 | driver: intake.catalog.local.YAMLFileCatalog 18 | args: 19 | path: "{{CATALOG_DIR}}/terraclimate.yaml" 20 | 21 | maca: 22 | name: "MACA" 23 | description: "Statistically downscaled climate data using the MACA method." 
24 | metadata: 25 | tags: [climate] 26 | driver: intake.catalog.local.YAMLFileCatalog 27 | args: 28 | path: "{{CATALOG_DIR}}/maca.yaml" 29 | 30 | fia: 31 | name: "Forest Inventory Analysis (FIA)" 32 | description: "Catalog for data from Forest Inventory Analysis (FIA) database" 33 | metadata: 34 | tags: [forests] 35 | driver: intake.catalog.local.YAMLFileCatalog 36 | args: 37 | path: "{{CATALOG_DIR}}/fia.yaml" 38 | 39 | fluxnet: 40 | name: "FLUXNET" 41 | description: "Catalog for data from the FLUXNET dataset" 42 | metadata: 43 | tags: [climate, carbon] 44 | driver: intake.catalog.local.YAMLFileCatalog 45 | args: 46 | path: "{{CATALOG_DIR}}/fluxnet.yaml" 47 | 48 | gcp: 49 | name: "Global Carbon Project (GCP)" 50 | description: "Catalog for data from the Global Carbon Project" 51 | metadata: 52 | tags: [climate, carbon] 53 | driver: intake.catalog.local.YAMLFileCatalog 54 | args: 55 | path: "{{CATALOG_DIR}}/gcp.yaml" 56 | 57 | mtbs: 58 | name: "Monitoring Trends in Burn Severity (MTBS)" 59 | description: "Catalog for data from the Monitoring Trends in Burn Severity (MTBS) dataset" 60 | metadata: 61 | tags: [forests] 62 | driver: intake.catalog.local.YAMLFileCatalog 63 | args: 64 | path: "{{CATALOG_DIR}}/mtbs.yaml" 65 | 66 | nftd: 67 | name: "National Forest Type Database (NFTD)" 68 | description: "Catalog for data from the National Forest Type Database (NFTD)" 69 | metadata: 70 | tags: [forests] 71 | driver: intake.catalog.local.YAMLFileCatalog 72 | args: 73 | path: "{{CATALOG_DIR}}/nftd.yaml" 74 | 75 | nlcd: 76 | name: "National Land Cover Database (NLCD)" 77 | description: "Catalog for data from the National Land Cover Database (NLCD)" 78 | metadata: 79 | tags: [forests] 80 | driver: intake.catalog.local.YAMLFileCatalog 81 | args: 82 | path: "{{CATALOG_DIR}}/nlcd.yaml" 83 | 84 | projects: 85 | name: "CarbonPlan Project Reports" 86 | description: "CarbonPlan Projects Dataset Catalog" 87 | metadata: 88 | tags: [carbon] 89 | driver: intake.catalog.local.YAMLFileCatalog 90 | args: 91 | path: "{{CATALOG_DIR}}/projects.yaml" 92 | 93 | spawnetal2020: 94 | name: "Global Above- and Belowground Biomass" 95 | description: "Catalog for data from Global Aboveground and Belowground Biomass Carbon Density Maps for the Year 2010 from Spawn et al (2020)" 96 | metadata: 97 | tags: [forests] 98 | driver: intake.catalog.local.YAMLFileCatalog 99 | args: 100 | path: "{{CATALOG_DIR}}/spawnetal2020.yaml" 101 | 102 | grids: 103 | name: "Project Grids" 104 | description: "Catalog grid files and domain definitions." 105 | metadata: 106 | tags: [meta] 107 | driver: intake.catalog.local.YAMLFileCatalog 108 | args: 109 | path: "{{CATALOG_DIR}}/grids.yaml" 110 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/mtbs.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: MTBS (raw) 9 | summary: Annual burn severity mosaics for the continental United States and Alaska. 10 | description: | 11 | Monitoring Trends in Burn Severity (MTBS) is an interagency program whose goal is to 12 | consistently map the burn severity and extent of large fires across all lands of the 13 | United States from 1984 to present. This includes all fires 1000 acres or greater in 14 | the western United States and 500 acres or greater in the eastern Unites States. 
The 15 | extent of coverage includes the continental U.S., Alaska, Hawaii and Puerto Rico. 16 | 17 | The burn severity mosaics consist of thematic raster images of MTBS burn severity 18 | classes for all currently completed MTBS fires for the continental United States, 19 | Alaska, Hawaii and Puerto Rico. Mosaicked burn severity images are compiled annually 20 | for each year by US State and the continental United States. 21 | tags: [forests] 22 | type: image/tiff; application=geotiff; profile=cloud-optimized 23 | license: Public Domain 24 | providers: 25 | - name: Monitoring Trends in Burn Severity 26 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 27 | url: https://www.mtbs.gov/ 28 | parameters: 29 | year: 30 | description: year 31 | type: int 32 | default: 1984 33 | region: 34 | description: mtbs region 35 | type: str 36 | default: conus 37 | allowed: [conus, ak] 38 | driver: rasterio 39 | args: 40 | # urlpath: "https://storage.googleapis.com/carbonplan-data/raw/mtbs/{{ region }}/30m/{{ '%d' % year }}.tif" 41 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/mtbs/{{ region }}/30m/{{ '%d' % year }}.tif" 42 | chunks: { "y": 5120, "x": 5120 } 43 | 44 | raster: 45 | metadata: 46 | title: MTBS (processed) 47 | summary: Annual burn severity mosaics for the continental United States and Alaska. 48 | description: | 49 | Monitoring Trends in Burn Severity (MTBS) is an interagency program whose goal is to 50 | consistently map the burn severity and extent of large fires across all lands of the 51 | United States from 1984 to present. This includes all fires 1000 acres or greater in 52 | the western United States and 500 acres or greater in the eastern Unites States. 53 | 54 | The burn severity mosaics consist of thematic raster images of MTBS burn severity 55 | classes for all currently completed MTBS fires for the continental United States, 56 | Alaska, Hawaii and Puerto Rico. Mosaicked burn severity images are compiled annually 57 | for each year by US State and the continental United States. 58 | 59 | These data have been processed to CarbonPlan's CONUS and Alaska study domains. 60 | tags: [forests] 61 | type: application/zarr 62 | license: Creative Commons Attribution 4.0 International 63 | providers: 64 | - name: Monitoring Trends in Burn Severity 65 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 66 | url: https://www.mtbs.gov/ 67 | parameters: 68 | region: 69 | description: conus or ak 70 | type: str 71 | default: conus 72 | allowed: [conus, ak] 73 | resolution: 74 | description: Pixel resolution in meters 75 | type: str 76 | default: "4000m" 77 | allowed: ["4000m"] 78 | driver: zarr 79 | args: 80 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/mtbs/{{ region }}/{{ resolution }}/raster.zarr" 81 | consolidated: True 82 | 83 | rasterized_perims: 84 | metadata: 85 | title: MTBS Rasterized Fire Perimeters (processed) 86 | summary: Monthly burned area rasters for the continental United States and Alaska. 87 | description: | 88 | Monitoring Trends in Burn Severity (MTBS) is an interagency program whose goal is to 89 | consistently map the burn severity and extent of large fires across all lands of the 90 | United States from 1984 to present. 91 | 92 | TODO... 
93 | 94 | tags: [forests] 95 | type: image/tiff; application=geotiff; profile=cloud-optimized 96 | license: Creative Commons Attribution 4.0 International 97 | providers: 98 | - name: Monitoring Trends in Burn Severity 99 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 100 | url: https://www.mtbs.gov/ 101 | parameters: 102 | region: 103 | description: conus or ak 104 | type: str 105 | default: conus 106 | allowed: [conus, ak] 107 | resolution: 108 | description: Pixel resolution in meters 109 | type: str 110 | default: "30m" 111 | allowed: ["30m"] 112 | size: 113 | description: Fire size 114 | type: str 115 | default: "lf" 116 | allowed: ["lf", "vlf"] 117 | date: 118 | description: "Year and month (format: YYYY.MM)" 119 | type: str 120 | default: "2018.11" 121 | driver: rasterio 122 | args: 123 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/mtbs/{{ region }}/{{ resolution }}/{{ size }}_{{ date }}.tif" 124 | chunks: { "y": 5120, "x": 5120 } 125 | 126 | fod_shp: 127 | metadata: 128 | title: MTBS Occurance (vector data) 129 | summary: Fire occurance location dataset in vector/point format. 130 | description: | 131 | The fire occurrence location dataset is a vector point ESRI shapefile of the centroids of 132 | all currently completed MTBS fires occurring in the continental United States, Alaska, 133 | Hawaii and Puerto Rico. 134 | tags: [fire, forests] 135 | type: application/octet-stream 136 | license: Public Domain 137 | providers: 138 | - name: Monitoring Trends in Burn Severity 139 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 140 | url: https://www.mtbs.gov/ 141 | ci: skip 142 | driver: shapefile 143 | args: 144 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/mtbs/mtbs_fod_pts_data/mtbs_fod_pts_DD.shp" 145 | 146 | perims_shp: 147 | metadata: 148 | title: MTBS Boundaries (vector data) 149 | summary: Burned area boundaries data in vector/polygon format. 150 | description: | 151 | The burned area boundaries dataset is a vector polygon ESRI shapefile of the extent of the 152 | burned areas of all currently completed MTBS fires for the continental United States, 153 | Alaska, Hawaii and Puerto Rico. 154 | tags: [forests] 155 | type: application/octet-stream 156 | license: Public Domain 157 | providers: 158 | - name: Monitoring Trends in Burn Severity 159 | description: Monitoring Trends in Burn Severity (MTBS) is an interagency program that includes the USGS, NASA, USFS, USDI, and USDA. 160 | url: https://www.mtbs.gov/ 161 | ci: skip 162 | driver: shapefile 163 | args: 164 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/mtbs/mtbs_perimeter_data/mtbs_perims_DD.shp" 165 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/nftd.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: National Forest Type Dataset (raw) 9 | summary: Extent, distribution, and forest type composition of the nation’s forests. 10 | description: | 11 | This geospatial dataset was created by the USFS Forest Inventory and Analysis (FIA) program 12 | and the Geospatial Technology and Applications Center (GTAC) to show the extent, 13 | distribution, and forest type composition of the nation’s forests. 
14 | 15 | The dataset was created by modeling forest type from FIA plot data as a function of more 16 | than one hundred geospatially continuous predictor layers. 17 | 18 | This process results in a view of forest type distribution in greater detail than is 19 | possible with the FIA plot data alone. 20 | tags: [forests] 21 | type: image/tiff; application=geotiff; profile=cloud-optimized 22 | license: Public Domain 23 | providers: 24 | - name: USDA Forest Service 25 | description: Data provided by the United States Department of Agriculture Forest Service. 26 | url: https://www.fia.fs.fed.us/ 27 | ci: xfail 28 | parameters: 29 | option: 30 | description: error or raster 31 | type: str 32 | default: raster 33 | allowed: [error, raster] 34 | region: 35 | description: conus or ak 36 | type: str 37 | default: conus 38 | allowed: [conus, ak] 39 | variable: 40 | description: foresttype or forestgroup 41 | type: str 42 | default: foresttype 43 | allowed: [foresttype, forestgroup] 44 | driver: rasterio 45 | args: 46 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/nftd/{{ region }}_{{ variable }}/250m/{{ option }}.tif" 47 | chunks: { "y": 5120, "x": 5120 } 48 | 49 | raster: 50 | metadata: 51 | title: National Forest Type Dataset (processed) 52 | summary: Extent, distribution, and forest type composition of the nation’s forests. 53 | description: | 54 | This geospatial dataset was created by the USFS Forest Inventory and Analysis (FIA) program 55 | and the Geospatial Technology and Applications Center (GTAC) to show the extent, 56 | distribution, and forest type composition of the nation’s forests. 57 | 58 | The dataset was created by modeling forest type from FIA plot data as a function of more 59 | than one hundred geospatially continuous predictor layers. 60 | 61 | This process results in a view of forest type distribution in greater detail than is 62 | possible with the FIA plot data alone. 63 | 64 | These data have been processed to CarbonPlan's CONUS and Alaska study domains. 65 | tags: [forests] 66 | type: image/tiff; application=geotiff; profile=cloud-optimized 67 | license: Public Domain 68 | providers: 69 | - name: USDA Forest Service 70 | description: Data provided by the United States Department of Agriculture Forest Service. 71 | url: https://www.fia.fs.fed.us/ 72 | ci: xfail 73 | parameters: 74 | region: 75 | description: conus or ak 76 | type: str 77 | default: conus 78 | allowed: [conus, ak] 79 | option: 80 | description: group/type [optional _error] 81 | type: str 82 | default: type 83 | allowed: [group, type, group_error, type_error] 84 | resolution: 85 | description: pixel resolution in meters 86 | type: str 87 | default: 4000m 88 | allowed: [250m, 4000m] 89 | driver: rasterio 90 | args: 91 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/nftd/{{ region }}/{{ resolution }}/{{ option }}.tif" 92 | chunks: { "y": 5120, "x": 5120 } 93 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/nlcd.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: National Land Cover Database (raw) 9 | summary: The National Land Cover Database - 2001 to 2016. 10 | description: | 11 | The U.S. Geological Survey (USGS), in partnership with several federal agencies, has 12 | developed and released four National Land Cover Database (NLCD) products over the past 13 | two decades: NLCD 1992, 2001, 2006, and 2011. 
These products provide spatially explicit 14 | and reliable information on the Nation’s land cover and land cover change. To continue 15 | the legacy of NLCD and further establish a long-term monitoring capability for the 16 | Nation’s land resources, the USGS has designed a new generation of NLCD products named 17 | NLCD 2016. The NLCD 2016 design aims to provide innovative, consistent, and robust 18 | methodologies for production of a multi-temporal land cover and land cover change 19 | database from 2001 to 2016 at 2–3-year intervals. Comprehensive research was conducted 20 | and resulted in developed strategies for NLCD 2016: a streamlined process for assembling 21 | and preprocessing Landsat imagery and geospatial ancillary datasets; a multi-source 22 | integrated training data development and decision-tree based land cover classifications; 23 | a temporally, spectrally, and spatially integrated land cover change analysis strategy; 24 | a hierarchical theme-based post-classification and integration protocol for generating 25 | land cover and change products; a continuous fields biophysical parameters modeling 26 | method; and an automated scripted operational system for the NLCD 2016 production. The 27 | performance of the developed strategies and methods were tested in twenty World Reference 28 | System-2 path/row throughout the conterminous U.S. An overall agreement ranging from 29 | 71% to 97% between land cover classification and reference data was achieved for all 30 | tested area and all years. Results from this study confirm the robustness of this 31 | comprehensive and highly automated procedure for NLCD 2016 operational mapping. 32 | tags: [forests] 33 | type: image/tiff; application=geotiff; profile=cloud-optimized 34 | license: Public Domain 35 | providers: 36 | - name: Multi-Resolution Land Characteristics (MRLC) Consortium 37 | description: The Multi-Resolution Land Characteristics (MRLC) consortium is a group of federal agencies who coordinate and generate consistent and relevant land cover information at the national scale for a wide variety of environmental, land management, and modeling applications. 38 | url: https://www.mrlc.gov/ 39 | parameters: 40 | option: 41 | description: year (int) or change 42 | type: str 43 | default: 2016 44 | region: 45 | description: conus or ak 46 | type: str 47 | default: conus 48 | allowed: [conus, ak] 49 | driver: rasterio 50 | args: 51 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/nlcd/{{ region }}/30m/{{ option }}.tif" 52 | chunks: { "y": 5120, "x": 5120 } 53 | 54 | raster: 55 | metadata: 56 | title: National Land Cover Database (processed) 57 | summary: The National Land Cover Database - 2001 to 2016. 58 | description: | 59 | The U.S. Geological Survey (USGS), in partnership with several federal agencies, has 60 | developed and released four National Land Cover Database (NLCD) products over the past 61 | two decades: NLCD 1992, 2001, 2006, and 2011. These products provide spatially explicit 62 | and reliable information on the Nation’s land cover and land cover change. To continue 63 | the legacy of NLCD and further establish a long-term monitoring capability for the 64 | Nation’s land resources, the USGS has designed a new generation of NLCD products named 65 | NLCD 2016. The NLCD 2016 design aims to provide innovative, consistent, and robust 66 | methodologies for production of a multi-temporal land cover and land cover change 67 | database from 2001 to 2016 at 2–3-year intervals. 
Comprehensive research was conducted 68 | and resulted in developed strategies for NLCD 2016: a streamlined process for assembling 69 | and preprocessing Landsat imagery and geospatial ancillary datasets; a multi-source 70 | integrated training data development and decision-tree based land cover classifications; 71 | a temporally, spectrally, and spatially integrated land cover change analysis strategy; 72 | a hierarchical theme-based post-classification and integration protocol for generating 73 | land cover and change products; a continuous fields biophysical parameters modeling 74 | method; and an automated scripted operational system for the NLCD 2016 production. The 75 | performance of the developed strategies and methods were tested in twenty World Reference 76 | System-2 path/row throughout the conterminous U.S. An overall agreement ranging from 77 | 71% to 97% between land cover classification and reference data was achieved for all 78 | tested area and all years. Results from this study confirm the robustness of this 79 | comprehensive and highly automated procedure for NLCD 2016 operational mapping. 80 | 81 | These data have been processed to CarbonPlan's CONUS and Alaska study domains. 82 | tags: [forests] 83 | type: image/tiff; application=geotiff; profile=cloud-optimized 84 | license: Public Domain 85 | providers: 86 | - name: Multi-Resolution Land Characteristics (MRLC) Consortium 87 | description: The Multi-Resolution Land Characteristics (MRLC) consortium is a group of federal agencies who coordinate and generate consistent and relevant land cover information at the national scale for a wide variety of environmental, land management, and modeling applications. 88 | url: https://www.mrlc.gov/ 89 | parameters: 90 | option: 91 | description: year (int) or change 92 | type: str 93 | default: 2016 94 | resolution: 95 | description: pixel resolution in meters 96 | type: str 97 | default: 4000m 98 | allowed: [250m, 4000m] 99 | region: 100 | description: conus or ak 101 | type: str 102 | default: conus 103 | allowed: [conus, ak] 104 | driver: rasterio 105 | args: 106 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/nlcd/{{ region }}/{{ resolution }}/{{ option }}.tif" 107 | chunks: { "y": 5120, "x": 5120 } 108 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/projects.yaml: -------------------------------------------------------------------------------- 1 | sources: 2 | reports: 3 | metadata: 4 | title: CarbonPlan Project's Database 5 | summary: Public database of carbon removal project proposals evaluated by CarbonPlan. 6 | description: | 7 | This is a public database of reports on carbon removal project proposals. These reports 8 | reflect our independent analysis of public information. 9 | tags: [carbon] 10 | type: text/csv 11 | license: Creative Commons Attribution 4.0 International 12 | providers: 13 | - name: CarbonPlan 14 | description: | 15 | CarbonPlan is a registered non-profit public benefit corporation working on 16 | the science and data of carbon removal. 
17 | url: https://carbonplan.org 18 | driver: csv 19 | args: 20 | urlpath: "https://api.carbonplan.org/projects.csv" 21 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/spawnetal2020.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: Global Biomass (Spawn and Gibbs, 2020) 9 | summary: Global aboveground and belowground biomass carbon density maps for the year 2010 10 | description: | 11 | This dataset provides temporally consistent and harmonized global maps of aboveground and 12 | belowground biomass carbon density for the year 2010 at a 300-m spatial resolution. The 13 | aboveground biomass map integrates land-cover specific, remotely sensed maps of woody, 14 | grassland, cropland, and tundra biomass. Input maps were amassed from the published 15 | literature and, where necessary, updated to cover the focal extent or time period. The 16 | belowground biomass map similarly integrates matching maps derived from each aboveground 17 | biomass map and land-cover specific empirical models. Aboveground and belowground maps were 18 | then integrated separately using ancillary maps of percent tree cover and landcover and a 19 | rule-based decision tree. Maps reporting the accumulated uncertainty of pixel-level 20 | estimates are also provided. 21 | tags: [biomass, forests] 22 | type: image/tiff; application=geotiff; profile=cloud-optimized 23 | license: Public domain 24 | providers: 25 | - name: Oak Ridge National Laboratory 26 | description: | 27 | The Oak Ridge National Laboratory Distributed Active Archive Center (ORNL DAAC) for 28 | Biogeochemical Dynamics is a NASA Earth Observing System Data and Information System 29 | (EOSDIS) data center managed by the Earth Science Data and Information System (ESDIS) 30 | Project. 31 | url: https://doi.org/10.3334/ORNLDAAC/1763 32 | ci: xfail 33 | parameters: 34 | variable: 35 | description: aboveground, aboveground_uncertainty, belowground, or belowground_uncertainty 36 | type: str 37 | default: aboveground 38 | allowed: 39 | [ 40 | aboveground, 41 | aboveground_uncertainty, 42 | belowground, 43 | belowground_uncertainty, 44 | ] 45 | driver: rasterio 46 | args: 47 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/2010-harmonized-biomass/global/300m/{{ variable }}.tif" 48 | chunks: { "y": 5120, "x": 5120 } 49 | -------------------------------------------------------------------------------- /carbonplan_data/catalogs/terraclimate.yaml: -------------------------------------------------------------------------------- 1 | plugins: 2 | source: 3 | - module: intake_xarray 4 | 5 | sources: 6 | raw_raster: 7 | metadata: 8 | title: TerraClimate (raw) 9 | summary: Global climate and climaticwater balance data from 1958-2019. 10 | description: | 11 | TerraClimate is a dataset of monthly climate and climatic water balance for global 12 | terrestrial surfaces from 1958-2019. These data provide important inputs for ecological 13 | and hydrological studies at global scales that require high spatial resolution and 14 | time-varying data. All data have monthly temporal resolution and a ~4-km (1/24th degree) 15 | spatial resolution. The data cover the period from 1958-2019. 
16 | tags: [climate] 17 | type: application/netcdf 18 | license: Creative Commons Public Domain (CC0) 19 | providers: 20 | - name: Climatology Lab, University of California, Merced 21 | description: Data provided by Dr. John Abatzoglou's Climatology Lab at the University of California, Merced. 22 | url: http://www.climatologylab.org 23 | driver: zarr 24 | args: 25 | urlpath: "{{env(CARBONPLAN_DATA)}}/raw/terraclimate/4000m/raster.zarr" 26 | consolidated: True 27 | 28 | raster: 29 | metadata: 30 | title: TerraClimate (processed) 31 | summary: Climate and climaticwater balance data from 1958-2019. 32 | description: | 33 | TerraClimate is a dataset of monthly climate and climatic water balance for global 34 | terrestrial surfaces from 1958-2019. All data have monthly temporal resolution and a 35 | ~4-km (1/24th degree) spatial resolution. The data cover the period from 1958-2019. 36 | 37 | These data have been processed to CarbonPlan's CONUS and Alaska study domains. 38 | tags: [climate] 39 | type: application/zarr 40 | license: Creative Commons Public Domain (CC0) 41 | providers: 42 | - name: Climatology Lab, University of California, Merced 43 | description: Data provided by Dr. John Abatzoglou's Climatology Lab at the University of California, Merced. 44 | url: http://www.climatologylab.org 45 | ci: skip 46 | parameters: 47 | region: 48 | description: conus or ak 49 | type: str 50 | default: conus 51 | allowed: [conus, ak] 52 | resolution: 53 | description: Pixel resolution in meters 54 | type: str 55 | default: "4000m" 56 | allowed: ["4000m"] 57 | driver: zarr 58 | args: 59 | urlpath: "{{env(CARBONPLAN_DATA)}}/processed/terraclimate/{{ region }}/{{ resolution }}/raster.zarr" 60 | consolidated: True 61 | -------------------------------------------------------------------------------- /carbonplan_data/metadata.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import getpass 4 | import os 5 | import socket 6 | import sys 7 | import time 8 | 9 | from . import __version__ 10 | 11 | 12 | def get_cf_global_attrs(**attrs): 13 | if "history" not in attrs: 14 | attrs["history"] = f"Created: {time.ctime(time.time())}" 15 | 16 | if "insitution" not in attrs: 17 | attrs["institution"] = "CarbonPlan" 18 | 19 | if "source" not in attrs: 20 | attrs["source"] = sys.argv[0] 21 | 22 | if "hostname" not in attrs: 23 | attrs["hostname"] = socket.gethostname() 24 | 25 | if "username" not in attrs: 26 | attrs["username"] = os.getenv("JUPYTERHUB_USER", getpass.getuser()) 27 | 28 | if "version" not in attrs: 29 | attrs["version"] = __version__ 30 | 31 | return attrs 32 | -------------------------------------------------------------------------------- /carbonplan_data/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Based on scikit-learn/sklearn/utils/estimator_checks.py 2 | import itertools 3 | from functools import partial 4 | 5 | 6 | def get_entry_params(entry): 7 | user_parameters = entry.describe()["user_parameters"] 8 | if not user_parameters: 9 | return [] 10 | 11 | keys = [p["name"] for p in user_parameters] 12 | try: 13 | values = [p["allowed"] for p in user_parameters] 14 | except KeyError: 15 | return [] 16 | params = [None] 17 | params.extend([dict(zip(keys, p)) for p in itertools.product(*values)]) 18 | return params 19 | 20 | 21 | def _set_check_ids(obj): 22 | """Create pytest ids for checks. 
23 | When `obj` is an intake entry, this returns the pprint version of the 24 | intake entry. When `obj` is a function, the name of the function is 25 | returned with its keyworld arguments. 26 | 27 | Parameters 28 | ---------- 29 | obj : intake entry or function 30 | Items generated by `check_entry` 31 | 32 | Returns 33 | ------- 34 | id : string or None 35 | 36 | See also 37 | -------- 38 | check_entry 39 | """ 40 | if hasattr(obj, "container"): 41 | c = getattr(obj, "_catalog", None) 42 | if c: 43 | name = f"{c.name}.{obj.name}" 44 | else: 45 | name = f"{obj.name}" 46 | return name 47 | if callable(obj): 48 | if not isinstance(obj, partial): 49 | return obj.__name__ 50 | 51 | if not obj.keywords: 52 | return obj.func.__name__ 53 | 54 | kwstring = ",".join([f"{k}={v}" for k, v in obj.keywords.items()]) 55 | return f"{obj.func.__name__}({kwstring})" 56 | 57 | 58 | def parametrize_with_checks(catalog): 59 | """Pytest specific decorator for parametrizing catalog checks. 60 | The `id` of each check is set to be a pprint version of the catalog 61 | and the name of the check with its keyword arguments. 62 | This allows to use `pytest -k` to specify which tests to run:: 63 | pytest test_check_catalogs.py -k check_catalog_metadata 64 | 65 | Parameters 66 | ---------- 67 | catalog : Intake Catalog 68 | Catalog to generated checks for. 69 | 70 | Returns 71 | ------- 72 | decorator : `pytest.mark.parametrize` 73 | 74 | Examples 75 | -------- 76 | >>> from carbonplan.data.tests import parametrize_with_checks 77 | >>> from carbonplan.data import cat 78 | >>> @parametrize_with_checks(cat) 79 | ... def test_catalog(entry, check): 80 | ... check(entry) 81 | ... 82 | 83 | """ 84 | import pytest 85 | 86 | checks_generator = itertools.chain.from_iterable( 87 | check_entry(name, entry) for name, entry in dict(catalog.walk(depth=10)).items() 88 | ) 89 | 90 | checks_with_marks = list( 91 | _mark_xfail_checks(estimator, check, pytest) for estimator, check in checks_generator 92 | ) 93 | 94 | return pytest.mark.parametrize("entry, check", checks_with_marks, ids=_set_check_ids) 95 | 96 | 97 | def _mark_xfail_checks(entry, check, pytest): 98 | # TODO 99 | return entry, check 100 | 101 | 102 | def _yield_all_checks(name, entry): 103 | yield check_entry_metadata 104 | 105 | for params in get_entry_params(entry): 106 | yield partial(check_get_entry_data, params=params) 107 | 108 | 109 | def check_entry(name, entry): 110 | yield from ((entry, partial(check, name)) for check in _yield_all_checks(name, entry)) 111 | 112 | 113 | def check_get_entry_data(name, entry, params=None): 114 | import pytest 115 | 116 | if params is not None: 117 | entry = entry(**params) 118 | else: 119 | entry = entry() 120 | 121 | if entry.container == "catalog": 122 | entry.reload() 123 | elif entry.container in ["xarray", "dataframe"]: 124 | if entry.metadata.get("ci", None) == "skip": 125 | pytest.skip("dataset marked as ci: skip") # TODO: move to _mark_xfail_checks 126 | elif entry.metadata.get("ci", None) == "xfail": 127 | pytest.xfail("dataset marked as ci: xfail") # TODO: move to _mark_xfail_checks 128 | try: 129 | _ = entry.to_dask() 130 | except NotImplementedError: 131 | _ = entry.read() 132 | 133 | 134 | def check_entry_metadata(name, entry): 135 | import pytest 136 | 137 | expected_keys = ["title", "summary", "description", "tags", "license", "providers"] 138 | if entry.container == "catalog": 139 | pytest.skip( 140 | "not checking metadata in top level catalog objects." 
141 | ) # TODO: move to _mark_xfail_checks 142 | for key in expected_keys: 143 | assert key in entry().metadata 144 | -------------------------------------------------------------------------------- /carbonplan_data/tests/test_catalogs.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from carbonplan_data import MASTER_CATALOG_PATH, cat 4 | 5 | from . import parametrize_with_checks 6 | 7 | 8 | def test_yaml_catalogs_in_distribution(): 9 | assert os.path.exists(MASTER_CATALOG_PATH) 10 | 11 | 12 | @parametrize_with_checks(cat) 13 | def test_catalog_entries(entry, check): 14 | check(entry) 15 | -------------------------------------------------------------------------------- /carbonplan_data/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import xarray as xr 3 | from zarr.storage import MemoryStore 4 | 5 | from carbonplan_data.utils import get_versions, set_zarr_encoding 6 | 7 | 8 | @pytest.fixture 9 | def temperature(): 10 | ds = xr.tutorial.open_dataset("air_temperature") 11 | return ds 12 | 13 | 14 | def test_set_zarr_encoding(temperature): 15 | store = MemoryStore() 16 | temperature["air"].encoding["foo"] = "bar" 17 | ds = set_zarr_encoding(temperature) 18 | ds.to_zarr(store) 19 | assert "foo" not in ds.air.encoding 20 | assert ds.air.encoding.get("compressor", None) 21 | assert ds.air.encoding.get("_FillValue", None) 22 | 23 | ds = set_zarr_encoding(temperature, float_dtype="float16") 24 | ds.to_zarr(store, mode="w") 25 | assert "f2" in ds.air.dtype.str 26 | 27 | 28 | def test_get_versions(): 29 | versions = get_versions() 30 | assert versions["carbonplan_data"] 31 | -------------------------------------------------------------------------------- /carbonplan_data/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import importlib 4 | import os 5 | import pathlib 6 | import zipfile 7 | 8 | import numpy as np 9 | import validators 10 | import wget 11 | import xarray as xr 12 | import yaml 13 | from numpy.typing import DTypeLike 14 | 15 | root = pathlib.Path(__file__).parents[2] 16 | 17 | # from netCDF4 and netCDF4-python 18 | default_fillvals = { 19 | "S1": "\x00", 20 | "i1": -127, 21 | "u1": 255, 22 | "i2": -32767, 23 | "u2": 65535, 24 | "i4": -2147483647, 25 | "u4": 4294967295, 26 | "i8": -9223372036854775806, 27 | "u8": 18446744073709551614, 28 | "f4": 9.969209968386869e36, 29 | "f8": 9.969209968386869e36, 30 | } 31 | 32 | 33 | def projections(name, region): 34 | if name == "albers": 35 | if region == "conus": 36 | crs = albers_conus_crs() 37 | extent = albers_conus_extent() 38 | elif region == "ak": 39 | crs = albers_ak_crs() 40 | extent = albers_ak_extent() 41 | else: 42 | raise ValueError(f'region "{region}" not found') 43 | else: 44 | raise ValueError(f'projection "{name}" name not found') 45 | return crs, extent 46 | 47 | 48 | def albers_conus_extent(): 49 | return "-2493045.0 177285.0 2342655.0 3310005.0" 50 | 51 | 52 | def albers_conus_crs(): 53 | return ( 54 | 'PROJCS["Albers_Conical_Equal_Area",' 55 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 56 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 57 | "TOWGS84[0,0,0,-0,-0,-0,0]," 58 | 'AUTHORITY["EPSG","6326"]],' 59 | 'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' 60 | 'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' 61 | 'AUTHORITY["EPSG","4326"]],' 62 | 
'PROJECTION["Albers_Conic_Equal_Area"],' 63 | 'PARAMETER["standard_parallel_1",29.5],' 64 | 'PARAMETER["standard_parallel_2",45.5],' 65 | 'PARAMETER["latitude_of_center",23],' 66 | 'PARAMETER["longitude_of_center",-96],' 67 | 'PARAMETER["false_easting",0],' 68 | 'PARAMETER["false_northing",0],' 69 | 'UNIT["meters",1]]' 70 | ) 71 | 72 | 73 | def albers_conus_transform(res=4000): 74 | return [res, 0.0, -2493045.0, 0.0, -res, 3310005.0] 75 | 76 | 77 | def albers_ak_extent(): 78 | return "-2232345.0 344805.0 1494735.0 2380125.0" 79 | 80 | 81 | def albers_ak_crs(): 82 | return ( 83 | 'PROJCS["WGS_1984_Albers",' 84 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 85 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 86 | 'AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],' 87 | 'UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4326"]],' 88 | 'PROJECTION["Albers_Conic_Equal_Area"],' 89 | 'PARAMETER["standard_parallel_1",55],' 90 | 'PARAMETER["standard_parallel_2",65],' 91 | 'PARAMETER["latitude_of_center",50],' 92 | 'PARAMETER["longitude_of_center",-154],' 93 | 'PARAMETER["false_easting",0],' 94 | 'PARAMETER["false_northing",0],' 95 | 'UNIT["metre",1,AUTHORITY["EPSG","9001"]]]' 96 | ) 97 | 98 | 99 | def albers_ak_transform(res=4000): 100 | return [res, 0.0, -2232345.0, 0.0, -res, 2380125.0] 101 | 102 | 103 | def setup(name): 104 | if name == "jeremy": 105 | creds = "/Users/freeman/.config/gcloud/legacy_credentials/jeremy@carbonplan.org/adc.json" 106 | workdir = pathlib.Path("/Users/freeman/workdir/carbonplan-data/") 107 | if name == "joe": 108 | creds = "/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json" 109 | workdir = pathlib.Path("/Users/jhamman/workdir/carbonplan_data_downloads/") 110 | os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds 111 | 112 | from google.cloud import storage 113 | 114 | storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB 115 | storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB 116 | 117 | def upload(src, target, bucket="carbonplan-data"): 118 | storage_client = storage.Client("carbonplan") 119 | bucket = storage_client.bucket(bucket) 120 | blob = bucket.blob(target) 121 | blob.upload_from_filename(src) 122 | 123 | return workdir, upload 124 | 125 | 126 | def get_sources(): 127 | with open(root / "sources.yaml") as f: 128 | sources = yaml.load(f, Loader=yaml.FullLoader) 129 | 130 | return sources 131 | 132 | 133 | def get_workdir(workdir): 134 | # fallback to cwd 135 | if workdir is None: 136 | workdir = os.getcwd() 137 | 138 | # cast to pathlib obj 139 | if isinstance(workdir, str): 140 | workdir = pathlib.Path(workdir) 141 | 142 | # make sure workdir exists 143 | workdir.mkdir(parents=True, exist_ok=True) 144 | 145 | return workdir 146 | 147 | 148 | def process_sources(name, workdir=None): 149 | sources = get_sources() 150 | workdir = get_workdir(workdir) 151 | 152 | results = {"download": [], "unzip": []} 153 | 154 | for key, dset in sources[name]["data"].items(): 155 | # download 156 | if "download" in dset["actions"]: 157 | for url in dset["urlpath"]: 158 | if not validators.url(url): 159 | raise ValueError(f'url "{url}" not valid') 160 | out = workdir / url.name 161 | if not out.exists(): 162 | print(f"downloading {url}") 163 | wget.download(str(url), out=str(out)) 164 | 165 | results["download"].append(out) 166 | 167 | # unzip 168 | if "unzip" in dset["actions"]: 169 | outdir = workdir / out.stem 170 | if not outdir.exists(): 171 | outdir.mkdir(parents=True) 172 | with zipfile.ZipFile(out, "r") as f: 173 | 
print(f"extracting contents of {out}") 174 | f.extractall(outdir) 175 | 176 | results["unzip"].append(outdir.glob("**/*")) 177 | 178 | return results 179 | 180 | 181 | def set_zarr_encoding( 182 | ds: xr.Dataset, 183 | codec_config: dict | None = None, 184 | float_dtype: DTypeLike | None = None, 185 | int_dtype: DTypeLike | None = None, 186 | ) -> xr.Dataset: 187 | """Set zarr encoding for each variable in the dataset 188 | 189 | Parameters 190 | ---------- 191 | ds : xr.Dataset 192 | Input dataset 193 | codec_config : dict, optional 194 | Dictionary of parameters to pass to numcodecs.get_codec, default is {'id': 'zlib', 'level': 1} 195 | float_dtype : str or dtype, optional 196 | Dtype to cast floating point variables to 197 | 198 | Returns 199 | ------- 200 | ds : xr.Dataset 201 | Output dataset with updated variable encodings 202 | """ 203 | import numcodecs 204 | 205 | ds = ds.copy() 206 | 207 | if codec_config is None: 208 | codec_config = {"id": "zlib", "level": 1} 209 | compressor = numcodecs.get_codec(codec_config) 210 | 211 | for k, da in ds.variables.items(): 212 | # maybe cast float type 213 | if np.issubdtype(da.dtype, np.floating) and float_dtype is not None: 214 | da = da.astype(float_dtype) 215 | 216 | if np.issubdtype(da.dtype, np.integer) and int_dtype is not None: 217 | da = da.astype(int_dtype) 218 | 219 | # remove old encoding 220 | da.encoding.clear() 221 | 222 | # update with new encoding 223 | da.encoding["compressor"] = compressor 224 | try: 225 | del da.atrrs["_FillValue"] 226 | except AttributeError: 227 | pass 228 | da.encoding["_FillValue"] = default_fillvals.get( 229 | da.dtype.str[-2:], None 230 | ) # TODO: handle date/time types 231 | 232 | ds[k] = da 233 | 234 | return ds 235 | 236 | 237 | def get_versions( 238 | packages=[ 239 | "carbonplan", 240 | "carbonplan_data", 241 | "xarray", 242 | "dask", 243 | "numpy", 244 | "scipy", 245 | "fsspec", 246 | "intake", 247 | "rasterio", 248 | "zarr", 249 | ], 250 | ) -> dict[str, str]: 251 | """Helper to fetch commonly used package versions 252 | Parameters 253 | ---------- 254 | packages : list 255 | List of packages to fetch versions for 256 | Returns 257 | ------- 258 | versions : dict 259 | Version dictionary with keys of package names and values of version strings 260 | """ 261 | versions = {"docker_image ": os.getenv("REPO_HASH", None)} 262 | 263 | for p in packages: 264 | try: 265 | mod = importlib.import_module(p) 266 | versions[p] = getattr(mod, "__version__", None) 267 | except ModuleNotFoundError: 268 | versions[p] = None 269 | 270 | return versions 271 | 272 | 273 | def zarr_is_complete(store, check=".zmetadata"): 274 | """Return true if Zarr store is complete""" 275 | return check in store 276 | -------------------------------------------------------------------------------- /ci/environment.yaml: -------------------------------------------------------------------------------- 1 | name: carbonplan-data 2 | channels: 3 | - conda-forge 4 | - nodefaults 5 | dependencies: 6 | - dask 7 | - fastparquet 8 | - gcsfs 9 | - gdal 10 | - google-cloud-storage 11 | - intake<=0.7.0 12 | - intake-geopandas 13 | - intake-parquet 14 | - intake-xarray 15 | - libgdal 16 | - netcdf4 17 | - pandas 18 | - pip 19 | - pooch 20 | - pydap 21 | - pytest 22 | - pytest-cov 23 | - pre-commit 24 | - rasterio 25 | - xarray 26 | - zarr 27 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | 
require_ci_to_pass: no 3 | max_report_age: off 4 | 5 | comment: false 6 | 7 | ignore: 8 | - "tests/*.py" 9 | - "setup.py" 10 | 11 | coverage: 12 | precision: 2 13 | round: down 14 | status: 15 | project: 16 | default: 17 | target: 95 18 | informational: true 19 | patch: off 20 | changes: off 21 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=64", "setuptools-scm[toml]>=6.2", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "carbonplan-data" 7 | description = "Preprocessing utilities for CarbonPlan's data catalog" 8 | readme = "README.md" 9 | license = { text = "MIT" } 10 | authors = [{ name = "CarbonPlan", email = "tech@carbonplan.org" }] 11 | requires-python = ">=3.9" 12 | classifiers = [ 13 | "Development Status :: 3 - Alpha", 14 | "License :: OSI Approved :: MIT License", 15 | "Operating System :: OS Independent", 16 | "Intended Audience :: Science/Research", 17 | "Programming Language :: Python", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Topic :: Scientific/Engineering", 24 | ] 25 | dynamic = ["version"] 26 | 27 | dependencies = ["intake<=0.7.0", "validators", "wget", "numpy", "xarray"] 28 | 29 | [project.urls] 30 | repository = "https://github.com/carbonplan/data" 31 | 32 | [tool.setuptools.packages.find] 33 | include = ["carbonplan_data*"] 34 | 35 | [tool.setuptools_scm] 36 | local_scheme = "node-and-date" 37 | fallback_version = "999" 38 | 39 | 40 | [tool.black] 41 | line-length = 100 42 | target-version = ['py39'] 43 | skip-string-normalization = true 44 | 45 | 46 | [tool.ruff] 47 | line-length = 100 48 | target-version = "py39" 49 | builtins = ["ellipsis"] 50 | extend-include = ["*.ipynb"] 51 | # Exclude a variety of commonly ignored directories. 
52 | exclude = [ 53 | ".bzr", 54 | ".direnv", 55 | ".eggs", 56 | ".git", 57 | ".hg", 58 | ".mypy_cache", 59 | ".nox", 60 | ".pants.d", 61 | ".ruff_cache", 62 | ".svn", 63 | ".tox", 64 | ".venv", 65 | "__pypackages__", 66 | "_build", 67 | "buck-out", 68 | "build", 69 | "dist", 70 | "node_modules", 71 | "venv", 72 | ] 73 | [tool.ruff.lint] 74 | per-file-ignores = {} 75 | ignore = [ 76 | "E721", # Comparing types instead of isinstance 77 | "E741", # Ambiguous variable names 78 | "E501", # Conflicts with ruff format 79 | "E722", # Bare except 80 | ] 81 | select = [ 82 | # Pyflakes 83 | "F", 84 | # Pycodestyle 85 | "E", 86 | "W", 87 | # isort 88 | "I", 89 | # Pyupgrade 90 | "UP", 91 | ] 92 | 93 | 94 | [tool.ruff.lint.mccabe] 95 | max-complexity = 18 96 | 97 | [tool.ruff.lint.isort] 98 | known-first-party = ["carbonplan_data"] 99 | 100 | [tool.pytest.ini_options] 101 | console_output_style = "count" 102 | addopts = "--cov=./ --cov-report=xml --verbose" 103 | -------------------------------------------------------------------------------- /scripts/fia/00_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Download FIA Database\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook downloads local copies of the FIA database for processing.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- sources.yaml\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- Local copies of the FIA database\n", 22 | "\n", 23 | "**Notes:**\n", 24 | "\n", 25 | "- No reprojection or processing of the data is done in this notebook.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import pathlib\n", 42 | "import zipfile\n", 43 | "\n", 44 | "import urlpath\n", 45 | "import wget\n", 46 | "import yaml\n", 47 | "\n", 48 | "workdir = pathlib.Path(\"/Users/jhamman/workdir/carbonplan_data_downloads/fia/\")\n", 49 | "workdir.mkdir(parents=True, exist_ok=True)\n", 50 | "workdir" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "with open(\"../../sources.yaml\") as f:\n", 60 | " sources = yaml.load(f, Loader=yaml.FullLoader)[\"fia\"]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "sources" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "for key, dset in sources[\"data\"].items():\n", 79 | " if \"download\" in dset[\"actions\"]:\n", 80 | " for url in dset[\"urlpath\"]:\n", 81 | " url = urlpath.URL(url)\n", 82 | " out = workdir / url.name\n", 83 | " if not out.exists():\n", 84 | " print(f\"downloading {url}\")\n", 85 | " wget.download(str(url), out=str(out))\n", 86 | "\n", 87 | " if \"unzip\" in dset[\"actions\"]:\n", 88 | " outdir = workdir / out.stem\n", 89 | " if not outdir.exists():\n", 90 | " outdir.mkdir(parents=True)\n", 91 | " with zipfile.ZipFile(out, \"r\") as f:\n", 92 | " print(f\"extracting contents of {out}\")\n", 93 | " f.extractall(outdir)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | 
"execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "print(\"done\")" 103 | ] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.7.8" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 4 127 | } 128 | -------------------------------------------------------------------------------- /scripts/fia/01_raw_to_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# FIA to Parquet\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 30, 2020_\n", 12 | "\n", 13 | "This notebook converts FIA csv files to Parquet format and stages them in a\n", 14 | "Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- `ENTIRE` directory\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One Parquet dataset per CSV: `gs://carbonplan-data/raw/fia/.parquet`\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import gcsfs\n", 36 | "import pandas as pd\n", 37 | "\n", 38 | "from carbonplan_data.utils import setup\n", 39 | "\n", 40 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 41 | "fs = gcsfs.GCSFileSystem(\n", 42 | " project=\"carbonplan\",\n", 43 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 44 | ")" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "workdir, upload = setup(\"joe\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "csvs = (workdir / \"fia/ENTIRE\").glob(\"*csv\")" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "import numpy as np\n", 72 | "\n", 73 | "\n", 74 | "def force_float32(fname):\n", 75 | " memmap = fname.stat().st_size > 1e8\n", 76 | "\n", 77 | " df = pd.read_csv(fname, engine=\"c\", low_memory=False, memory_map=memmap)\n", 78 | " for c in df:\n", 79 | " if \"f8\" in df[c].dtype.str:\n", 80 | " df[c] = df[c].astype(np.float32)\n", 81 | "\n", 82 | " return df" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "def exists(blob):\n", 92 | " try:\n", 93 | " f = fs.open(blob, \"rb\")\n", 94 | " f.close()\n", 95 | " return True\n", 96 | " except:\n", 97 | " return False" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "failed = []\n", 107 | "for fname in csvs:\n", 108 | " blob = f\"carbonplan-data/raw/fia/{fname.stem}.parquet\"\n", 109 | " print(fname.stem)\n", 110 | "\n", 111 | " if \"TREE.csv\" in 
str(fname):\n", 112 | " continue\n", 113 | "\n", 114 | " if exists(blob):\n", 115 | " continue\n", 116 | "\n", 117 | " df = force_float32(fname)\n", 118 | "\n", 119 | " print(blob)\n", 120 | "\n", 121 | " try:\n", 122 | " df.to_parquet(\n", 123 | " blob,\n", 124 | " compression=\"gzip\",\n", 125 | " open_with=fs.open,\n", 126 | " row_group_offsets=1000,\n", 127 | " engine=\"fastparquet\",\n", 128 | " )\n", 129 | " # consider using dask dataframe here to write to chunked dataframes here.\n", 130 | " print(\" --> \", blob)\n", 131 | " except Exception as e:\n", 132 | " failed.append((fname, e))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "failed" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# TREE.csv is a special case\n", 151 | "\n", 152 | "import dask.dataframe as dd\n", 153 | "\n", 154 | "row_group_offsets = 1000\n", 155 | "dtype = {\n", 156 | " \"AGENTCD\": \"float64\",\n", 157 | " \"CULL\": \"float64\",\n", 158 | " \"P2A_GRM_FLG\": \"object\",\n", 159 | " \"TREECLCD\": \"float64\",\n", 160 | " \"TREEHISTCD\": \"float64\",\n", 161 | " \"MODIFIED_IN_INSTANCE\": \"float64\",\n", 162 | " \"GST_PNWRS\": \"object\",\n", 163 | " \"SPGRPCD\": \"float64\",\n", 164 | " \"DIAHTCD\": \"float64\",\n", 165 | " \"SUBCYCLE\": \"float64\",\n", 166 | " \"CAVITY_USE_PNWRS\": \"object\",\n", 167 | "}\n", 168 | "\n", 169 | "blob = \"TREE.parquet\"\n", 170 | "\n", 171 | "df = dd.read_csv(\n", 172 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/fia/ENTIRE/TREE.csv\",\n", 173 | " dtype=dtype,\n", 174 | ")\n", 175 | "\n", 176 | "df.to_parquet(\"gs://carbonplan-data/raw/fia/TREE.parquet\")" 177 | ] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "Python 3", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.8.2" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 4 201 | } 202 | -------------------------------------------------------------------------------- /scripts/fluxnet/01_raw_to_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# FLUXNET to Parquet\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), August 7, 2020_\n", 12 | "\n", 13 | "This notebook converts FLUXNET csv files to Parquet format and stages them in a\n", 14 | "Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- `fluxnet` directory\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One Parquet dataset per CSV: `gs://carbonplan-data/raw/fluxnet/.parquet`\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import pathlib\n", 36 | "\n", 37 | "import dask.dataframe as dd\n", 38 | "import gcsfs\n", 39 | "import pandas as pd\n", 40 | "from fsspec.implementations.zip import ZipFileSystem\n", 
41 | "from tqdm import tqdm\n", 42 | "\n", 43 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 44 | "fs = gcsfs.GCSFileSystem(\n", 45 | " project=\"carbonplan\",\n", 46 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 47 | ")" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "workdir = pathlib.Path(\"/Users/jhamman/workdir/carbonplan_data_downloads/\")" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "storage_options = {\"token\": fs.session.credentials, \"project\": \"carbonplan\"}" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "jupyter": { 73 | "outputs_hidden": true 74 | } 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "zips = (workdir / \"fluxnet\").glob(\"*zip\")\n", 79 | "\n", 80 | "\n", 81 | "def make_fname(stem):\n", 82 | " p = stem.lower().split(\"_\")\n", 83 | " if \"AUX\" in stem:\n", 84 | " name = \"_\".join([p[1], *p[3:4]])\n", 85 | " else:\n", 86 | " name = \"_\".join([p[1], *p[3:5]])\n", 87 | " return name\n", 88 | "\n", 89 | "\n", 90 | "for zipfile in tqdm(zips):\n", 91 | " print(zipfile)\n", 92 | "\n", 93 | " zipfs = ZipFileSystem(zipfile, mode=\"r\")\n", 94 | " csvs = zipfs.glob(\"*csv\")\n", 95 | "\n", 96 | " for csv in csvs:\n", 97 | " fname = pathlib.PosixPath(csv)\n", 98 | " name = make_fname(fname.stem)\n", 99 | " blob = blob = f\"gcs://carbonplan-data/raw/fluxnet/{name}.parquet\"\n", 100 | "\n", 101 | " df = pd.read_csv(zipfs.open(csv, mode=\"rb\"))\n", 102 | " ddf = dd.from_pandas(df, chunksize=1000).repartition(partition_size=\"50MB\")\n", 103 | " ddf.to_parquet(blob, storage_options=storage_options)\n", 104 | "\n", 105 | " print(\"--> \", blob)" 106 | ] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.8.2" 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 4 130 | } 131 | -------------------------------------------------------------------------------- /scripts/gcp/01_raw_to_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Global Carbon Project to Parquet\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), August 17, 2020_\n", 12 | "\n", 13 | "This notebook converts faw Excel files from the Global Carbon Project to Parquet\n", 14 | "format and stages them in a Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- `gcp` directory\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One Parquet dataset per Excel sheet:\n", 23 | " `gs://carbonplan-data/raw/gcp/.parquet`\n", 24 | "\n", 25 | "**Notes:**\n", 26 | "\n", 27 | "- No reprojection or processing of the data is done in this notebook.\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import 
dask.dataframe as dd\n", 37 | "import gcsfs\n", 38 | "import pandas as pd" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 48 | "fs = gcsfs.GCSFileSystem(\n", 49 | " project=\"carbonplan\",\n", 50 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 51 | ")\n", 52 | "\n", 53 | "storage_options = {\"token\": fs.session.credentials, \"project\": \"carbonplan\"}\n", 54 | "\n", 55 | "\n", 56 | "def process(fname, target, **open_kwargs):\n", 57 | " df = pd.read_excel(fname, **open_kwargs)\n", 58 | " df = df.loc[:, ~df.columns.str.contains(\"^Unnamed\")]\n", 59 | " df = dd.from_pandas(df, npartitions=1)\n", 60 | " df.to_parquet(target, engine=\"fastparquet\", storage_options=storage_options)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## National Carbon Emissions\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "fname = (\n", 77 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/gcp/National_Carbon_Emissions_2019v1.0.xlsx\"\n", 78 | ")\n", 79 | "\n", 80 | "# Territorial Emissions\n", 81 | "target = \"gs://carbonplan-data/raw/gcp/consumption_emissions.parquet\"\n", 82 | "open_kwargs = dict(sheet_name=\"Territorial Emissions\", skiprows=16, index_col=0)\n", 83 | "process(fname, target, **open_kwargs)\n", 84 | "\n", 85 | "# Consumption Emissions\n", 86 | "target = \"gs://carbonplan-data/raw/gcp/territorial_emissions.parquet\"\n", 87 | "open_kwargs = dict(sheet_name=\"Consumption Emissions\", skiprows=8, index_col=0)\n", 88 | "process(fname, target, **open_kwargs)\n", 89 | "\n", 90 | "# Emissions Transfers\n", 91 | "target = \"gs://carbonplan-data/raw/gcp/transfer_emissions.parquet\"\n", 92 | "open_kwargs = dict(sheet_name=\"Emissions Transfers\", skiprows=8, index_col=0)\n", 93 | "process(fname, target, **open_kwargs)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## Global Carbon Budget\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "fname = \"/Users/jhamman/workdir/carbonplan_data_downloads/gcp/raw_gcb_Global_Carbon_Budget_2019v1.0.xlsx\"\n", 110 | "\n", 111 | "# Global Carbon Budget\n", 112 | "target = \"gs://carbonplan-data/raw/gcp/global_carbon_budget.parquet\"\n", 113 | "open_kwargs = dict(sheet_name=\"Global Carbon Budget\", skiprows=18, index_col=0)\n", 114 | "process(fname, target, **open_kwargs)\n", 115 | "\n", 116 | "# Fossil Emissions by Fuel Type\n", 117 | "target = \"gs://carbonplan-data/raw/gcp/fossil_emissions_by_fuel_type.parquet\"\n", 118 | "open_kwargs = dict(sheet_name=\"Fossil Emissions by Fuel Type\", skiprows=12, index_col=0)\n", 119 | "process(fname, target, **open_kwargs)\n", 120 | "\n", 121 | "# Land-Use Change Emissions\n", 122 | "target = \"gs://carbonplan-data/raw/gcp/land_use_change_emissions.parquet\"\n", 123 | "open_kwargs = dict(sheet_name=\"Land-Use Change Emissions\", skiprows=25, index_col=0)\n", 124 | "process(fname, target, **open_kwargs)\n", 125 | "\n", 126 | "# Ocean Sink\n", 127 | "target = \"gs://carbonplan-data/raw/gcp/ocean_sink.parquet\"\n", 128 | "open_kwargs = dict(sheet_name=\"Ocean Sink\", skiprows=22, index_col=0)\n", 129 | 
"process(fname, target, **open_kwargs)\n", 130 | "\n", 131 | "# Terrestrial Sink\n", 132 | "target = \"gs://carbonplan-data/raw/gcp/terrestrial_sink.parquet\"\n", 133 | "open_kwargs = dict(sheet_name=\"Terrestrial Sink\", skiprows=23, index_col=0)\n", 134 | "process(fname, target, **open_kwargs)\n", 135 | "\n", 136 | "# Historical Budget\n", 137 | "target = \"gs://carbonplan-data/raw/gcp/historical_budget.parquet\"\n", 138 | "open_kwargs = dict(sheet_name=\"Historical Budget\", skiprows=14, index_col=0)\n", 139 | "process(fname, target, **open_kwargs)" 140 | ] 141 | } 142 | ], 143 | "metadata": { 144 | "kernelspec": { 145 | "display_name": "Python 3", 146 | "language": "python", 147 | "name": "python3" 148 | }, 149 | "language_info": { 150 | "codemirror_mode": { 151 | "name": "ipython", 152 | "version": 3 153 | }, 154 | "file_extension": ".py", 155 | "mimetype": "text/x-python", 156 | "name": "python", 157 | "nbconvert_exporter": "python", 158 | "pygments_lexer": "ipython3", 159 | "version": "3.8.2" 160 | } 161 | }, 162 | "nbformat": 4, 163 | "nbformat_minor": 4 164 | } 165 | -------------------------------------------------------------------------------- /scripts/global-biomass/01_biomass_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Convert Global Biomass data to COGs\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook converts Global Aboveground and Belowground Biomass Carbon Density\n", 14 | "Maps for the Year 2010 to COG format.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- local copy of biomass rasters\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- Local copies of biomass rasters in COG format\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "\n", 37 | "from rio_cogeo.cogeo import cog_translate\n", 38 | "from rio_cogeo.profiles import cog_profiles\n", 39 | "\n", 40 | "from carbonplan_data.utils import setup\n", 41 | "\n", 42 | "# This is the COG profile:\n", 43 | "dst_profile = cog_profiles.get(\"deflate\")" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "workdir, upload = setup(\"joe\")" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "keys = {\n", 62 | " \"aboveground_biomass_carbon_2010\": \"aboveground\",\n", 63 | " \"aboveground_biomass_carbon_2010_uncertainty\": \"aboveground_uncertainty\",\n", 64 | " \"belowground_biomass_carbon_2010\": \"belowground\",\n", 65 | " \"belowground_biomass_carbon_2010_uncertainty\": \"belowground_uncertainty\",\n", 66 | "}\n", 67 | "\n", 68 | "for skey, tkey in keys.items():\n", 69 | " # raw file\n", 70 | " source = workdir / f\"Global_Maps_C_Density_2010_1763/data/{skey}.tif\"\n", 71 | "\n", 72 | " # local target\n", 73 | " target = \"./raster.tif\"\n", 74 | "\n", 75 | " # This is where we'll write the COGs when we're done\n", 76 | " cloud_target = f\"raw/2010-harmonized-biomass/global/300m/{tkey}.tif\"\n", 77 | "\n", 78 | " # translate to COG\n", 79 | " cog_translate(source, target, 
dst_profile)\n", 80 | "\n", 81 | " # Upload to GCS\n", 82 | " upload(target, cloud_target)\n", 83 | "\n", 84 | " # Remove temporary file\n", 85 | " os.remove(target)" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 3", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.7.8" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 4 110 | } 111 | -------------------------------------------------------------------------------- /scripts/gridmet/01_gridmet_to_zarr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# gridMET to Zarr\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook converts the raw gridMET dataset to Zarr format.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- intake catalog: `climate.gridmet_opendap`\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- Cloud copy of gridMET\n", 22 | "\n", 23 | "**Notes:**\n", 24 | "\n", 25 | "- No reprojection or processing of the data is done in this notebook.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import gcsfs\n", 35 | "import xarray as xr\n", 36 | "from numcodecs.zlib import Zlib\n", 37 | "\n", 38 | "fs = gcsfs.GCSFileSystem(\n", 39 | " project=\"carbonplan\",\n", 40 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 41 | ")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "years = list(range(1979, 2021))\n", 51 | "variables = [\n", 52 | " \"pr\",\n", 53 | " \"tmmn\",\n", 54 | " \"tmmx\",\n", 55 | " \"rmax\",\n", 56 | " \"rmin\",\n", 57 | " \"sph\",\n", 58 | " \"srad\",\n", 59 | " \"th\",\n", 60 | " \"vs\",\n", 61 | " \"bi\",\n", 62 | " \"fm100\",\n", 63 | " \"fm1000\",\n", 64 | " \"erc\",\n", 65 | " \"pdsi\",\n", 66 | " \"etr\",\n", 67 | " \"pet\",\n", 68 | " \"vpd\",\n", 69 | "]" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "source_pattern = \"https://www.northwestknowledge.net/metdata/data/{var}_{year}.nc\"" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "jupyter": { 86 | "outputs_hidden": true 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "ds_list = []\n", 92 | "for v in variables:\n", 93 | " print(v)\n", 94 | " ds_list.append(xr.concat([source(variable=v, year=y).to_dask() for y in years], dim=\"day\")) # noqa" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "ds = xr.merge(ds_list, compat=\"override\")\n", 104 | "ds[\"crs\"] = ds_list[0][\"crs\"]\n", 105 | "ds" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "ds.nbytes / 1e9" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | 
"execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "mapper = fs.get_mapper(\"carbonplan-data/raw/gridmet/4km/raster.zarr\")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "ds = ds.chunk({\"day\": 1000, \"lat\": 256, \"lon\": 256})\n", 133 | "ds" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "encoding = {v: {\"compressor\": Zlib(4)} for v in ds.variables}\n", 143 | "encoding" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "future = ds.to_zarr(mapper, mode=\"w\", encoding=encoding, compute=False)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "from dask.diagnostics import ProgressBar\n", 162 | "\n", 163 | "with ProgressBar():\n", 164 | " future.compute(scheduler=\"threading\")" 165 | ] 166 | } 167 | ], 168 | "metadata": { 169 | "kernelspec": { 170 | "display_name": "Python 3", 171 | "language": "python", 172 | "name": "python3" 173 | }, 174 | "language_info": { 175 | "codemirror_mode": { 176 | "name": "ipython", 177 | "version": 3 178 | }, 179 | "file_extension": ".py", 180 | "mimetype": "text/x-python", 181 | "name": "python", 182 | "nbconvert_exporter": "python", 183 | "pygments_lexer": "ipython3", 184 | "version": "3.8.2" 185 | } 186 | }, 187 | "nbformat": 4, 188 | "nbformat_minor": 4 189 | } 190 | -------------------------------------------------------------------------------- /scripts/iiasa/01_raw_to_parquet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# IIASA to Parquet\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), July 1, 2020_\n", 12 | "\n", 13 | "This notebook converts IIASA CSV and DAT files to Parquet format and stages them\n", 14 | "in a Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- various data files downloaded from IIASA website (manual process).\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One Parquet dataset per local data file:\n", 23 | " `gs://carbonplan-data-restricted/raw/iiasa/.parquet`\n", 24 | "\n", 25 | "**Notes:**\n", 26 | "\n", 27 | "- No reprojection or processing of the data is done in this notebook.\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import pathlib\n", 37 | "\n", 38 | "import gcsfs\n", 39 | "import pandas as pd\n", 40 | "\n", 41 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 42 | "fs = gcsfs.GCSFileSystem(\n", 43 | " project=\"carbonplan\",\n", 44 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 45 | ")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "source_dir = pathlib.Path(\"../../carbonplan_data/iiasa/SSP_CMIP6_201811.csv/\")\n", 55 | "blob_prefix = \"carbonplan-data-restricted/raw/iiasa/SSP_CMIP6_201811\"\n", 56 | "csvs = source_dir.glob(\"*csv\")\n", 57 | "\n", 58 | "for csv in csvs:\n", 59 | " blob = 
f\"{blob_prefix}/{csv.stem.lower()}.parquet\"\n", 60 | " print(blob)\n", 61 | "\n", 62 | " df = pd.read_csv(csv)\n", 63 | " df.to_parquet(blob, compression=\"gzip\", open_with=fs.open, engine=\"fastparquet\")" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "source_dir = pathlib.Path(\"../../carbonplan_data/iiasa/SSP_IAM_V2_201811.csv/\")\n", 73 | "blob_prefix = \"carbonplan-data-restricted/raw/iiasa/SSP_IAM_V2_201811\"\n", 74 | "csvs = source_dir.glob(\"*csv\")\n", 75 | "\n", 76 | "for csv in csvs:\n", 77 | " blob = f\"{blob_prefix}/{csv.stem.lower()}.parquet\"\n", 78 | " print(blob)\n", 79 | "\n", 80 | " df = pd.read_csv(csv)\n", 81 | " df.to_parquet(blob, compression=\"gzip\", open_with=fs.open, engine=\"fastparquet\")" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "source = \"../../carbonplan_data/iiasa/SspDb_compare_regions_2013-06-12.csv\"\n", 91 | "blob = \"carbonplan-data-restricted/raw/iiasa/SspDb_compare_regions_2013-06-12.parquet\"\n", 92 | "df = pd.read_csv(source)\n", 93 | "df.to_parquet(blob, compression=\"gzip\", open_with=fs.open, engine=\"fastparquet\")" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "source = \"../../carbonplan_data/iiasa/SspDb_country_data_2013-06-12.csv\"\n", 103 | "blob = \"carbonplan-data-restricted/raw/iiasa/SspDb_country_data_2013-06-12.parquet\"\n", 104 | "df = pd.read_csv(source)\n", 105 | "df.to_parquet(blob, compression=\"gzip\", open_with=fs.open, engine=\"fastparquet\")" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "# TODO:\n", 113 | "\n", 114 | "- write parser for RCP DAT files." 
115 | ] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.8.2" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 4 139 | } 140 | -------------------------------------------------------------------------------- /scripts/mtbs/01_raw_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS to Cloud Optimized GeoTIFF\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 5, 2020_\n", 12 | "\n", 13 | "This notebook converts MTBS 30m yearly rasters to Cloud Optimized GeoTIFF and\n", 14 | "stages them in a Google Cloud Storage bucket.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- `DATA.zip` from MTBS website\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- One COG per year: `gs://carbonplan-data/raw/MTBS/30m//raster.tif`\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import io\n", 36 | "import os.path\n", 37 | "\n", 38 | "import gcsfs\n", 39 | "from fsspec.implementations import zip\n", 40 | "from rasterio.io import MemoryFile\n", 41 | "from rio_cogeo.cogeo import cog_translate\n", 42 | "from rio_cogeo.profiles import cog_profiles\n", 43 | "\n", 44 | "# run `gcloud auth login` on the command line, or try switching token to `browser`\n", 45 | "fs = gcsfs.GCSFileSystem(\n", 46 | " project=\"carbonplan\",\n", 47 | " token=\"/Users/jhamman/.config/gcloud/legacy_credentials/joe@carbonplan.org/adc.json\",\n", 48 | ")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "The input for this script is a zip file called `DATA.zip`. 
This was downloaded\n", 56 | "from: https://www.mtbs.gov/direct-download Specifically, it came from:\n", 57 | "\n", 58 | "```\n", 59 | " - [select] Burn Severity Mosaics\n", 60 | " -> [select] Continental U.S.\n", 61 | " -> [click] all years\n", 62 | " -> [click] Download 34 Files\n", 63 | "```\n", 64 | "\n", 65 | "This file does not need to be un-zipped for the rest of the script to run.\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "# raw zip file\n", 75 | "raw_zips = \"~/Downloads/DATA.zip\"\n", 76 | "\n", 77 | "# This is where we'll write the COGs when we're done\n", 78 | "bucket = \"carbonplan-data/raw/MTBS/30m/\"\n", 79 | "\n", 80 | "# This is the COG profile:\n", 81 | "dst_profile = cog_profiles.get(\"deflate\")" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "def translate(fo, out_file):\n", 91 | " \"\"\"translate a file object (`fo`) to cloud optimized geotiff\n", 92 | "\n", 93 | " the resulting COG is written to the filesystem (`fs`) defined above.\n", 94 | " \"\"\"\n", 95 | " dst_profile = cog_profiles.get(\"deflate\")\n", 96 | " with MemoryFile() as mem_dst:\n", 97 | " # Important, we pass `mem_dst.name` as output dataset path\n", 98 | " cog_translate(fo, mem_dst.name, dst_profile, in_memory=True)\n", 99 | " print(f\"writing to {out_file}\")\n", 100 | " with fs.open(out_file, \"wb\") as f:\n", 101 | " f.write(mem_dst.read())" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "# iterate through the zip file, extracting individual years\n", 111 | "# write only files with `tif` or `htm` suffixes to the cloud bucket\n", 112 | "# Warning: this step takes a while to run, go get some coffee.\n", 113 | "root = zip.ZipFileSystem(raw_zips).get_mapper(\"composite_data\")\n", 114 | "for key in root:\n", 115 | " year = key.split(\"/\")[1]\n", 116 | " sub = io.BytesIO(root[key])\n", 117 | " r2 = zip.ZipFileSystem(sub).get_mapper(\"\")\n", 118 | "\n", 119 | " for fname in r2:\n", 120 | " if fname.endswith(\"tif\"):\n", 121 | " fo = io.BytesIO(r2[fname])\n", 122 | " out_name = os.path.join(bucket, f\"{year}.tif\")\n", 123 | " translate(fo, out_name)\n", 124 | " elif fname.endswith(\"htm\"):\n", 125 | " out_name = os.path.join(bucket, f\"{year}.htm\")\n", 126 | " with fs.open(out_name, \"wb\") as f:\n", 127 | " f.write(r2[fname])\n", 128 | " else:\n", 129 | " continue\n", 130 | " print(f\"done with {out_name}\")" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.8.2" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 4 155 | } 156 | -------------------------------------------------------------------------------- /scripts/mtbs/02_downsampling_and_reprojection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS downsampling and reprojection\n", 10 | 
"\n", 11 | "_by Joe Hamman (CarbonPlan), August 5, 2020_\n", 12 | "\n", 13 | "This notebook downsamples and reprojects MTBS 250m yearly rasters stored in\n", 14 | "Cloud Optimized GeoTIFF into 250m and 4000m GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- COG outputs from `01_mtbs_to_cogs.ipynb`\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- COG outputs after downsampling and reprojection\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Source CRS and projection extent come from MTBS\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from carbonplan_data.utils import projections, setup\n", 45 | "\n", 46 | "workdir, upload = setup(\"joe\")\n", 47 | "workdir" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from rio_cogeo.cogeo import cog_translate\n", 57 | "from rio_cogeo.profiles import cog_profiles\n", 58 | "\n", 59 | "dst_profile = cog_profiles.get(\"deflate\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def get_files(region):\n", 69 | " return [\n", 70 | " {\n", 71 | " \"source\": workdir / f\"mtbs/{region}_foresttype/250m/raster.tif\",\n", 72 | " \"target\": f\"processed/nftd/{region}/{resolution}m/type.tif\",\n", 73 | " },\n", 74 | " {\n", 75 | " \"source\": workdir / f\"nftd/{region}_forestgroup/250m/raster.tif\",\n", 76 | " \"target\": f\"processed/nftd/{region}/{resolution}m/group.tif\",\n", 77 | " },\n", 78 | " {\n", 79 | " \"source\": workdir / f\"nftd/{region}_foresttype/250m/error.tif\",\n", 80 | " \"target\": f\"processed/nftd/{region}/{resolution}m/type_error.tif\",\n", 81 | " },\n", 82 | " {\n", 83 | " \"source\": workdir / f\"nftd/{region}_forestgroup/250m/error.tif\",\n", 84 | " \"target\": f\"processed/nftd/{region}/{resolution}m/group_error.tif\",\n", 85 | " },\n", 86 | " ]" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "for resolution in [250, 4000]:\n", 96 | " for region in [\"ak\", \"conus\"]:\n", 97 | " files = get_files(region)\n", 98 | " crs, extent = projections(\"albers\", region)\n", 99 | " for f in files:\n", 100 | " if \"error\" in str(f[\"source\"]):\n", 101 | " resampling = \"average\"\n", 102 | " elif resolution == 4000:\n", 103 | " resampling = \"mode\"\n", 104 | " else:\n", 105 | " resampling = \"near\"\n", 106 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}\").format(\n", 107 | " crs,\n", 108 | " extent,\n", 109 | " resolution,\n", 110 | " resolution,\n", 111 | " resampling,\n", 112 | " f[\"source\"],\n", 113 | " \"./raster.tif\",\n", 114 | " )\n", 115 | " os.system(cmd)\n", 116 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 117 | " upload(\"./raster.tif\", f[\"target\"])\n", 118 | " os.remove(\"./raster.tif\")" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "Python 3", 125 | "language": "python", 126 | "name": "python3" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": 
"python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.8.2" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 4 143 | } 144 | -------------------------------------------------------------------------------- /scripts/mtbs/02_mtbs_to_zarr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS to Zarr\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 17, 2020_\n", 12 | "\n", 13 | "This notebook converts MTBS 30m yearly rasters stored in Cloud Optimized GeoTIFF\n", 14 | "and stages them in a single Zarr archive.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- COG outputs from `01_raw_to_cogs.ipynb`\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- 1 Zarr archive:\n", 23 | " `gs://carbonplan-data/processed/MTBS/raster.zarr//`\n", 24 | "\n", 25 | "**Notes:**\n", 26 | "\n", 27 | "- In the process of processing this dataset, we found that the behavior in\n", 28 | " rasterio's `reproject` function was sensitive to the package version for\n", 29 | " rasterio and/or gdal. Versions we found to work were\n", 30 | " `rasterio=1.0.25,gdal=2.4.2`. Versions that we found to fail were\n", 31 | " `rasterio=1.1.5,gdal=3.1.0`\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import os\n", 41 | "\n", 42 | "import gcsfs\n", 43 | "import numpy as np\n", 44 | "import rasterio\n", 45 | "import xarray as xr\n", 46 | "from numcodecs.zlib import Zlib\n", 47 | "from rasterio import Affine\n", 48 | "from rasterio.crs import CRS\n", 49 | "from rasterio.warp import Resampling, reproject, transform\n", 50 | "\n", 51 | "scratch = os.environ[\"SCRATCH\"]" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "def base_crs():\n", 61 | " return (\n", 62 | " 'PROJCS[\"Albers_Conical_Equal_Area\",'\n", 63 | " 'GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",'\n", 64 | " 'SPHEROID[\"WGS 84\",6378137,298.257223563,AUTHORITY[\"EPSG\",\"7030\"]],'\n", 65 | " \"TOWGS84[0,0,0,-0,-0,-0,0],\"\n", 66 | " 'AUTHORITY[\"EPSG\",\"6326\"]],'\n", 67 | " 'PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],'\n", 68 | " 'UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],'\n", 69 | " 'AUTHORITY[\"EPSG\",\"4326\"]],'\n", 70 | " 'PROJECTION[\"Albers_Conic_Equal_Area\"],'\n", 71 | " 'PARAMETER[\"standard_parallel_1\",29.5],'\n", 72 | " 'PARAMETER[\"standard_parallel_2\",45.5],'\n", 73 | " 'PARAMETER[\"latitude_of_center\",23],'\n", 74 | " 'PARAMETER[\"longitude_of_center\",-96],'\n", 75 | " 'PARAMETER[\"false_easting\",0],'\n", 76 | " 'PARAMETER[\"false_northing\",0],'\n", 77 | " 'UNIT[\"meters\",1]]'\n", 78 | " )" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def make_dst_band(src_band, src_resolution):\n", 88 | " left = -2493045.0\n", 89 | " right = 2342655.0\n", 90 | " top = 3310005.0\n", 91 | " bottom = 177285.0\n", 92 | " dst_transform = Affine(30.0, 0.0, left, 0.0, -30.0, top)\n", 93 | " dst_resolution = dst_transform[0]\n", 94 | " dst_transform = dst_transform * Affine.scale(\n", 95 | " src_resolution / dst_resolution, src_resolution / dst_resolution\n", 96 | " )\n", 97 | " dst_crs = CRS.from_wkt(base_crs())\n", 98 | "\n", 99 | " 
dst_shape = [\n", 100 | " round((top - bottom) / src_resolution),\n", 101 | " round((right - left) / src_resolution),\n", 102 | " ]\n", 103 | "\n", 104 | " dst_band = np.zeros(dst_shape, np.float32)\n", 105 | " return dst_band, dst_transform, dst_crs, dst_shape\n", 106 | "\n", 107 | "\n", 108 | "def calc_coords(shape, trans, crs):\n", 109 | " ny, nx = shape\n", 110 | "\n", 111 | " # crs coords\n", 112 | " x, _ = trans * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5)\n", 113 | " _, y = trans * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5)\n", 114 | "\n", 115 | " # convert to lat/lon\n", 116 | " xs, ys = np.meshgrid(x, y)\n", 117 | " lon, lat = transform(crs, {\"init\": \"EPSG:4326\"}, xs.flatten(), ys.flatten())\n", 118 | "\n", 119 | " return {\n", 120 | " \"x\": xr.DataArray(x, dims=(\"x\",)),\n", 121 | " \"y\": xr.DataArray(y, dims=(\"y\",)),\n", 122 | " \"lat\": xr.DataArray(np.asarray(lat).reshape((ny, nx)), dims=(\"y\", \"x\")),\n", 123 | " \"lon\": xr.DataArray(np.asarray(lon).reshape((ny, nx)), dims=(\"y\", \"x\")),\n", 124 | " }\n", 125 | "\n", 126 | "\n", 127 | "def prepare_mtbs(year, resolution, return_ds=True):\n", 128 | " src_path = f\"gs://carbonplan-data/raw/MTBS/30m/{year}.tif\"\n", 129 | " with rasterio.open(src_path, \"r\") as src_raster:\n", 130 | " src_transform = src_raster.meta[\"transform\"]\n", 131 | " src_crs = src_raster.meta[\"crs\"]\n", 132 | " src_band = src_raster.read(1)\n", 133 | " src_resolution = resolution\n", 134 | "\n", 135 | " dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(src_band, src_resolution)\n", 136 | " print(\"calc_coords\")\n", 137 | " coords = calc_coords(dst_shape, dst_transform, dst_crs)\n", 138 | "\n", 139 | " src_nodata = 6\n", 140 | " if resolution == 30:\n", 141 | " resampling = Resampling.nearest\n", 142 | " elif resolution > 30:\n", 143 | " resampling = Resampling.average\n", 144 | " # set moderate or high burn severity to 1 and others to 1\n", 145 | " src_band_tmp = ((src_band == 3) | (src_band == 4)).astype(\"uint8\")\n", 146 | " # set masked regions to nodata value\n", 147 | " src_band_tmp[src_band == src_nodata] = src_nodata\n", 148 | " src_band = src_band_tmp\n", 149 | " dst_band = dst_band.astype(\"float32\") # convert to float for averaging\n", 150 | "\n", 151 | " print(\"reproject\")\n", 152 | " # this seems to require rasterio=1.0.25 and gdal=2.4.2\n", 153 | " reproject(\n", 154 | " src_band,\n", 155 | " dst_band,\n", 156 | " src_transform=src_transform,\n", 157 | " src_crs=src_crs,\n", 158 | " dst_transform=dst_transform,\n", 159 | " dst_crs=dst_crs,\n", 160 | " resampling=resampling,\n", 161 | " src_nodata=src_nodata,\n", 162 | " dst_nodata=src_raster.meta[\"nodata\"],\n", 163 | " )\n", 164 | "\n", 165 | " meta = src_raster.meta\n", 166 | " meta.update(\n", 167 | " width=dst_shape[0],\n", 168 | " height=dst_shape[1],\n", 169 | " dtype=str(dst_band.dtype),\n", 170 | " crs=dst_crs.to_wkt(),\n", 171 | " transform=list(dst_transform),\n", 172 | " nodata=src_raster.meta[\"nodata\"],\n", 173 | " )\n", 174 | "\n", 175 | " varname = f\"{year}\"\n", 176 | " chunks = {\"x\": 512, \"y\": 512}\n", 177 | " ds = xr.DataArray(dst_band, dims=(\"y\", \"x\"), attrs=meta).to_dataset(name=varname)\n", 178 | " ds = ds.assign_coords(coords).chunk(chunks)\n", 179 | "\n", 180 | " if return_ds:\n", 181 | " return ds\n", 182 | " else:\n", 183 | " fs = gcsfs.GCSFileSystem(project=\"carbonplan\", token=\"cloud\", requester_pays=True)\n", 184 | " mapper = fs.get_mapper(scratch + f\"/MTBS.{year}.{resolution}m.zarr\")\n", 185 | " 
ds.to_zarr(store=mapper, mode=\"w\", encoding={varname: {\"compressor\": Zlib()}})" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "years = list(range(1984, 2018))\n", 195 | "\n", 196 | "dsets = [prepare_mtbs(y, 4000) for y in years]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "varname = \"burned_area\"\n", 206 | "da = xr.merge(dsets).to_array(dim=\"time\", name=varname)\n", 207 | "da[\"time\"] = da.time.astype(int)\n", 208 | "ds = da.to_dataset()\n", 209 | "ds[varname].attrs.update(dsets[0][\"1984\"].attrs)\n", 210 | "ds" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "fs = gcsfs.GCSFileSystem(project=\"carbonplan\", token=\"cloud\", requester_pays=True)\n", 220 | "mapper = fs.get_mapper(\"carbonplan-data/processed/MTBS/raster.zarr\")\n", 221 | "\n", 222 | "ds.to_zarr(\n", 223 | " store=mapper,\n", 224 | " group=\"4000m\",\n", 225 | " mode=\"w\",\n", 226 | " encoding={varname: {\"compressor\": Zlib()}},\n", 227 | ")" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "ds[varname].sum(\"time\").plot(robust=True)" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "ds[varname]" 246 | ] 247 | } 248 | ], 249 | "metadata": { 250 | "kernelspec": { 251 | "display_name": "Python 3", 252 | "language": "python", 253 | "name": "python3" 254 | }, 255 | "language_info": { 256 | "codemirror_mode": { 257 | "name": "ipython", 258 | "version": 3 259 | }, 260 | "file_extension": ".py", 261 | "mimetype": "text/x-python", 262 | "name": "python", 263 | "nbconvert_exporter": "python", 264 | "pygments_lexer": "ipython3", 265 | "version": "3.7.8" 266 | } 267 | }, 268 | "nbformat": 4, 269 | "nbformat_minor": 4 270 | } 271 | -------------------------------------------------------------------------------- /scripts/mtbs/04_mtbs_perims_to_raster.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS Perimeters to Zarr\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), November 3, 2020_\n", 12 | "\n", 13 | "This notebook converts MTBS fire perimeters to monthly burned area rasters\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- MTBS fire perimeters shapefile\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- 1 Zarr archive:\n", 22 | " `gs://carbonplan-data/processed/mtbs/conus/{res}m/monthly_perims_raster.zarr`\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Text defining large and very large fires from Barbero et al. (2015):\n", 27 | " > The Monitoring Trends in Burn Severity (MTBS) data- base was used to acquire\n", 28 | " > fire location, fire discovery date and burned area for LFs over the\n", 29 | " > contiguous US from 1984 to 2010. 
We excluded fires smaller than 404ha and\n", 30 | " > further eliminated 'unburned to low' burned area for each fire as classified\n", 31 | " > by MTBS to more accurately portray the true area burned (Kolden et al 2012).\n", 32 | " > While the definition of VLFs is subjective and likely geographically\n", 33 | " > dependent, we define VLFs as fires whose size exceeds the 90th percentile\n", 34 | " > (5073 ha) of MTBS fires greater than 404 ha (n = 927) (figure 1(b)) and LF\n", 35 | " > as fires whose size was below the 90th percentile but greater than 404 ha (n\n", 36 | " > = 8343)(figure 1(c)).\n" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import geopandas\n", 46 | "import hvplot.pandas # noqa\n", 47 | "import numpy as np\n", 48 | "import pandas as pd\n", 49 | "import rasterio\n", 50 | "import xarray as xr\n", 51 | "from carbonplan.data import cat\n", 52 | "from rasterio.features import rasterize" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "months = pd.date_range(\"1984-01\", \"2018-12\", freq=\"MS\")" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# mask = cat.nlcd.raster.read().squeeze(drop=True)\n", 71 | "\n", 72 | "region = \"conus\"\n", 73 | "\n", 74 | "mask = rasterio.open(cat.mtbs.raw_raster._urlpath)\n", 75 | "transform = mask.transform\n", 76 | "shape = mask.shape\n", 77 | "src_profile = mask.profile\n", 78 | "\n", 79 | "# TODO: replace with intake use\n", 80 | "perims = geopandas.GeoDataFrame.from_file(\"mtbs_perimeter_data/mtbs_perims_DD/mtbs_perims_DD.shp\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# note we set all start days to 1 (so we can easily group by month later)\n", 90 | "dates = pd.DatetimeIndex(\n", 91 | " [pd.to_datetime(f\"{r.Year}-{r.StartMonth}-1\") for _, r in perims.iterrows()]\n", 92 | ")\n", 93 | "perims.index = dates\n", 94 | "perims = perims.sort_index()\n", 95 | "perims[\"ha\"] = perims[\"Acres\"] * 0.40468564224\n", 96 | "perims[\"ym\"] = dates\n", 97 | "perims = perims.to_crs(crs=mask.crs)\n", 98 | "perims" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "pattern = \"Wild*|Out*|Unknown|Complex\"\n", 108 | "perims = perims[perims.Fire_Type.str.contains(pattern)]\n", 109 | "\n", 110 | "perims_lf = perims[perims.ha.between(404, 5073)]\n", 111 | "perims_vlf = perims[perims.ha > 5073]\n", 112 | "perims_vlf" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "def rasterize_geom(geoms):\n", 122 | " r = rasterize(\n", 123 | " [(geom, 1) for geom in geoms],\n", 124 | " out_shape=shape,\n", 125 | " transform=transform,\n", 126 | " fill=0,\n", 127 | " merge_alg=rasterio.enums.MergeAlg.replace,\n", 128 | " all_touched=True,\n", 129 | " dtype=rasterio.uint8,\n", 130 | " )\n", 131 | " return r" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "perims_vlf[[\"ha\", \"geometry\", \"ym\"]][\"2018\":\"2018\"].to_crs(\"EPSG:4326\").hvplot(\n", 141 | " c=\"ha\", geo=True, coastline=True\n", 
142 | ")" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "from rasterio.io import MemoryFile\n", 152 | "from rio_cogeo.cogeo import cog_translate\n", 153 | "from rio_cogeo.profiles import cog_profiles" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "from gcsfs import GCSFileSystem\n", 163 | "\n", 164 | "\n", 165 | "def copy_to_fs(source, dst, fs):\n", 166 | " with open(source, \"rb\") as fsource:\n", 167 | " with fs.open(dst, \"wb\") as fdst:\n", 168 | " fdst.write(fsource.read())\n", 169 | "\n", 170 | "\n", 171 | "def numpy_to_cog(data, out_fname=\"temp_cog.tif\"):\n", 172 | " with MemoryFile() as memfile:\n", 173 | " with memfile.open(**src_profile) as mem:\n", 174 | " # Populate the input file with numpy array\n", 175 | " mem.write(r, indexes=1)\n", 176 | "\n", 177 | " dst_profile = cog_profiles.get(\"deflate\")\n", 178 | " cog_translate(\n", 179 | " mem,\n", 180 | " out_fname,\n", 181 | " dst_profile,\n", 182 | " in_memory=True,\n", 183 | " quiet=True,\n", 184 | " )\n", 185 | "\n", 186 | "\n", 187 | "fs = GCSFileSystem()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "# unocomment to start over\n", 197 | "# paths = fs.glob('carbonplan-data/processed/mtbs/conus/30m/*f_????.??.tif')\n", 198 | "# fs.rm(paths)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "# make an empty file we can copy to each month without any fires\n", 208 | "r = np.zeros(shape, dtype=rasterio.uint8)\n", 209 | "numpy_to_cog(r, \"empty_cog.tif\")" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "dst_profile = cog_profiles.get(\"deflate\")\n", 219 | "\n", 220 | "for month in months:\n", 221 | " for name, df in [(\"lf\", perims_lf), (\"vlf\", perims_vlf)]:\n", 222 | " out_fname = (\n", 223 | " f\"carbonplan-data/processed/mtbs/{region}/30m/{name}_{month.strftime('%Y.%m')}.tif\"\n", 224 | " )\n", 225 | "\n", 226 | " if fs.exists(out_fname):\n", 227 | " print(f\"{out_fname} exists, skipping...\")\n", 228 | " continue\n", 229 | "\n", 230 | " try:\n", 231 | " geom = df.loc[[month]].geometry\n", 232 | " print(geom)\n", 233 | " print(f\"rasterizing {month}\")\n", 234 | " r = rasterize_geom(geom)\n", 235 | " numpy_to_cog(r, \"temp_cog.tif\")\n", 236 | " copy_to_fs(\"temp_cog.tif\", out_fname, fs)\n", 237 | " except (KeyError, ValueError) as e:\n", 238 | " print(f\"raised error: {e}\")\n", 239 | " print(f\"copying empty cog to {out_fname}\")\n", 240 | " copy_to_fs(\"empty_cog.tif\", out_fname, fs)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "import intake\n", 250 | "from dask.diagnostics import ProgressBar\n", 251 | "\n", 252 | "cat2 = intake.open_catalog(\n", 253 | " \"https://raw.githubusercontent.com/carbonplan/data/master/carbonplan_data/catalogs/mtbs.yaml\"\n", 254 | ")\n", 255 | "dates = [f\"2018.{m:02d}\" for m in range(1, 13)]\n", 256 | "da = xr.concat(\n", 257 | " [cat2.rasterized_perims(size=\"vlf\", date=d).to_dask().squeeze(drop=True) for d in dates],\n", 258 | " dim=xr.Variable(\"time\", dates),\n", 259 
| ")\n", 260 | "\n", 261 | "with ProgressBar():\n", 262 | " da_sum = da.sum(\"time\").coarsen(x=133, y=133, boundary=\"trim\").mean().load()\n", 263 | "da_sum" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "da_sum.where(da_sum).plot(vmax=0.01, vmin=0, cmap=\"Greys\")" 273 | ] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | "display_name": "Python 3", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.8.5" 293 | } 294 | }, 295 | "nbformat": 4, 296 | "nbformat_minor": 4 297 | } 298 | -------------------------------------------------------------------------------- /scripts/mtbs/05_monthly_downsampling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# MTBS monthly downsampling and reprojection\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), August 5, 2020_\n", 12 | "\n", 13 | "This notebook downsamples and reprojects monthly MTBS 30m rasters stored in\n", 14 | "Cloud Optimized GeoTIFF 4000m GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- Monthly COGs\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- COG outputs after downsampling and reprojection\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Source CRS and projection extent come from MTBS\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from carbonplan_data.utils import projections, setup\n", 45 | "\n", 46 | "workdir, upload = setup(\"jeremy\")\n", 47 | "workdir" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from rio_cogeo.cogeo import cog_translate\n", 57 | "from rio_cogeo.profiles import cog_profiles\n", 58 | "\n", 59 | "dst_profile = cog_profiles.get(\"deflate\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def get_file(region, fire, year, month):\n", 69 | " return {\n", 70 | " \"source\": (\n", 71 | " workdir / f\"processed/mtbs/{region}/30m/{fire}_{year}.{month:02g}.tif\"\n", 72 | " ).as_posix(),\n", 73 | " \"target\": f\"processed/mtbs/{region}/4000m/tif/{fire}.{month:02g}.tif\",\n", 74 | " }" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "resolution = 4000\n", 84 | "\n", 85 | "for year in [1984]:\n", 86 | " for month in [1]:\n", 87 | " for fire in [\"vlf\"]:\n", 88 | " for region in [\"ak\", \"conus\"]:\n", 89 | " f = get_file(region, fire, year, month)\n", 90 | " crs, extent = projections(\"albers\", region)\n", 91 | " resampling = \"average\"\n", 92 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}\").format(\n", 93 | " crs,\n", 94 | " extent,\n", 95 | " resolution,\n", 96 | " resolution,\n", 
97 | " resampling,\n", 98 | " f[\"source\"],\n", 99 | " \"./raster.tif\",\n", 100 | " )\n", 101 | " os.system(cmd)\n", 102 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 103 | " upload(\"./raster.tif\", f[\"target\"])\n", 104 | " os.remove(\"./raster.tif\")" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "files = [get_file(\"conus\", \"vlf\", 1984, month)[\"source\"] for month in range(12)]" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "resolution = 4000\n", 123 | "\n", 124 | "for year in [2017]:\n", 125 | " for region in [\"conus\"]:\n", 126 | " for fire in [\"vlf\"]:\n", 127 | " files = [get_file(\"conus\", \"vlf\", year, month + 1)[\"source\"] for month in range(12)]\n", 128 | " crs, extent = projections(\"albers\", region)\n", 129 | " resampling = \"sum\"\n", 130 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}\").format(\n", 131 | " crs,\n", 132 | " extent,\n", 133 | " resolution,\n", 134 | " resolution,\n", 135 | " resampling,\n", 136 | " \" \".join(files),\n", 137 | " \"./raster.tif\",\n", 138 | " )\n", 139 | " print(cmd)\n", 140 | " os.system(cmd)\n", 141 | " # cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 142 | " # upload(\"./raster.tif\", f[\"target\"])\n", 143 | " # os.remove(\"./raster.tif\")" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "import rasterio\n", 153 | "\n", 154 | "r = rasterio.open(\"/Users/freeman/workdir/carbonplan-data/processed/mtbs/conus/30m/vlf_2018.09.tif\")\n", 155 | "im = r.read(1)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "r = rasterio.open(\"./raster.tif\")\n", 165 | "im = r.read(1)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "%matplotlib inline" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "from showit import image" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "im.sum()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "image(im, size=12, clim=(0, 1))" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "from carbonplan_forests import load" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "mtbs = load.mtbs(store=\"local\")" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "before = mtbs.groupby(\"time.year\").sum().sel(year=2018)[\"vlf\"]" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "image(1 - before, size=12, clim=(0, 1))" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | 
"kernelspec": { 243 | "display_name": "Python 3", 244 | "language": "python", 245 | "name": "python3" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython3", 257 | "version": "3.7.9" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 4 262 | } 263 | -------------------------------------------------------------------------------- /scripts/mtbs/05_monthly_mtbs_to_zarr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import rasterio 4 | import xarray as xr 5 | from numcodecs.zlib import Zlib 6 | from rasterio import Affine 7 | from rasterio.crs import CRS 8 | from rasterio.warp import Resampling, reproject, transform 9 | 10 | 11 | def base_crs(): 12 | return ( 13 | 'PROJCS["Albers_Conical_Equal_Area",' 14 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 15 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 16 | "TOWGS84[0,0,0,-0,-0,-0,0]," 17 | 'AUTHORITY["EPSG","6326"]],' 18 | 'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' 19 | 'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' 20 | 'AUTHORITY["EPSG","4326"]],' 21 | 'PROJECTION["Albers_Conic_Equal_Area"],' 22 | 'PARAMETER["standard_parallel_1",29.5],' 23 | 'PARAMETER["standard_parallel_2",45.5],' 24 | 'PARAMETER["latitude_of_center",23],' 25 | 'PARAMETER["longitude_of_center",-96],' 26 | 'PARAMETER["false_easting",0],' 27 | 'PARAMETER["false_northing",0],' 28 | 'UNIT["meters",1]]' 29 | ) 30 | 31 | 32 | def make_dst_band(src_band, src_resolution): 33 | left = -2493045.0 34 | right = 2342655.0 35 | top = 3310005.0 36 | bottom = 177285.0 37 | dst_transform = Affine(30.0, 0.0, left, 0.0, -30.0, top) 38 | dst_resolution = dst_transform[0] 39 | dst_transform = dst_transform * Affine.scale( 40 | src_resolution / dst_resolution, src_resolution / dst_resolution 41 | ) 42 | dst_crs = CRS.from_wkt(base_crs()) 43 | dst_shape = [ 44 | round((top - bottom) / src_resolution), 45 | round((right - left) / src_resolution), 46 | ] 47 | dst_band = np.zeros(dst_shape, np.float32) 48 | return dst_band, dst_transform, dst_crs, dst_shape 49 | 50 | 51 | def calc_coords(shape, trans, crs): 52 | ny, nx = shape 53 | x, _ = trans * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) 54 | _, y = trans * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) 55 | xs, ys = np.meshgrid(x, y) 56 | lon, lat = transform(crs, {"init": "EPSG:4326"}, xs.flatten(), ys.flatten()) 57 | 58 | return { 59 | "x": xr.DataArray(x, dims=("x",)), 60 | "y": xr.DataArray(y, dims=("y",)), 61 | "lat": xr.DataArray(np.asarray(lat).reshape((ny, nx)), dims=("y", "x")), 62 | "lon": xr.DataArray(np.asarray(lon).reshape((ny, nx)), dims=("y", "x")), 63 | } 64 | 65 | 66 | src_nodata = 6 67 | resampling = Resampling.average 68 | resolution = 4000 69 | years = np.arange(1984, 2019) 70 | months = np.arange(1, 13) 71 | 72 | for year in years: 73 | print(f"starting year {year}") 74 | src_path_year = f"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/severity/{year}.tif" 75 | 76 | with rasterio.open(src_path_year, "r") as src_raster_year: 77 | src_transform = src_raster_year.meta["transform"] 78 | src_crs = src_raster_year.meta["crs"] 79 | src_band_year = src_raster_year.read(1) 80 | src_resolution = resolution 81 | 82 | dst_band, dst_transform, dst_crs, dst_shape = 
make_dst_band(src_band_year, src_resolution) 83 | coords = calc_coords(dst_shape, dst_transform, dst_crs) 84 | 85 | for month in months: 86 | print(f"processing month {month}") 87 | varname = f"{year}.{month:02n}" 88 | src_path_month = ( 89 | f"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/area/{varname}.tif" 90 | ) 91 | 92 | with rasterio.open(src_path_month, "r") as src_raster_month: 93 | src_band_month = src_raster_month.read(1) 94 | src_band_tmp = src_band_month * ( 95 | (src_band_year == 3) | (src_band_year == 4) 96 | ).astype("uint8") 97 | src_band_tmp[src_band_year == src_nodata] = src_nodata 98 | src_band_month = src_band_tmp 99 | 100 | dst_band, dst_transform, dst_crs, dst_shape = make_dst_band( 101 | src_band_year, src_resolution 102 | ) 103 | dst_band = dst_band.astype("float32") 104 | 105 | # this seems to require rasterio=1.0.25 and gdal=2.4.2 106 | reproject( 107 | src_band_month, 108 | dst_band, 109 | src_transform=src_transform, 110 | src_crs=src_crs, 111 | dst_transform=dst_transform, 112 | dst_crs=dst_crs, 113 | resampling=resampling, 114 | src_nodata=src_nodata, 115 | dst_nodata=src_raster_year.meta["nodata"], 116 | ) 117 | 118 | meta = src_raster_year.meta 119 | meta.update( 120 | width=dst_shape[0], 121 | height=dst_shape[1], 122 | dtype=str(dst_band.dtype), 123 | crs=dst_crs.to_wkt(), 124 | transform=list(dst_transform), 125 | nodata=src_raster_year.meta["nodata"], 126 | ) 127 | 128 | chunks = {"x": 512, "y": 512} 129 | ds = xr.DataArray(dst_band, dims=("y", "x"), attrs=meta).to_dataset(name=varname) 130 | ds = ds.assign_coords(coords).chunk(chunks) 131 | 132 | ds.to_zarr(f"{varname}.zarr", mode="w", encoding={varname: {"compressor": Zlib()}}) 133 | 134 | results = [] 135 | for year in years: 136 | for month in months: 137 | varname = f"{year}.{month:02n}" 138 | ds = xr.open_zarr(f"{varname}.zarr") 139 | ds = ds.chunk({"x": 1209, "y": 783}) 140 | results.append(ds[varname]) 141 | 142 | dates = pd.date_range("1984-01", "2018-12", freq="MS") 143 | ds = xr.concat(results, xr.Variable("time", dates)) 144 | ds.name = "monthly" 145 | ds["x"] = range(len(ds["x"])) 146 | ds["y"] = range(len(ds["y"])) 147 | ds = ds.to_dataset() 148 | chunks = {"time": 1, "x": 1209, "y": 783} 149 | ds = ds.chunk(chunks) 150 | 151 | ds.to_zarr( 152 | "/Users/freeman/workdir/carbonplan-data/processed/mtbs/conus/4000m/monthly.zarr", 153 | mode="w", 154 | encoding={"monthly": {"compressor": Zlib()}}, 155 | consolidated=True, 156 | ) 157 | -------------------------------------------------------------------------------- /scripts/mtbs/06_annual_downsampling.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from rio_cogeo.cogeo import cog_translate 5 | from rio_cogeo.profiles import cog_profiles 6 | 7 | from carbonplan_data.utils import projections, setup 8 | 9 | dst_profile = cog_profiles.get("deflate") 10 | 11 | 12 | workdir, upload = setup("jeremy") 13 | workdir 14 | 15 | resolution = 30 16 | 17 | for region in ["conus"]: 18 | for year in np.arange(1984, 2019): 19 | source = (workdir / f"raw/mtbs/{region}/30m/{year}.tif").as_posix() 20 | print(source) 21 | crs, extent = projections("albers", region) 22 | resampling = "nearest" 23 | cmd = ("gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}").format( 24 | crs, 25 | extent, 26 | resolution, 27 | resolution, 28 | resampling, 29 | source, 30 | "./raster.tif", 31 | ) 32 | os.system(cmd) 33 | cog_translate("./raster.tif", f"./{year}.tif", dst_profile) 34 | 
os.remove("./raster.tif") 35 | -------------------------------------------------------------------------------- /scripts/mtbs/06_annual_mtbs_to_zarr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rasterio 3 | import xarray as xr 4 | from numcodecs.zlib import Zlib 5 | from rasterio import Affine 6 | from rasterio.crs import CRS 7 | from rasterio.warp import Resampling, reproject, transform 8 | 9 | 10 | def base_crs(): 11 | return ( 12 | 'PROJCS["Albers_Conical_Equal_Area",' 13 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 14 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 15 | "TOWGS84[0,0,0,-0,-0,-0,0]," 16 | 'AUTHORITY["EPSG","6326"]],' 17 | 'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' 18 | 'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' 19 | 'AUTHORITY["EPSG","4326"]],' 20 | 'PROJECTION["Albers_Conic_Equal_Area"],' 21 | 'PARAMETER["standard_parallel_1",29.5],' 22 | 'PARAMETER["standard_parallel_2",45.5],' 23 | 'PARAMETER["latitude_of_center",23],' 24 | 'PARAMETER["longitude_of_center",-96],' 25 | 'PARAMETER["false_easting",0],' 26 | 'PARAMETER["false_northing",0],' 27 | 'UNIT["meters",1]]' 28 | ) 29 | 30 | 31 | def make_dst_band(src_band, src_resolution): 32 | left = -2493045.0 33 | right = 2342655.0 34 | top = 3310005.0 35 | bottom = 177285.0 36 | dst_transform = Affine(30.0, 0.0, left, 0.0, -30.0, top) 37 | dst_resolution = dst_transform[0] 38 | dst_transform = dst_transform * Affine.scale( 39 | src_resolution / dst_resolution, src_resolution / dst_resolution 40 | ) 41 | dst_crs = CRS.from_wkt(base_crs()) 42 | dst_shape = [ 43 | round((top - bottom) / src_resolution), 44 | round((right - left) / src_resolution), 45 | ] 46 | dst_band = np.zeros(dst_shape, np.float32) 47 | return dst_band, dst_transform, dst_crs, dst_shape 48 | 49 | 50 | def calc_coords(shape, trans, crs): 51 | ny, nx = shape 52 | x, _ = trans * (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) 53 | _, y = trans * (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) 54 | xs, ys = np.meshgrid(x, y) 55 | lon, lat = transform(crs, {"init": "EPSG:4326"}, xs.flatten(), ys.flatten()) 56 | 57 | return { 58 | "x": xr.DataArray(x, dims=("x",)), 59 | "y": xr.DataArray(y, dims=("y",)), 60 | "lat": xr.DataArray(np.asarray(lat).reshape((ny, nx)), dims=("y", "x")), 61 | "lon": xr.DataArray(np.asarray(lon).reshape((ny, nx)), dims=("y", "x")), 62 | } 63 | 64 | 65 | src_nodata = 6 66 | resampling = Resampling.average 67 | resolution = 4000 68 | years = np.arange(1984, 2019) 69 | months = np.arange(1, 13) 70 | 71 | for year in years: 72 | print(f"starting year {year}") 73 | src_path_year = f"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/severity/{year}.tif" 74 | 75 | with rasterio.open(src_path_year, "r") as src_raster_year: 76 | src_transform = src_raster_year.meta["transform"] 77 | src_crs = src_raster_year.meta["crs"] 78 | src_band_year = src_raster_year.read(1) 79 | src_resolution = resolution 80 | 81 | dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(src_band_year, src_resolution) 82 | coords = calc_coords(dst_shape, dst_transform, dst_crs) 83 | 84 | for month in months: 85 | print(f"processing month {month}") 86 | varname = f"{year}.{month:02n}" 87 | src_path_month = ( 88 | f"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/area/{varname}.tif" 89 | ) 90 | 91 | with rasterio.open(src_path_month, "r") as src_raster_month: 92 | if month == 1: 93 | src_band_month = src_raster_month.read(1) 94 | else: 95 | 
src_band_month += src_raster_month.read(1) 96 | 97 | src_band_month[src_band_month > 1] = 1 98 | src_band_tmp = src_band_month * ((src_band_year == 3) | (src_band_year == 4)).astype( 99 | "uint8" 100 | ) 101 | src_band_tmp[src_band_year == src_nodata] = src_nodata 102 | 103 | dst_band = dst_band.astype("float32") 104 | 105 | # this seems to require rasterio=1.0.25 and gdal=2.4.2 106 | reproject( 107 | src_band_tmp, 108 | dst_band, 109 | src_transform=src_transform, 110 | src_crs=src_crs, 111 | dst_transform=dst_transform, 112 | dst_crs=dst_crs, 113 | resampling=resampling, 114 | src_nodata=src_nodata, 115 | dst_nodata=src_raster_year.meta["nodata"], 116 | ) 117 | 118 | meta = src_raster_year.meta 119 | meta.update( 120 | width=dst_shape[0], 121 | height=dst_shape[1], 122 | dtype=str(dst_band.dtype), 123 | crs=dst_crs.to_wkt(), 124 | transform=list(dst_transform), 125 | nodata=src_raster_year.meta["nodata"], 126 | ) 127 | 128 | chunks = {"x": 512, "y": 512} 129 | ds = xr.DataArray(dst_band, dims=("y", "x"), attrs=meta).to_dataset(name=f"{year}") 130 | ds = ds.assign_coords(coords).chunk(chunks) 131 | 132 | ds.to_zarr(f"{year}.zarr", mode="w", encoding={f"{year}": {"compressor": Zlib()}}) 133 | 134 | # results = [] 135 | # for year in years: 136 | # varname = f'{year}' 137 | # ds = xr.open_zarr(f'{varname}.zarr') 138 | # results.append(ds[varname]) 139 | 140 | # dates = pd.date_range('1984', '2018', freq='YS') 141 | # ds = xr.concat(results, xr.Variable('time', dates)) 142 | # ds.name = 'annual' 143 | # ds['x'] = range(len(ds['x'])) 144 | # ds['y'] = range(len(ds['y'])) 145 | # ds = ds.to_dataset() 146 | # chunks = ({'time': 1, 'x': 1209, 'y': 783}) 147 | # ds = ds.chunk(chunks) 148 | # ds.to_zarr( 149 | # '/Users/freeman/workdir/carbonplan-data/processed/mtbs/conus/4000m/annual.zarr', 150 | # mode='w', encoding={'annual': {'compressor': Zlib()}} 151 | # ) 152 | -------------------------------------------------------------------------------- /scripts/mtbs/README.md: -------------------------------------------------------------------------------- 1 | # MTBS Burned Area 2 | -------------------------------------------------------------------------------- /scripts/mtbs/prepare.py: -------------------------------------------------------------------------------- 1 | import rasterio 2 | import zarr 3 | from numcodecs.zlib import Zlib 4 | from numpy import zeros 5 | from rasterio import Affine 6 | from rasterio.crs import CRS 7 | from rasterio.warp import Resampling, reproject 8 | 9 | RAW_PATH = "/Users/freeman/data/treeplan/raw/" 10 | PROCESSED_PATH = "/Users/freeman/github/carbonplan/data/processed/" 11 | 12 | 13 | def base_crs(): 14 | return ( 15 | 'PROJCS["Albers_Conical_Equal_Area",' 16 | 'GEOGCS["WGS 84",DATUM["WGS_1984",' 17 | 'SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],' 18 | "TOWGS84[0,0,0,-0,-0,-0,0]," 19 | 'AUTHORITY["EPSG","6326"]],' 20 | 'PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],' 21 | 'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],' 22 | 'AUTHORITY["EPSG","4326"]],' 23 | 'PROJECTION["Albers_Conic_Equal_Area"],' 24 | 'PARAMETER["standard_parallel_1",29.5],' 25 | 'PARAMETER["standard_parallel_2",45.5],' 26 | 'PARAMETER["latitude_of_center",23],' 27 | 'PARAMETER["longitude_of_center",-96],' 28 | 'PARAMETER["false_easting",0],' 29 | 'PARAMETER["false_northing",0],' 30 | 'UNIT["meters",1]]' 31 | ) 32 | 33 | 34 | def make_dst_band(src_band, src_resolution): 35 | left = -2493045.0 36 | right = 2342655.0 37 | top = 3310005.0 38 | bottom = 177285.0 39 | 
dst_transform = Affine(30.0, 0.0, left, 0.0, -30.0, top) 40 | dst_resolution = dst_transform[0] 41 | dst_transform = dst_transform * Affine.scale( 42 | src_resolution / dst_resolution, src_resolution / dst_resolution 43 | ) 44 | dst_crs = CRS.from_wkt(base_crs()) 45 | 46 | dst_shape = [ 47 | round((top - bottom) / src_resolution), 48 | round((right - left) / src_resolution), 49 | ] 50 | 51 | dst_band = zeros(dst_shape, src_band.dtype) 52 | return dst_band, dst_transform, dst_crs, dst_shape 53 | 54 | 55 | def prepare_mtbs(year, resolution): 56 | src_path = RAW_PATH + f"MTBS/{year}/mtbs_CONUS_{year}.tif" 57 | src_raster = rasterio.open(src_path) 58 | src_transform = src_raster.meta["transform"] 59 | src_crs = src_raster.meta["crs"] 60 | src_band = src_raster.read(1) 61 | src_resolution = resolution 62 | 63 | dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(src_band, src_resolution) 64 | 65 | if resolution == 30: 66 | resampling = Resampling.nearest 67 | elif resolution > 30: 68 | resampling = Resampling.average 69 | # set moderate or high burn severity to 1 and others to 1 70 | src_band_tmp = ((src_band == 3) | (src_band == 4)).astype("uint8") 71 | # set masked regions to nodata value 72 | src_band_tmp[src_band == 6] = 6 73 | src_band = src_band_tmp 74 | dst_band = dst_band.astype("float32") # convert to float for averaging 75 | src_nodata = 6 76 | 77 | reproject( 78 | src_band, 79 | dst_band, 80 | src_transform=src_transform, 81 | src_crs=src_crs, 82 | dst_transform=dst_transform, 83 | dst_crs=dst_crs, 84 | resampling=resampling, 85 | src_nodata=src_nodata, 86 | dst_nodata=src_raster.meta["nodata"], 87 | ) 88 | 89 | meta = src_raster.meta 90 | meta.update( 91 | width=dst_shape[0], 92 | height=dst_shape[1], 93 | dtype=str(dst_band.dtype), 94 | crs=dst_crs.to_wkt(), 95 | transform=list(dst_transform), 96 | nodata=src_raster.meta["nodata"], 97 | ) 98 | 99 | store = zarr.open(PROCESSED_PATH + f"MTBS.{year}.{resolution}m.zarr", "w") 100 | store.attrs.put(meta) 101 | store.array("0", dst_band, chunks=(512, 512), compressor=Zlib()) 102 | 103 | 104 | years = ["%s" % (d + 1984) for d in range(2018 - 1984)] 105 | [prepare_mtbs(year, 500) for year in years] 106 | -------------------------------------------------------------------------------- /scripts/nftd/00_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Download NFTD\n", 10 | "\n", 11 | "_by Jeremy Freeman (CarbonPlan), August 1, 2020_\n", 12 | "\n", 13 | "This notebook downloads local copies of NFTD (National Forest Type Dataset)\n", 14 | "datasets for processing.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- sources.yaml\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- Local copies of NFTD data\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import pathlib\n", 36 | "import zipfile\n", 37 | "\n", 38 | "import urlpath\n", 39 | "import wget\n", 40 | "import yaml\n", 41 | "\n", 42 | "workdir = pathlib.Path(\"/Users/freeman/workdir/carbonplan_data_downloads/nftd/\")\n", 43 | "workdir.mkdir(parents=True, exist_ok=True)\n", 44 | "workdir" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | 
"outputs": [], 52 | "source": [ 53 | "with open(\"../../sources.yaml\") as f:\n", 54 | " sources = yaml.load(f, Loader=yaml.FullLoader)[\"usfs\"]" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "for key, dset in sources[\"data\"].items():\n", 64 | " if \"download\" in dset[\"actions\"]:\n", 65 | " for url in dset[\"urlpath\"]:\n", 66 | " url = urlpath.URL(url)\n", 67 | " out = workdir / url.name\n", 68 | " if not out.exists():\n", 69 | " print(f\"downloading {url}\")\n", 70 | " wget.download(str(url), out=str(out))\n", 71 | "\n", 72 | " if \"unzip\" in dset[\"actions\"]:\n", 73 | " outdir = workdir / out.stem\n", 74 | " if not outdir.exists():\n", 75 | " outdir.mkdir(parents=True)\n", 76 | " with zipfile.ZipFile(out, \"r\") as f:\n", 77 | " print(f\"extracting contents of {out}\")\n", 78 | " f.extractall(outdir)" 79 | ] 80 | } 81 | ], 82 | "metadata": { 83 | "kernelspec": { 84 | "display_name": "Python 3", 85 | "language": "python", 86 | "name": "python3" 87 | }, 88 | "language_info": { 89 | "codemirror_mode": { 90 | "name": "ipython", 91 | "version": 3 92 | }, 93 | "file_extension": ".py", 94 | "mimetype": "text/x-python", 95 | "name": "python", 96 | "nbconvert_exporter": "python", 97 | "pygments_lexer": "ipython3", 98 | "version": "3.7.6" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /scripts/nftd/01_nftd_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Convert NFTD to COGs\n", 10 | "\n", 11 | "_by Jeremy Freeman (CarbonPlan), August 1, 2020_\n", 12 | "\n", 13 | "This notebook converts local copies of NFTD datasets to cloud optimized\n", 14 | "GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- downloaded files from UFSF raster gateway\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- Cloud optimized GeoTIFFs\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- No reprojection or processing of the data is done in this notebook.\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "import pathlib\n", 37 | "\n", 38 | "from google.cloud import storage\n", 39 | "from rio_cogeo.cogeo import cog_translate\n", 40 | "from rio_cogeo.profiles import cog_profiles" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB\n", 50 | "storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "dst_profile = cog_profiles.get(\"deflate\")\n", 60 | "os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = (\n", 61 | " \"/Users/freeman/.config/gcloud/legacy_credentials/jeremy@carbonplan.org/adc.json\"\n", 62 | ")\n", 63 | "\n", 64 | "\n", 65 | "def upload(src, target, bucket=\"carbonplan-data\"):\n", 66 | " storage_client = storage.Client(\"carbonplan\")\n", 67 | " bucket = storage_client.bucket(bucket)\n", 68 | " blob = bucket.blob(target)\n", 69 | " blob.upload_from_filename(src)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | 
"metadata": {}, 75 | "source": [ 76 | "## Iterate over datasets\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# conus forest group\n", 86 | "workdir = pathlib.Path(\"/Users/freeman/workdir/carbonplan_data_downloads/nftd/\")\n", 87 | "source = workdir / \"conus_forestgroup\" / \"conus_forestgroup.img\"\n", 88 | "target = workdir / \"conus_forestgroup\" / \"raster.tif\"\n", 89 | "cloud = \"raw/nftd/conus_forestgroup/30m/raster.tif\"" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "files = [\n", 99 | " {\n", 100 | " \"source\": workdir / \"conus_forestgroup\" / \"conus_forestgroup.img\",\n", 101 | " \"target\": workdir / \"conus_forestgroup\" / \"raster.tif\",\n", 102 | " \"cloud\": \"raw/nftd/conus_forestgroup/250m/raster.tif\",\n", 103 | " },\n", 104 | " {\n", 105 | " \"source\": workdir / \"conus_forestgroup\" / \"conus_forestgroup_error.img\",\n", 106 | " \"target\": workdir / \"conus_forestgroup\" / \"error.tif\",\n", 107 | " \"cloud\": \"raw/nftd/conus_forestgroup/250m/error.tif\",\n", 108 | " },\n", 109 | " {\n", 110 | " \"source\": workdir / \"conus_forest-type\" / \"conus_foresttype.img\",\n", 111 | " \"target\": workdir / \"conus_forest-type\" / \"raster.tif\",\n", 112 | " \"cloud\": \"raw/nftd/conus_foresttype/250m/raster.tif\",\n", 113 | " },\n", 114 | " {\n", 115 | " \"source\": workdir / \"conus_forest-type\" / \"conus_foresttype_error.img\",\n", 116 | " \"target\": workdir / \"conus_forest-type\" / \"error.tif\",\n", 117 | " \"cloud\": \"raw/nftd/conus_foresttype/250m/error.tif\",\n", 118 | " },\n", 119 | " {\n", 120 | " \"source\": workdir / \"ak_forestgroup\" / \"alaska_forestgroup_63360.img\",\n", 121 | " \"target\": workdir / \"ak_forestgroup\" / \"raster.tif\",\n", 122 | " \"cloud\": \"raw/nftd/ak_forestgroup/250m/raster.tif\",\n", 123 | " },\n", 124 | " {\n", 125 | " \"source\": workdir / \"ak_forestgroup\" / \"alaska_forestgroup_confidence_63360.img\",\n", 126 | " \"target\": workdir / \"ak_forestgroup\" / \"error.tif\",\n", 127 | " \"cloud\": \"raw/nftd/ak_forestgroup/250m/error.tif\",\n", 128 | " },\n", 129 | " {\n", 130 | " \"source\": workdir / \"ak_forest-type\" / \"alaska_foresttype_63360.img\",\n", 131 | " \"target\": workdir / \"ak_forest-type\" / \"raster.tif\",\n", 132 | " \"cloud\": \"raw/nftd/ak_foresttype/250m/raster.tif\",\n", 133 | " },\n", 134 | " {\n", 135 | " \"source\": workdir / \"ak_forest-type\" / \"alaska_foresttype_confidence_63360.img\",\n", 136 | " \"target\": workdir / \"ak_forest-type\" / \"error.tif\",\n", 137 | " \"cloud\": \"raw/nftd/ak_foresttype/250m/error.tif\",\n", 138 | " },\n", 139 | "]" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# convert to cogs\n", 149 | "[cog_translate(f[\"source\"], f[\"target\"], dst_profile) for f in files]" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "# upload to cloud storage\n", 159 | "[upload(f[\"target\"], f[\"cloud\"]) for f in files]" 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python 3", 166 | "language": "python", 167 | "name": "python3" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | 
"file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.8.2" 180 | } 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 4 184 | } 185 | -------------------------------------------------------------------------------- /scripts/nftd/02_downsampling_and_reprojection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# NFTD downsampling and reprojection\n", 10 | "\n", 11 | "_by Jeremy Freeman (CarbonPlan), August 2, 2020_\n", 12 | "\n", 13 | "This notebook downsamples and reprojects NFTD 250m yearly rasters stored in\n", 14 | "Cloud Optimized GeoTIFF into 250m and 4000m GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- COG outputs from `01_nftd_to_cogs.ipynb`\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- COG outputs after downsampling and reprojection\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Source CRS and projection extent come from NLCD\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from carbonplan_data.utils import projections, setup\n", 45 | "\n", 46 | "workdir, upload = setup(\"jeremy\")" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "import rasterio\n", 56 | "from rio_cogeo.cogeo import cog_translate\n", 57 | "from rio_cogeo.profiles import cog_profiles\n", 58 | "\n", 59 | "dst_profile = cog_profiles.get(\"deflate\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def get_files(region, resolution):\n", 69 | " return [\n", 70 | " {\n", 71 | " \"source\": workdir / f\"raw/nftd/{region}_foresttype/250m/raster.tif\",\n", 72 | " \"target\": f\"processed/nftd/{region}/{resolution}m/type.tif\",\n", 73 | " },\n", 74 | " {\n", 75 | " \"source\": workdir / f\"raw/nftd/{region}_forestgroup/250m/raster.tif\",\n", 76 | " \"target\": f\"processed/nftd/{region}/{resolution}m/group.tif\",\n", 77 | " },\n", 78 | " {\n", 79 | " \"source\": workdir / f\"raw/nftd/{region}_foresttype/250m/error.tif\",\n", 80 | " \"target\": f\"processed/nftd/{region}/{resolution}m/type_error.tif\",\n", 81 | " },\n", 82 | " {\n", 83 | " \"source\": workdir / f\"raw/nftd/{region}_forestgroup/250m/error.tif\",\n", 84 | " \"target\": f\"processed/nftd/{region}/{resolution}m/group_error.tif\",\n", 85 | " },\n", 86 | " ]" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "for resolution in [250, 4000]:\n", 96 | " for region in [\"ak\", \"conus\"]:\n", 97 | " files = get_files(region, resolution)\n", 98 | " crs, extent = projections(\"albers\", region)\n", 99 | " for f in files:\n", 100 | " if \"error\" in str(f[\"source\"]):\n", 101 | " resampling = \"average\"\n", 102 | " elif resolution == 4000:\n", 103 | " resampling = \"mode\"\n", 104 | " else:\n", 105 | " resampling = \"near\"\n", 106 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} {} {}\").format(\n", 107 | " crs,\n", 108 | " 
extent,\n", 109 | " resolution,\n", 110 | " resolution,\n", 111 | " resampling,\n", 112 | " f[\"source\"],\n", 113 | " \"./raster.tif\",\n", 114 | " )\n", 115 | " os.system(cmd)\n", 116 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 117 | " upload(\"./raster.tif\", f[\"target\"])\n", 118 | " os.remove(\"./raster.tif\")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### downsample using thresholding\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "groups = [\n", 135 | " 100,\n", 136 | " 120,\n", 137 | " 140,\n", 138 | " 160,\n", 139 | " 180,\n", 140 | " 200,\n", 141 | " 220,\n", 142 | " 240,\n", 143 | " 260,\n", 144 | " 280,\n", 145 | " 300,\n", 146 | " 320,\n", 147 | " 340,\n", 148 | " 360,\n", 149 | " 370,\n", 150 | " 380,\n", 151 | " 400,\n", 152 | " 500,\n", 153 | " 600,\n", 154 | " 700,\n", 155 | " 800,\n", 156 | " 900,\n", 157 | " 910,\n", 158 | " 920,\n", 159 | " 940,\n", 160 | " 950,\n", 161 | " 980,\n", 162 | " 990,\n", 163 | "]" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "for resolution in [4000]:\n", 173 | " for region in [\"ak\", \"conus\"]:\n", 174 | " src = rasterio.open(workdir / f\"processed/nftd/{region}/250m/group.tif\")\n", 175 | " band = src.read(1)\n", 176 | " profile = src.profile\n", 177 | " profile[\"dtype\"] = \"uint8\"\n", 178 | "\n", 179 | " for group in groups:\n", 180 | " print(f\"region {region} cat {group}\")\n", 181 | " crs, extent = projections(\"albers\", region)\n", 182 | " out = (band == group).astype(rasterio.uint8)\n", 183 | " resampling = \"average\"\n", 184 | "\n", 185 | " with rasterio.open(\"./thresholded.tif\", \"w\", **profile) as dst:\n", 186 | " dst.write(out, 1)\n", 187 | "\n", 188 | " cmd = (\"gdalwarp -t_srs '{}' -te {} -tr {} {} -r {} -ot Float32 {} {}\").format(\n", 189 | " crs,\n", 190 | " extent,\n", 191 | " resolution,\n", 192 | " resolution,\n", 193 | " resampling,\n", 194 | " \"./thresholded.tif\",\n", 195 | " \"./raster.tif\",\n", 196 | " )\n", 197 | "\n", 198 | " os.system(cmd)\n", 199 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 200 | " upload(\n", 201 | " \"./raster.tif\",\n", 202 | " f\"processed/nftd/{region}/{resolution}m/group_g{group}.tif\",\n", 203 | " )\n", 204 | " os.remove(\"./thresholded.tif\")\n", 205 | " os.remove(\"./raster.tif\")" 206 | ] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python 3", 212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.7.8" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 4 230 | } 231 | -------------------------------------------------------------------------------- /scripts/nlcd/00_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Download NLCD\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook downloads local copies of MLRC 
datasets for processing.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- sources.yaml\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- Local copies of MLRC data\n", 22 | "\n", 23 | "**Notes:**\n", 24 | "\n", 25 | "- No reprojection or processing of the data is done in this notebook.\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import pathlib\n", 35 | "import zipfile\n", 36 | "\n", 37 | "import urlpath\n", 38 | "import wget\n", 39 | "import yaml\n", 40 | "\n", 41 | "workdir = pathlib.Path(\"/Users/jhamman/workdir/carbonplan_data_downloads/mlrc/\")\n", 42 | "workdir.mkdir(parents=True, exist_ok=True)\n", 43 | "workdir" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "with open(\"../../sources.yaml\") as f:\n", 53 | " sources = yaml.load(f, Loader=yaml.FullLoader)[\"mlrc\"]" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "sources" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "for key, dset in sources[\"data\"].items():\n", 72 | " if \"download\" in dset[\"actions\"]:\n", 73 | " for url in dset[\"urlpath\"]:\n", 74 | " url = urlpath.URL(url)\n", 75 | " out = workdir / url.name\n", 76 | " if not out.exists():\n", 77 | " print(f\"downloading {url}\")\n", 78 | " wget.download(str(url), out=str(out))\n", 79 | "\n", 80 | " if \"unzip\" in dset[\"actions\"]:\n", 81 | " outdir = workdir / out.stem\n", 82 | " if not outdir.exists():\n", 83 | " outdir.mkdir(parents=True)\n", 84 | " with zipfile.ZipFile(out, \"r\") as f:\n", 85 | " print(f\"extracting contents of {out}\")\n", 86 | " f.extractall(outdir)" 87 | ] 88 | } 89 | ], 90 | "metadata": { 91 | "kernelspec": { 92 | "display_name": "Python 3", 93 | "language": "python", 94 | "name": "python3" 95 | }, 96 | "language_info": { 97 | "codemirror_mode": { 98 | "name": "ipython", 99 | "version": 3 100 | }, 101 | "file_extension": ".py", 102 | "mimetype": "text/x-python", 103 | "name": "python", 104 | "nbconvert_exporter": "python", 105 | "pygments_lexer": "ipython3", 106 | "version": "3.7.8" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 4 111 | } 112 | -------------------------------------------------------------------------------- /scripts/nlcd/01_nlcd_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Convert NLCD to COG\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook converts local copies of NLDC rasters to COG\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- local copies of NLCD data\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- COGs published to cloud storage\n", 22 | "\n", 23 | "**Notes:**\n", 24 | "\n", 25 | "- No reprojection or processing of the data is done in this notebook.\n", 26 | "- Includes both conus (L48) and alaska (AK)\n", 27 | "- Paths here need to be harmonized as it was run in two different local\n", 28 | " environments\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import os\n", 38 | "import 
pathlib\n", 39 | "\n", 40 | "from google.cloud import storage\n", 41 | "from rio_cogeo.cogeo import cog_translate\n", 42 | "from rio_cogeo.profiles import cog_profiles\n", 43 | "\n", 44 | "storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB\n", 45 | "storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# This is the COG profile:\n", 55 | "dst_profile = cog_profiles.get(\"deflate\")\n", 56 | "os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = (\n", 57 | " \"/Users/freeman/.config/gcloud/legacy_credentials/jeremy@carbonplan.org/adc.json\"\n", 58 | ")\n", 59 | "\n", 60 | "\n", 61 | "def upload(src, target, bucket=\"carbonplan-data\"):\n", 62 | " storage_client = storage.Client(\"carbonplan\")\n", 63 | " bucket = storage_client.bucket(bucket)\n", 64 | " blob = bucket.blob(target)\n", 65 | " blob.upload_from_filename(src)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## NLCD_Land_Cover_Change_Index_L48_20190424\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# raw file\n", 82 | "source = \"/Users/jhamman/workdir/carbonplan_data_downloads/mlrc/NLCD_Land_Cover_Change_Index_L48_20190424/NLCD_Land_Cover_Change_Index_L48_20190424.img\"\n", 83 | "\n", 84 | "# local target\n", 85 | "target = \"./raster.tif\"\n", 86 | "\n", 87 | "# This is where we'll write the COGs when we're done\n", 88 | "cloud_target = \"raw/mlrc/NLCD_Land_Cover_Change_Index_L48_20190424/30m/raster.tif\"" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# translate to COG\n", 98 | "cog_translate(source, target, dst_profile)\n", 99 | "\n", 100 | "# Upload to GCS\n", 101 | "upload(target, cloud_target)\n", 102 | "\n", 103 | "# Remove temporary file\n", 104 | "os.remove(target)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## NLCD_Land_Cover_L48_20190424_full_zip\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "target = \"./raster.tif\"\n", 121 | "\n", 122 | "for year in [2001, 2004, 2006, 2008, 2011, 2013, 2016]:\n", 123 | " print(year)\n", 124 | " source = f\"/Users/jhamman/workdir/carbonplan_data_downloads/mlrc/NLCD_Land_Cover_L48_20190424_full_zip/NLCD_{year}_Land_Cover_L48_20190424.img\"\n", 125 | " cloud_target = f\"raw/mlrc/NLCD_Land_Cover_L48_20190424_full/30m/{year}.tif\"\n", 126 | "\n", 127 | " # translate to COG\n", 128 | " cog_translate(source, target, dst_profile)\n", 129 | "\n", 130 | " # Upload to GCS\n", 131 | " upload(target, cloud_target)\n", 132 | "\n", 133 | " # Remove temporary file\n", 134 | " os.remove(target)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## NLCD_Land_Cover_AK_20200213\n" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "workdir = pathlib.Path(\"/Users/freeman/workdir/carbonplan_data_downloads/nlcd/\")\n", 151 | "\n", 152 | "target = \"./raster.tif\"\n", 153 | "\n", 154 | "for year in [2016]:\n", 155 | " print(year)\n", 156 | " source = workdir / 
f\"NLCD_{year}_Land_Cover_AK_20200213/NLCD_{year}_Land_Cover_AK_20200213.img\"\n", 157 | "    cloud_target = f\"raw/nlcd/NLCD_Land_Cover_AK_20200213/30m/{year}.tif\"\n", 158 | "\n", 159 | "    # translate to COG\n", 160 | "    cog_translate(source, target, dst_profile)\n", 161 | "\n", 162 | "    # Upload to GCS\n", 163 | "    upload(target, cloud_target)\n", 164 | "\n", 165 | "    # Remove temporary file\n", 166 | "    os.remove(target)" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.7.8" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 4 191 | } 192 | -------------------------------------------------------------------------------- /scripts/nlcd/README.md: -------------------------------------------------------------------------------- 1 | # National Land Cover Database (NLCD) 2 | -------------------------------------------------------------------------------- /scripts/prism/00_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pathlib\n", 10 | "import zipfile\n", 11 | "\n", 12 | "import wget\n", 13 | "\n", 14 | "from carbonplan_data.utils import setup\n", 15 | "\n", 16 | "workdir, upload = setup(\"joe\")" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "files = [\n", 26 | "    (\n", 27 | "        \"http://services.nacse.org/prism/data/public/normals/800m/ppt/14\",\n", 28 | "        f\"{workdir}/ppt_normal.zip\",\n", 29 | "    ),\n", 30 | "    (\n", 31 | "        \"http://services.nacse.org/prism/data/public/normals/800m/tmean/14\",\n", 32 | "        f\"{workdir}/tmean_normal.zip\",\n", 33 | "    ),\n", 34 | "    (\n", 35 | "        \"https://prism.oregonstate.edu/projects/public/alaska/grids/tmean/PRISM_tmean_ak_30yr_normal_800mM1_annual_asc.zip\",\n", 36 | "        f\"{workdir}/PRISM_tmean_ak_30yr_normal_800mM1_annual_asc.zip\",\n", 37 | "    ),\n", 38 | "    (\n", 39 | "        \"https://prism.oregonstate.edu/projects/public/alaska/grids/ppt/PRISM_ppt_ak_30yr_normal_800mM1_annual_asc.zip\",\n", 40 | "        f\"{workdir}/PRISM_ppt_ak_30yr_normal_800mM1_annual_asc.zip\",\n", 41 | "    ),\n", 42 | "]\n", 43 | "\n", 44 | "for src, dst in files:\n", 45 | "    dst = pathlib.Path(dst)\n", 46 | "    if not dst.exists():\n", 47 | "        wget.download(src, out=str(dst))\n", 48 | "\n", 49 | "    outdir = workdir / dst.stem\n", 50 | "    with zipfile.ZipFile(dst, \"r\") as f:\n", 51 | "        # print(f\"extracting contents of {dst}\")\n", 52 | "        print(outdir)\n", 53 | "        f.extractall(outdir)" 54 | ] 55 | } 56 | ], 57 | "metadata": { 58 | "kernelspec": { 59 | "display_name": "Python 3", 60 | "language": "python", 61 | "name": "python3" 62 | }, 63 | "language_info": { 64 | "codemirror_mode": { 65 | "name": "ipython", 66 | "version": 3 67 | }, 68 | "file_extension": ".py", 69 | "mimetype": "text/x-python", 70 | "name": "python", 71 | "nbconvert_exporter": "python", 72 | "pygments_lexer": "ipython3", 73 | "version": "3.9.2" 74 | } 75 | }, 76 | "nbformat": 4, 77 | "nbformat_minor": 4 78 | } 79 | 
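Note: the download notebooks above (`scripts/nlcd/00_download.ipynb` and `scripts/prism/00_download.ipynb`) repeat the same check-then-download-then-unzip pattern. A shared helper along the following lines could replace both loops; this is only a sketch that assumes the same `wget` and `zipfile` dependencies the notebooks already use, and the `fetch_and_extract` name is hypothetical (it does not exist in `carbonplan_data.utils`).

```python
import pathlib
import zipfile

import wget


def fetch_and_extract(url, dst, unzip=True):
    """Hypothetical helper mirroring the loops in the 00_download notebooks."""
    dst = pathlib.Path(dst)
    dst.parent.mkdir(parents=True, exist_ok=True)

    # only download when a local copy is missing
    if not dst.exists():
        wget.download(url, out=str(dst))

    if not unzip:
        return dst

    # extract alongside the archive, keyed by the archive's stem
    outdir = dst.parent / dst.stem
    if not outdir.exists():
        outdir.mkdir(parents=True)
        with zipfile.ZipFile(dst, "r") as f:
            f.extractall(outdir)
    return outdir
```

Each notebook would then reduce to a loop over its URL list, e.g. `fetch_and_extract(src, workdir / name)`.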
-------------------------------------------------------------------------------- /scripts/prism/01_prism_to_cogs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Convert PRISM to COG\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), February 16, 2021_\n", 12 | "\n", 13 | "This notebook converts local copies of PRISM normal rasters to COG\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "- local copies of PRISM data\n", 18 | "\n", 19 | "**Outputs:**\n", 20 | "\n", 21 | "- COGs published to cloud storage\n", 22 | "\n", 23 | "**Notes:**\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import os\n", 33 | "\n", 34 | "import xarray as xr\n", 35 | "from google.cloud import storage\n", 36 | "from rio_cogeo.cogeo import cog_translate\n", 37 | "from rio_cogeo.profiles import cog_profiles\n", 38 | "\n", 39 | "from carbonplan_data.utils import setup\n", 40 | "\n", 41 | "storage.blob._DEFAULT_CHUNKSIZE = 5 * 1024 * 1024 # 5 MB\n", 42 | "storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # 5 MB" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "# This is the COG profile:\n", 52 | "dst_profile = cog_profiles.get(\"deflate\")\n", 53 | "\n", 54 | "workdir, upload = setup(\"joe\")" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## PRISM normals (CONUS and AK)\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "sources = [\n", 71 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/PRISM_tmean_ak_30yr_normal_800mM1_annual_asc/ak_tmean_1981_2010.14.txt\",\n", 72 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/PRISM_ppt_ak_30yr_normal_800mM1_annual_asc/ak_ppt_1981_2010.14.txt\",\n", 73 | "]\n", 74 | "\n", 75 | "crs = \"+proj=latlong +ellps=GRS80 +datum=NAD83\"\n", 76 | "\n", 77 | "for source in sources:\n", 78 | " target = source.replace(\"txt\", \"tif\")\n", 79 | " da = xr.open_rasterio(source)\n", 80 | " da = da.rio.set_crs(crs)\n", 81 | " da.rio.to_raster(target)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "sources = [\n", 91 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/ppt_normal/PRISM_ppt_30yr_normal_800mM2_annual_bil.bil\",\n", 92 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/tmean_normal/PRISM_tmean_30yr_normal_800mM2_annual_bil.bil\",\n", 93 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/PRISM_tmean_ak_30yr_normal_800mM1_annual_asc/ak_tmean_1981_2010.14.tif\",\n", 94 | " \"/Users/jhamman/workdir/carbonplan_data_downloads/PRISM_ppt_ak_30yr_normal_800mM1_annual_asc/ak_ppt_1981_2010.14.tif\",\n", 95 | "]\n", 96 | "\n", 97 | "# local target\n", 98 | "target = \"./raster.tif\"\n", 99 | "\n", 100 | "for source in sources:\n", 101 | " for var in [\"ppt\", \"tmean\"]:\n", 102 | " if var in source:\n", 103 | " break\n", 104 | "\n", 105 | " if \"ak\" in source:\n", 106 | " region = \"ak\"\n", 107 | " else:\n", 108 | " region = \"conus\"\n", 109 | "\n", 110 | " # This is where we'll write the COGs when we're done\n", 111 | " cloud_target = f\"raw/prism/normals/{region}/800m/{var}.tif\"\n", 112 | 
"\n", 113 | " # translate to COG\n", 114 | " cog_translate(source, target, dst_profile)\n", 115 | "\n", 116 | " # Upload to GCS\n", 117 | " upload(target, cloud_target)\n", 118 | "\n", 119 | " # Remove temporary file\n", 120 | " os.remove(target)" 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 3 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython3", 140 | "version": "3.9.2" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 4 145 | } 146 | -------------------------------------------------------------------------------- /scripts/prism/02_downsample_and_reproject.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# PRISM downsampling and reprojection\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), February 16, 2021_\n", 12 | "\n", 13 | "This notebook downsamples and reprojects PRISM 300m climate normal rasters\n", 14 | "stored in Cloud Optimized GeoTIFF into 4000m GeoTIFFs.\n", 15 | "\n", 16 | "**Inputs:**\n", 17 | "\n", 18 | "- COG outputs from `01_prism_to_cogs.ipynb`\n", 19 | "\n", 20 | "**Outputs:**\n", 21 | "\n", 22 | "- COG outputs after downsampling and reprojection\n", 23 | "\n", 24 | "**Notes:**\n", 25 | "\n", 26 | "- Source CRS and projection extent come from NLCD\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "\n", 37 | "import fsspec\n", 38 | "from rio_cogeo.cogeo import cog_translate\n", 39 | "from rio_cogeo.profiles import cog_profiles\n", 40 | "\n", 41 | "from carbonplan_data.utils import projections, setup\n", 42 | "\n", 43 | "dst_profile = cog_profiles.get(\"deflate\")\n", 44 | "\n", 45 | "workdir, upload = setup(\"joe\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "fs = fsspec.get_filesystem_class(\"gs\")()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "resampling = \"average\"\n", 64 | "resolution = 4000\n", 65 | "\n", 66 | "for region in [\"conus\", \"ak\"]:\n", 67 | " crs, extent = projections(\"albers\", region)\n", 68 | "\n", 69 | " for var in [\"ppt\", \"tmean\"]:\n", 70 | " print(region, var)\n", 71 | "\n", 72 | " source = f\"carbonplan-data/raw/prism/normals/{region}/800m/{var}.tif\"\n", 73 | " cloud_target = f\"processed/prism/normals/{region}/{resolution}m/{var}.tif\"\n", 74 | "\n", 75 | " local_source = \"./local_source.tif\"\n", 76 | " fs.get_file(source, local_source)\n", 77 | "\n", 78 | " cmd = (\n", 79 | " \"gdalwarp\",\n", 80 | " \"-t_srs\",\n", 81 | " f\"'{crs}'\",\n", 82 | " \"-te\",\n", 83 | " extent,\n", 84 | " \"-tr\",\n", 85 | " f\"{resolution} {resolution}\",\n", 86 | " \"-r\",\n", 87 | " resampling,\n", 88 | " local_source,\n", 89 | " \"./raster.tif\",\n", 90 | " )\n", 91 | " print(\" \".join(cmd))\n", 92 | " os.system(\" \".join(cmd))\n", 93 | "\n", 94 | " cog_translate(\"./raster.tif\", \"./raster.tif\", dst_profile)\n", 95 | " upload(\"./raster.tif\", 
cloud_target)\n", 96 | "\n", 97 | "        os.remove(\n", 98 | "            \"raster.tif\",\n", 99 | "        )\n", 100 | "        os.remove(\n", 101 | "            \"local_source.tif\",\n", 102 | "        )" 103 | ] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.9.2" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 4 127 | } 128 | -------------------------------------------------------------------------------- /scripts/statsgo/01_raw_to_tif.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import rasterio 3 | 4 | grid = "mu_grid.e00" 5 | var = "awc" 6 | band = "awc.bsq" 7 | 8 | src_raster = rasterio.open(grid) 9 | profile = src_raster.profile 10 | src_resolution = 1000 11 | 12 | tmp_band = src_raster.read(1) 13 | dtype = "uint16" if var == "bd" else "uint8" 14 | f_band = np.fromfile(band, dtype=dtype) 15 | w = src_raster.meta["width"] 16 | h = src_raster.meta["height"] 17 | src_band = f_band.reshape(int(f_band.shape[0] / (w * h)), h, w) 18 | 19 | if var == "hsgpct": 20 |     src_band = np.argmax(src_band, axis=0).astype(dtype) 21 | 22 | if len(src_band.shape) == 3: 23 |     src_band = src_band[0] 24 | 25 | src_band[tmp_band == -2147483647] = 255 26 | 27 | profile.update(dtype=rasterio.uint8, driver="GTiff", nodata=255) 28 | 29 | with rasterio.open("awc.tif", "w", **profile) as dst: 30 |     dst.write(src_band.astype(rasterio.uint8), 1) 31 | -------------------------------------------------------------------------------- /scripts/terraclimate/01_terraclimate_aux_fileds_to_zarr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# TERRACLIMATE Aux fields to Zarr\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook converts the raw TERRACLIMATE auxiliary fields to Zarr format.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "**Outputs:**\n", 18 | "\n", 19 | "- Cloud copy of TERRACLIMATE Aux fields\n", 20 | "\n", 21 | "**Notes:**\n", 22 | "\n", 23 | "- No reprojection or processing of the data is done in this notebook.\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import fsspec\n", 33 | "import xarray as xr\n", 34 | "import zarr" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# options\n", 44 | "name = \"terraclimate\"\n", 45 | "target_location = f\"gs://carbonplan-data/raw/{name}/4000m/raster.zarr\"\n", 46 | "\n", 47 | "\n", 48 | "awc_uri = \"https://github.com/abatz/WATERBALANCE/raw/master/terraclimate_awc.nc\"\n", 49 | "dem_uri = \"http://thredds.northwestknowledge.net:8080/thredds/dodsC/TERRACLIMATE_ALL/layers/terraclim_dem.nc\"" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "with fsspec.open(awc_uri).open() as f:\n", 59 | "    awc = xr.open_dataset(f).load()" 60 | ] 61 | }, 62 | { 63 | "cell_type": 
"code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "dem = xr.open_dataset(dem_uri).load()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "mapper = fsspec.get_mapper(target_location)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "ds_append = xr.merge([dem, awc]).chunk({\"lat\": 1440, \"lon\": 1440})" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "ds_append.to_zarr(mapper, mode=\"a\")" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "zarr.consolidate_metadata(mapper)" 105 | ] 106 | } 107 | ], 108 | "metadata": { 109 | "kernelspec": { 110 | "display_name": "Python 3", 111 | "language": "python", 112 | "name": "python3" 113 | }, 114 | "language_info": { 115 | "codemirror_mode": { 116 | "name": "ipython", 117 | "version": 3 118 | }, 119 | "file_extension": ".py", 120 | "mimetype": "text/x-python", 121 | "name": "python", 122 | "nbconvert_exporter": "python", 123 | "pygments_lexer": "ipython3", 124 | "version": "3.7.8" 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 4 129 | } 130 | -------------------------------------------------------------------------------- /scripts/terraclimate/02_terraclimate_regrid.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "\n", 9 | "# Regridding TERRACLIMATE with xesmf\n", 10 | "\n", 11 | "_by Joe Hamman (CarbonPlan), June 29, 2020_\n", 12 | "\n", 13 | "This notebook regrids the raw TERRACLIMATE dataset to the CONUS 4000m grid using xesmf.\n", 14 | "\n", 15 | "**Inputs:**\n", 16 | "\n", 17 | "**Outputs:**\n", 18 | "\n", 19 | "- Cloud copy of TERRACLIMATE regridded to the CONUS 4000m grid\n", 20 | "\n", 21 | "**Notes:**\n", 22 | "\n", 23 | "- All variables are currently regridded with bilinear interpolation; conservative regridding is temporarily disabled (see the variables cell below).\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "%pip install -U xarray==0.16.0 --no-deps" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import fsspec\n", 42 | "import numpy as np\n", 43 | "import xarray as xr\n", 44 | "import xesmf as xe\n", 45 | "from dask.diagnostics import ProgressBar" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "variables = {\n", 55 | "    # 'conservative': [\n", 56 | "    #     \"aet\",\n", 57 | "    #     \"def\",\n", 58 | "    #     \"pet\",\n", 59 | "    #     \"ppt\",\n", 60 | "    #     \"q\",\n", 61 | "    #     \"srad\",\n", 62 | "    # ],\n", 63 | "    \"bilinear\": [\n", 64 | "        \"tmax\",\n", 65 | "        \"tmin\",\n", 66 | "        \"pdsi\",\n", 67 | "        \"vap\",\n", 68 | "        \"vpd\",\n", 69 | "        \"ws\",\n", 70 | "        \"soil\",\n", 71 | "        \"swe\",\n", 72 | "        # move to conservative after scrable is fixed\n", 73 | "        \"aet\",\n", 74 | "        \"def\",\n", 75 | "        \"pet\",\n", 76 | "        \"ppt\",\n", 77 | "        \"q\",\n", 78 | "        \"srad\",\n", 79 | "        \"awc\",\n", 80 | "        \"elevation\",\n", 81 | "    ]\n", 82 | "}" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 
89 | "outputs": [], 90 | "source": [ 91 | "# options\n", 92 | "name = \"terraclimate\"\n", 93 | "raw_location = \"gs://carbonplan-data/raw/terraclimate/4000m/raster.zarr\"\n", 94 | "target_grid = \"gs://carbonplan-data/processed/grids/conus/4000m/domain.zarr\"\n", 95 | "# getting weird errors when writing to carbonplan-data\n", 96 | "target_location = f\"gs://carbonplan-data/processed/{name}/conus/4000m/raster.zarr\"" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "mapper = fsspec.get_mapper(target_grid)\n", 106 | "target_ds = xr.open_zarr(mapper, consolidated=True) # .rename({'xc': 'lon', 'yc': 'lat'})\n", 107 | "target_ds" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "mapper = fsspec.get_mapper(raw_location)\n", 117 | "ds = xr.open_zarr(mapper, consolidated=True)\n", 118 | "ds" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "step = 360 / 8640 + 1e-9\n", 128 | "global_grid = xe.util.grid_global(step, step)\n", 129 | "global_grid = global_grid.isel(y=slice(None, None, -1)).isel(y_b=slice(None, None, -1))\n", 130 | "global_grid[\"lat_b\"].values = np.clip(global_grid[\"lat_b\"].values, -90, 90)\n", 131 | "display(global_grid)\n", 132 | "\n", 133 | "# check that this grid is a drop in replacement for the source grid\n", 134 | "assert np.abs(global_grid.lat.isel(x=0).values - ds.lat.values).max() < 1e-5\n", 135 | "assert np.abs(global_grid.lon.isel(y=0).values - ds.lon.values).max() < 1e-5\n", 136 | "assert np.abs(global_grid.lat).max().item() <= 90\n", 137 | "assert np.abs(global_grid.lat_b).max().item() <= 90\n", 138 | "\n", 139 | "# rename grid variables\n", 140 | "source_ds = ds.rename({\"lon\": \"x\", \"lat\": \"y\"}).assign_coords(coords=global_grid.coords)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "regridders = {}\n", 150 | "\n", 151 | "for method in variables:\n", 152 | " regridders[method] = xe.Regridder(source_ds, target_ds, method, reuse_weights=True)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "temp = []\n", 162 | "for method, var_list in variables.items():\n", 163 | " regridder = regridders[method]\n", 164 | " temp.append(regridder(ds[var_list].chunk({\"lat\": -1, \"lon\": -1})))\n", 165 | "\n", 166 | "ds_out = xr.merge(temp, compat=\"override\")" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "ds_out" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "# fs = fsspec.get_filesystem_class('gs')()\n", 185 | "# fs.rm(target_location, recursive=True)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "from multiprocessing.pool import ThreadPool\n", 195 | "\n", 196 | "import dask\n", 197 | "\n", 198 | "with dask.config.set(scheduler=\"threads\", pool=ThreadPool(3)):\n", 199 | " with ProgressBar():\n", 200 | " mapper2 = fsspec.get_mapper(target_location)\n", 201 | " 
ds_out.to_zarr(mapper2, mode=\"w\", consolidated=True)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "import zarr\n", 211 | "\n", 212 | "mapper2 = fsspec.get_mapper(target_location)\n", 213 | "\n", 214 | "\n", 215 | "zarr.consolidate_metadata(mapper2)" 216 | ] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.8.6" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 4 240 | } 241 | -------------------------------------------------------------------------------- /scripts/worldclim/01_raw_to_zarr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "- https://www.worldclim.org/data/worldclim21.html\n", 9 | "- Citation:\n", 10 | "\n", 11 | " Fick, S.E. and R.J. Hijmans, 2017. WorldClim 2: new 1km spatial resolution\n", 12 | " climate surfaces for global land areas. International Journal of Climatology\n", 13 | " 37 (12): 4302-4315.\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "id": "1", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import fsspec\n", 24 | "import xarray as xr" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "2", 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "month_vars = [\"prec\", \"srad\", \"tavg\", \"tmax\", \"tmin\", \"vapr\", \"wind\"]\n", 35 | "\n", 36 | "months = xr.Variable(\"month\", list(range(1, 13)))\n", 37 | "\n", 38 | "ds = xr.Dataset()\n", 39 | "\n", 40 | "for var in month_vars:\n", 41 | " ds[var] = xr.concat(\n", 42 | " [\n", 43 | " xr.open_rasterio(\n", 44 | " f\"gs://carbonplan-scratch/worldclim-raw/wc2.1_30s_{var}/wc2.1_30s_{var}_{m:02d}.tif\",\n", 45 | " chunks={\"x\": 8192, \"y\": 8192},\n", 46 | " ).squeeze(drop=True)\n", 47 | " for m in months.values\n", 48 | " ],\n", 49 | " dim=months,\n", 50 | " compat=\"override\",\n", 51 | " coords=\"minimal\",\n", 52 | " )" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "3", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "ds" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "id": "4", 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "ds[\"elev\"] = xr.open_rasterio(\n", 73 | " \"gs://carbonplan-scratch/worldclim-raw/wc2.1_30s_elev.tif\",\n", 74 | " chunks={\"x\": 8192, \"y\": 8192},\n", 75 | ").squeeze(drop=True)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "5", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "bio_names = {\n", 86 | " \"BIO1\": \"Annual Mean Temperature\",\n", 87 | " \"BIO2\": \"Mean Diurnal Range (Mean of monthly (max temp - min temp))\",\n", 88 | " \"BIO3\": \"Isothermality (BIO2/BIO7) (×100)\",\n", 89 | " \"BIO4\": \"Temperature Seasonality (standard deviation ×100)\",\n", 90 | " \"BIO5\": \"Max Temperature of Warmest Month\",\n", 91 | " \"BIO6\": \"Min Temperature of Coldest 
Month\",\n", 92 | " \"BIO7\": \"Temperature Annual Range (BIO5-BIO6)\",\n", 93 | " \"BIO8\": \"Mean Temperature of Wettest Quarter\",\n", 94 | " \"BIO9\": \"Mean Temperature of Driest Quarter\",\n", 95 | " \"BIO10\": \"Mean Temperature of Warmest Quarter\",\n", 96 | " \"BIO11\": \"Mean Temperature of Coldest Quarter\",\n", 97 | " \"BIO12\": \"Annual Precipitation\",\n", 98 | " \"BIO13\": \"Precipitation of Wettest Month\",\n", 99 | " \"BIO14\": \"Precipitation of Driest Month\",\n", 100 | " \"BIO15\": \"Precipitation Seasonality (Coefficient of Variation)\",\n", 101 | " \"BIO16\": \"Precipitation of Wettest Quarter\",\n", 102 | " \"BIO17\": \"Precipitation of Driest Quarter\",\n", 103 | " \"BIO18\": \"Precipitation of Warmest Quarter\",\n", 104 | " \"BIO19\": \"Precipitation of Coldest Quarter\",\n", 105 | "}\n", 106 | "\n", 107 | "\n", 108 | "for bionum in range(1, 20):\n", 109 | " ds[f\"BIO{bionum:02d}\"] = xr.open_rasterio(\n", 110 | " f\"gs://carbonplan-scratch/worldclim-raw/wc2.1_30s_bio/wc2.1_30s_bio_{bionum}.tif\",\n", 111 | " chunks={\"x\": 8192, \"y\": 8192},\n", 112 | " ).squeeze(drop=True)\n", 113 | " ds[f\"BIO{bionum:02d}\"].attrs[\"description\"] = bio_names[f\"BIO{bionum:d}\"]" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "id": "6", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "mapper = fsspec.get_mapper(\"gs://carbonplan-data/raw/worldclim/30s/raster.zarr\")\n", 124 | "\n", 125 | "ds.to_zarr(mapper, consolidated=True)" 126 | ] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python [conda env:notebook] *", 132 | "language": "python", 133 | "name": "conda-env-notebook-py" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.8.8" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 5 150 | } 151 | -------------------------------------------------------------------------------- /sources.yaml: -------------------------------------------------------------------------------- 1 | mtbs: 2 | description: Raw datasets from MTBS 3 | metadata: 4 | url: https://www.mtbs.gov/ 5 | data: 6 | burned_area_extent_shapefile: 7 | actions: [download, unzip] 8 | urlpath: 9 | - https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/MTBS_Fire/data/composite_data/burned_area_extent_shapefile/mtbs_perimeter_data.zip 10 | fod_pt_shapefile: 11 | actions: [wget, unzip] 12 | urlpath: 13 | - https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/MTBS_Fire/data/composite_data/fod_pt_shapefile/mtbs_fod_pts_data.zip 14 | 15 | mlrc: 16 | description: Raw datasets from MRLC 17 | metadata: 18 | url: https://www.mrlc.gov/data 19 | data: 20 | NLCD_Land_Cover_Change_Index_L48_20190424: 21 | actions: [download, unzip] 22 | urlpath: 23 | - https://s3-us-west-2.amazonaws.com/mrlc/NLCD_Land_Cover_Change_Index_L48_20190424.zip 24 | NLCD_Land_Cover_L48_20190424_full_zip: 25 | actions: [wget, unzip] 26 | urlpath: 27 | - https://s3-us-west-2.amazonaws.com/mrlc/NLCD_Land_Cover_L48_20190424_full_zip.zip 28 | NLCD_Land_Cover_AK_20200213: 29 | actions: [download, unzip] 30 | urlpath: 31 | - https://s3-us-west-2.amazonaws.com/mrlc/NLCD_2001_Land_Cover_AK_20200213.zip 32 | - https://s3-us-west-2.amazonaws.com/mrlc/NLCD_2011_Land_Cover_AK_20200213.zip 33 | - 
https://s3-us-west-2.amazonaws.com/mrlc/NLCD_2016_Land_Cover_AK_20200213.zip 34 |     nlcd_treecanopy_2019_08_31: 35 |       actions: [download, unzip] 36 |       urlpath: 37 |         - https://s3-us-west-2.amazonaws.com/mrlc/nlcd_2011_treecanopy_2019_08_31.zip 38 |         - https://s3-us-west-2.amazonaws.com/mrlc/nlcd_2016_treecanopy_2019_08_31.zip 39 | 40 | usfs: 41 |   description: Raw raster datasets from the US Forest Service raster gateway 42 |   metadata: 43 |     url: https://data.fs.usda.gov/geodata/rastergateway/ 44 |   data: 45 |     conus_forestgroup: 46 |       actions: [download, unzip] 47 |       urlpath: 48 |         - https://data.fs.usda.gov/geodata/rastergateway/forest_type/conus_forestgroup.zip 49 |     conus_foresttype: 50 |       actions: [download, unzip] 51 |       urlpath: 52 |         - https://data.fs.usda.gov/geodata/rastergateway/forest_type/conus_forest-type.zip 53 |     ak_forestgroup: 54 |       actions: [download, unzip] 55 |       urlpath: 56 |         - https://data.fs.usda.gov/geodata/rastergateway/forest_type/ak_forestgroup.zip 57 |     ak_foresttype: 58 |       actions: [download, unzip] 59 |       urlpath: 60 |         - https://data.fs.usda.gov/geodata/rastergateway/forest_type/ak_forest-type.zip 61 | 62 | fia: 63 |   description: Raw datasets from the Forest Inventory and Analysis (FIA) program 64 |   metadata: 65 |     url: https://apps.fs.usda.gov/fia/datamart/datamart.html 66 |   data: 67 |     entire: 68 |       actions: [download, unzip] 69 |       urlpath: 70 |         - https://apps.fs.usda.gov/fia/datamart/CSV/ENTIRE.zip 71 | 72 | gcp: 73 |   description: Raw datasets from the Global Carbon Budget. 74 |   metadata: 75 |     url: https://www.icos-cp.eu/global-carbon-budget-2019 76 |   data: 77 |     global_budget_2019: 78 |       actions: [manual] 79 |       urlpath: 80 |         - https://doi.org/10.18160/GCP-2019 81 |     national_emissions_2019: 82 |       actions: [manual] 83 |       urlpath: 84 |         - https://doi.org/10.18160/GCP-2019 85 | 86 | iiasa: 87 |   description: Raw SSP and RCP scenario datasets from the IIASA database. 88 |   metadata: 89 |     url: 90 |   data: 91 |     SSP_IAM_V2_201811: 92 |       actions: [manual] 93 |       urlpath: 94 |         - https://tntcat.iiasa.ac.at/SspDb/download/iam_v2/SSP_IAM_V2_201811.csv.zip 95 |     SSP_CMIP6_201811: 96 |       actions: [manual] 97 |       urlpath: 98 |         - https://tntcat.iiasa.ac.at/SspDb/download/cmip6/SSP_CMIP6_201811.csv.zip 99 |         - https://tntcat.iiasa.ac.at/SspDb/download/cmip6/cmip6_iam_model_region_mapping.xlsx 100 |     SSP_BASIC_ELEMENTS: 101 |       actions: [manual] 102 |       urlpath: 103 |         - https://tntcat.iiasa.ac.at/SspDb/download/basic_elements/SspDb_compare_regions_2013-06-12.csv.zip 104 |         - https://tntcat.iiasa.ac.at/SspDb/download/basic_elements/SspDb_country_data_2013-06-12.csv.zip 105 |     RCP_CMIP5: 106 |       actions: [manual] 107 |       urlpath: 108 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/PICNTRL_MIDYR_CONC.zip 109 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/PRE2005_MIDYR_CONC.zip 110 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/RCP3PD_MIDYR_CONC.zip 111 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/RCP45_MIDYR_CONC.zip 112 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/RCP6_MIDYR_CONC.zip 113 |         - https://tntcat.iiasa.ac.at/RcpDb/download/CMIP5RECOMMENDATIONS/RCP85_MIDYR_CONC.zip 114 | --------------------------------------------------------------------------------
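Note: the download notebooks key off the `actions` lists in `sources.yaml` — for example, `scripts/nlcd/00_download.ipynb` only fetches entries whose actions include `download`, so the two entries marked `[wget, unzip]` are skipped by that loop. A quick consistency check can flag such entries before a download run; the snippet below is an illustrative sketch only and is not part of the repository.

```python
import yaml

# load the catalog exactly as the download notebooks do
with open("sources.yaml") as f:
    sources = yaml.load(f, Loader=yaml.FullLoader)

for group, spec in sources.items():
    for name, dset in spec.get("data", {}).items():
        actions = dset.get("actions", [])
        urls = dset.get("urlpath", [])
        # entries marked "manual" are fetched by hand; everything else should
        # carry a "download" action or the notebooks' loops will skip them
        if "manual" not in actions and "download" not in actions:
            print(f"{group}/{name}: actions {actions} will be skipped by the download loops")
        if not urls:
            print(f"{group}/{name}: no urlpath listed")
```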